From 22db82dab7b7e017ca61b1582d82a7cfebac7729 Mon Sep 17 00:00:00 2001 From: Gerben Jan Dijkman Date: Mon, 17 Oct 2022 16:04:49 +0200 Subject: [PATCH] Combined both kernels and updated to 5.19.12 --- sys-kernel/pinephone-pro-sources/Manifest | 1 - ...-dts-pinephone-drop-modem-power-node.patch | 175 - ...-dts-pinephone-pro-remove-modem-node.patch | 86 - .../files/1500_XATTR_USER_PREFIX.patch | 67 - ...ink-security-restrictions-by-default.patch | 17 - ..._sparc-address-warray-bound-warnings.patch | 17 - ...nly-if-Secure-Simple-Pairing-enabled.patch | 37 - ...-build-issue-by-selecting-CONFIG_REG.patch | 30 - .../2920_sign-file-patch-for-libressl.patch | 16 - .../3000_Support-printing-firmware-info.patch | 14 - .../files/4567_distro-Gentoo-Kconfig.patch | 341 - ...0_enable-cpu-optimizations-universal.patch | 675 -- ...20_BMQ-and-PDS-io-scheduler-v5.19-r0.patch | 9956 ----------------- .../5021_BMQ-and-PDS-gentoo-defaults.patch | 13 - .../pinephone-pro-sources-5.19.3.ebuild | 70 - sys-kernel/pinephone-sources/Manifest | 2 +- ...k3399-pinephone-pro-add-modem-RI-pin.patch | 0 ...104-PPP-Add-reset-resume-to-usb_wwan.patch | 21 + ...dd-USB_QUIRK_RESET-for-Quectel-EG25G.patch | 25 + ...rk818_charger-use-type-battery-again.patch | 11 + ...hip-i2s-Dont-disable-mclk-on-suspend.patch | 29 + ...usued-softback_lines-cursor-argument.patch | 150 + ...-fbcon-remove-no-op-fbcon_set_origin.patch | 31 + ...rt-fbcon-remove-soft-scrollback-code.patch | 500 + .../pinephone-sources/files/5.19.10-11.patch | 1231 ++ .../pinephone-sources/files/5.19.11-12.patch | 9776 ++++++++++++++++ .../pinephone-sources/files/5.19.8-9.patch | 8234 ++++++++++++++ .../pinephone-sources/files/5.19.9-10.patch | 1723 +++ .../files/Multi-Gen-LRU-Framework.patch | 8901 --------------- .../files/config-ppp | 0 .../files/config-ppp-old | 0 .../files/dracut-ppp.conf | 0 .../pinephone-sources/files/pp-keyboard.patch | 176 - ...build => pinephone-sources-5.19.12.ebuild} | 25 +- 34 files changed, 21745 insertions(+), 20605 deletions(-) delete mode 100644 sys-kernel/pinephone-pro-sources/Manifest delete mode 100644 sys-kernel/pinephone-pro-sources/files/0101-arm64-dts-pinephone-drop-modem-power-node.patch delete mode 100644 sys-kernel/pinephone-pro-sources/files/0102-arm64-dts-pinephone-pro-remove-modem-node.patch delete mode 100644 sys-kernel/pinephone-pro-sources/files/1500_XATTR_USER_PREFIX.patch delete mode 100644 sys-kernel/pinephone-pro-sources/files/1510_fs-enable-link-security-restrictions-by-default.patch delete mode 100644 sys-kernel/pinephone-pro-sources/files/1700_sparc-address-warray-bound-warnings.patch delete mode 100644 sys-kernel/pinephone-pro-sources/files/2000_BT-Check-key-sizes-only-if-Secure-Simple-Pairing-enabled.patch delete mode 100644 sys-kernel/pinephone-pro-sources/files/2900_tmp513-Fix-build-issue-by-selecting-CONFIG_REG.patch delete mode 100644 sys-kernel/pinephone-pro-sources/files/2920_sign-file-patch-for-libressl.patch delete mode 100644 sys-kernel/pinephone-pro-sources/files/3000_Support-printing-firmware-info.patch delete mode 100644 sys-kernel/pinephone-pro-sources/files/4567_distro-Gentoo-Kconfig.patch delete mode 100644 sys-kernel/pinephone-pro-sources/files/5010_enable-cpu-optimizations-universal.patch delete mode 100644 sys-kernel/pinephone-pro-sources/files/5020_BMQ-and-PDS-io-scheduler-v5.19-r0.patch delete mode 100644 sys-kernel/pinephone-pro-sources/files/5021_BMQ-and-PDS-gentoo-defaults.patch delete mode 100644 sys-kernel/pinephone-pro-sources/pinephone-pro-sources-5.19.3.ebuild rename 
sys-kernel/{pinephone-pro-sources => pinephone-sources}/files/0103-arm64-dts-rk3399-pinephone-pro-add-modem-RI-pin.patch (100%) create mode 100644 sys-kernel/pinephone-sources/files/0104-PPP-Add-reset-resume-to-usb_wwan.patch create mode 100644 sys-kernel/pinephone-sources/files/0104-Revert-usb-quirks-Add-USB_QUIRK_RESET-for-Quectel-EG25G.patch create mode 100644 sys-kernel/pinephone-sources/files/0104-rk818_charger-use-type-battery-again.patch create mode 100644 sys-kernel/pinephone-sources/files/0106-sound-rockchip-i2s-Dont-disable-mclk-on-suspend.patch create mode 100644 sys-kernel/pinephone-sources/files/0201-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch create mode 100644 sys-kernel/pinephone-sources/files/0202-revert-fbcon-remove-no-op-fbcon_set_origin.patch create mode 100644 sys-kernel/pinephone-sources/files/0203-revert-fbcon-remove-soft-scrollback-code.patch create mode 100644 sys-kernel/pinephone-sources/files/5.19.10-11.patch create mode 100644 sys-kernel/pinephone-sources/files/5.19.11-12.patch create mode 100644 sys-kernel/pinephone-sources/files/5.19.8-9.patch create mode 100644 sys-kernel/pinephone-sources/files/5.19.9-10.patch delete mode 100644 sys-kernel/pinephone-sources/files/Multi-Gen-LRU-Framework.patch rename sys-kernel/{pinephone-pro-sources => pinephone-sources}/files/config-ppp (100%) rename sys-kernel/{pinephone-pro-sources => pinephone-sources}/files/config-ppp-old (100%) rename sys-kernel/{pinephone-pro-sources => pinephone-sources}/files/dracut-ppp.conf (100%) delete mode 100644 sys-kernel/pinephone-sources/files/pp-keyboard.patch rename sys-kernel/pinephone-sources/{pinephone-sources-5.19.0.ebuild => pinephone-sources-5.19.12.ebuild} (71%) diff --git a/sys-kernel/pinephone-pro-sources/Manifest b/sys-kernel/pinephone-pro-sources/Manifest deleted file mode 100644 index 285950d..0000000 --- a/sys-kernel/pinephone-pro-sources/Manifest +++ /dev/null @@ -1 +0,0 @@ -DIST orange-pi-5.19-20220822-1337.tar.gz 215018577 BLAKE2B b598aee2fb3aece41e83a9916a62b450ab351ed7cd65c6006ed20f04656d260f619b4786d3dd0efcd19b7b6cbd1cec14dd2233e791d9b9e77368160dcf989c60 SHA512 93a0d29647c732716adce044af19b2ae303e6469ead0a90b364972237cc7a24ca9715e9a1d491c2f08126fe79c72072e58294453758ae80d9bf4fb5220485f1f diff --git a/sys-kernel/pinephone-pro-sources/files/0101-arm64-dts-pinephone-drop-modem-power-node.patch b/sys-kernel/pinephone-pro-sources/files/0101-arm64-dts-pinephone-drop-modem-power-node.patch deleted file mode 100644 index b90eced..0000000 --- a/sys-kernel/pinephone-pro-sources/files/0101-arm64-dts-pinephone-drop-modem-power-node.patch +++ /dev/null @@ -1,175 +0,0 @@ -From 602d05e416ae0d0fba3022fa2c3d195164b406c6 Mon Sep 17 00:00:00 2001 -From: Clayton Craft -Date: Wed, 16 Dec 2020 20:16:14 -0800 -Subject: [PATCH] dts: pinephone: drop modem-power node - ---- - .../allwinner/sun50i-a64-pinephone-1.0.dts | 26 +++--------------- - .../allwinner/sun50i-a64-pinephone-1.1.dts | 27 +++---------------- - .../allwinner/sun50i-a64-pinephone-1.2.dts | 27 +++---------------- - .../dts/allwinner/sun50i-a64-pinephone.dtsi | 12 +++++++++ - 4 files changed, 24 insertions(+), 68 deletions(-) - -diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.0.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.0.dts -index a21c6d78a..7f0cfdafe 100644 ---- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.0.dts -+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.0.dts -@@ -86,28 +86,6 @@ ®_drivevbus { - status = "okay"; - }; - --&uart3 { -- modem { -- 
compatible = "quectel,eg25"; -- char-device-name = "modem-power"; -- -- power-supply = <®_vbat_bb>; /* PL7 */ -- -- enable-gpios = <&pio 7 8 GPIO_ACTIVE_LOW>; /* PH8 */ -- reset-gpios = <&pio 2 4 GPIO_ACTIVE_HIGH>; /* PC4 */ -- pwrkey-gpios = <&pio 1 3 GPIO_ACTIVE_HIGH>; /* PB3 */ -- -- sleep-gpios = <&pio 7 7 GPIO_ACTIVE_HIGH>; /* PH7 */ -- wakeup-gpios = <&pio 1 2 GPIO_ACTIVE_HIGH>; /* PB2-RI */ -- -- cts-gpios = <&pio 3 5 GPIO_ACTIVE_HIGH>; /* PD5-CTS */ -- dtr-gpios = <&r_pio 0 6 GPIO_ACTIVE_HIGH>; /* PL6-DTR */ -- rts-gpios = <&pio 3 4 GPIO_ACTIVE_HIGH>; /* PD4-RTS */ -- -- quectel,qdai = "1,1,0,1,0,0,1,1"; -- }; --}; -- - &usbphy { - usb-role-switch; - -@@ -118,6 +96,10 @@ usb0_drd_sw: endpoint { - }; - }; - -+&ring_indicator { -+ gpios = <&pio 1 2 GPIO_ACTIVE_LOW>; /* PB2 */ -+}; -+ - &sgm3140 { - enable-gpios = <&pio 2 3 GPIO_ACTIVE_HIGH>; /* PC3 */ - flash-gpios = <&pio 3 24 GPIO_ACTIVE_HIGH>; /* PD24 */ -diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts -index 61ff56b17..5e85ddc12 100644 ---- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts -+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts -@@ -109,34 +109,15 @@ ®_drivevbus { - status = "okay"; - }; - -+&ring_indicator { -+ gpios = <&pio 1 2 GPIO_ACTIVE_LOW>; /* PB2 */ -+}; -+ - &sgm3140 { - enable-gpios = <&pio 3 24 GPIO_ACTIVE_HIGH>; /* PD24 */ - flash-gpios = <&pio 2 3 GPIO_ACTIVE_HIGH>; /* PC3 */ - }; - --&uart3 { -- modem { -- compatible = "quectel,eg25"; -- char-device-name = "modem-power"; -- -- power-supply = <®_vbat_bb>; /* PL7 */ -- -- enable-gpios = <&pio 7 8 GPIO_ACTIVE_LOW>; /* PH8 */ -- reset-gpios = <&pio 2 4 GPIO_ACTIVE_HIGH>; /* PC4 */ -- pwrkey-gpios = <&pio 1 3 GPIO_ACTIVE_HIGH>; /* PB3 */ -- //status-pwrkey-multiplexed; /* status acts as pwrkey */ -- -- sleep-gpios = <&pio 7 7 GPIO_ACTIVE_HIGH>; /* PH7 */ -- wakeup-gpios = <&pio 1 2 GPIO_ACTIVE_HIGH>; /* PB2-RI */ -- -- dtr-gpios = <&r_pio 0 6 GPIO_ACTIVE_HIGH>; /* PL6-DTR */ -- cts-gpios = <&pio 3 5 GPIO_ACTIVE_HIGH>; /* PD5-CTS */ -- rts-gpios = <&pio 3 4 GPIO_ACTIVE_HIGH>; /* PD4-RTS */ -- -- quectel,qdai = "1,1,0,1,0,0,1,1"; -- }; --}; -- - &usbphy { - usb-role-switch; - -diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts -index fe7d567a8..f4b9b0991 100644 ---- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts -+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts -@@ -101,34 +101,15 @@ ®_anx1v0 { - enable-active-high; - }; - -+&ring_indicator { -+ gpios = <&r_pio 0 6 GPIO_ACTIVE_LOW>; /* PL6 */ -+}; -+ - &sgm3140 { - enable-gpios = <&pio 3 24 GPIO_ACTIVE_HIGH>; /* PD24 */ - flash-gpios = <&pio 2 3 GPIO_ACTIVE_HIGH>; /* PC3 */ - }; - --&uart3 { -- modem { -- compatible = "quectel,eg25"; -- char-device-name = "modem-power"; -- -- power-supply = <®_vbat_bb>; /* PL7 */ -- -- enable-gpios = <&pio 7 8 GPIO_ACTIVE_LOW>; /* PH8 */ -- reset-gpios = <&pio 2 4 GPIO_ACTIVE_HIGH>; /* PC4 */ -- status-gpios = <&pio 7 9 GPIO_ACTIVE_HIGH>; /* PH9 */ -- pwrkey-gpios = <&pio 1 3 GPIO_ACTIVE_HIGH>; /* PB3 */ -- -- host-ready-gpios = <&pio 7 7 GPIO_ACTIVE_HIGH>; /* PH7 */ -- wakeup-gpios = <&r_pio 0 6 GPIO_ACTIVE_HIGH>; /* PL6-RI */ -- -- dtr-gpios = <&pio 1 2 GPIO_ACTIVE_HIGH>; /* PB2-DTR */ -- cts-gpios = <&pio 3 5 GPIO_ACTIVE_HIGH>; /* PD5-CTS */ -- rts-gpios = <&pio 3 4 GPIO_ACTIVE_HIGH>; /* PD4-RTS */ -- -- quectel,qdai = "1,1,0,1,0,0,1,1"; -- }; --}; -- - 
&usbphy { - usb-role-switch; - -diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi -index 346113382..7b48126d1 100644 ---- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi -+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi -@@ -192,6 +192,17 @@ ec25_codec: ec25-codec { - sound-name-prefix = "Modem"; - }; - -+ gpio-keys { -+ compatible = "gpio-keys"; -+ -+ ring_indicator: ring-indicator { -+ label = "Ring Indicator"; -+ linux,can-disable; -+ linux,code = ; -+ wakeup-source; -+ }; -+ }; -+ - i2c_csi: i2c-csi { - compatible = "i2c-gpio"; - sda-gpios = <&pio 4 13 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; /* PE13 */ -@@ -264,6 +275,7 @@ reg_usb_5v: usb-5v { - reg_vbat_bb: vbat-bb { - compatible = "regulator-fixed"; - regulator-name = "vbat-bb"; -+ regulator-always-on; - regulator-min-microvolt = <3500000>; - regulator-max-microvolt = <3500000>; - gpio = <&r_pio 0 7 GPIO_ACTIVE_HIGH>; /* PL7 */ --- -2.31.1 - diff --git a/sys-kernel/pinephone-pro-sources/files/0102-arm64-dts-pinephone-pro-remove-modem-node.patch b/sys-kernel/pinephone-pro-sources/files/0102-arm64-dts-pinephone-pro-remove-modem-node.patch deleted file mode 100644 index 24be3b4..0000000 --- a/sys-kernel/pinephone-pro-sources/files/0102-arm64-dts-pinephone-pro-remove-modem-node.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 60d8aedea7c8c390ee744730ab3e565ea84496fb Mon Sep 17 00:00:00 2001 -From: Danct12 -Date: Fri, 10 Dec 2021 23:01:34 +0700 -Subject: [PATCH] arm64: dts: rk3399-pinephone-pro: Remove modem node - -Since we don't use modem-power driver, this can be removed -for eg25-manager. ---- - .../dts/rockchip/rk3399-pinephone-pro.dts | 40 +------------------ - 1 file changed, 2 insertions(+), 38 deletions(-) - -diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts -index 61c990764..13141c643 100644 ---- a/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts -+++ b/arch/arm64/boot/dts/rockchip/rk3399-pinephone-pro.dts -@@ -326,6 +326,7 @@ vcc_4g_5v: vcc-4g-5v { - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - vin-supply = <&vcc5v0_sys>; -+ regulator-always-on; - }; - - vcc_4g: vcc-4g { -@@ -338,6 +339,7 @@ vcc_4g: vcc-4g { - regulator-min-microvolt = <3800000>; - regulator-max-microvolt = <3800000>; - vin-supply = <&vcc_sysin>; -+ regulator-always-on; - }; - - vcc1v8_codec: vcc1v8-codec-regulator { -@@ -1058,31 +1060,6 @@ mipi_in_panel: endpoint { - - &uart3 { - status = "okay"; -- -- modem { -- compatible = "quectel,eg25"; -- char-device-name = "modem-power"; -- -- pinctrl-names = "default"; -- pinctrl-0 = <&modem_control_pins>; -- -- power-supply = <&vcc_4g>; -- vbus-supply = <&vcc_4g_5v>; -- -- enable-gpios = <&gpio0 RK_PB0 GPIO_ACTIVE_HIGH>; // W_DISABLE# -- reset-gpios = <&gpio3 RK_PB0 GPIO_ACTIVE_HIGH>; -- status-gpios = <&gpio3 RK_PA6 GPIO_ACTIVE_HIGH>; -- pwrkey-gpios = <&gpio0 RK_PB5 GPIO_ACTIVE_HIGH>; -- -- host-ready-gpios = <&gpio0 RK_PB4 GPIO_ACTIVE_HIGH>; // apready -- wakeup-gpios = <&gpio0 RK_PA1 GPIO_ACTIVE_HIGH>; // ri -- -- dtr-gpios = <&gpio0 RK_PA3 GPIO_ACTIVE_HIGH>; -- cts-gpios = <&gpio3 RK_PC0 GPIO_ACTIVE_HIGH>; -- rts-gpios = <&gpio3 RK_PC1 GPIO_ACTIVE_HIGH>; -- -- quectel,qdai = "3,0,0,4,0,0,1,1"; -- }; - }; - - &pmu_io_domains { -@@ -1153,19 +1130,6 @@ vcc_4g_5v_en: vcc-4g-5v-en-pin { - vcc_4g_en: vcc-4g-en-pin { - rockchip,pins = <4 RK_PC7 RK_FUNC_GPIO &pcfg_pull_none>; - }; -- -- modem_control_pins: 
modem-control-pins { -- rockchip,pins = -- <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>, -- <3 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>, -- <3 RK_PA6 RK_FUNC_GPIO &pcfg_pull_none>, -- <0 RK_PB5 RK_FUNC_GPIO &pcfg_pull_none>, -- <0 RK_PB4 RK_FUNC_GPIO &pcfg_pull_none>, -- <0 RK_PA1 RK_FUNC_GPIO &pcfg_pull_none>, -- <0 RK_PA3 RK_FUNC_GPIO &pcfg_pull_none>, -- <3 RK_PC0 RK_FUNC_GPIO &pcfg_pull_none>, -- <3 RK_PC1 RK_FUNC_GPIO &pcfg_pull_none>; -- }; - }; - - pmic { --- -2.34.1 - diff --git a/sys-kernel/pinephone-pro-sources/files/1500_XATTR_USER_PREFIX.patch b/sys-kernel/pinephone-pro-sources/files/1500_XATTR_USER_PREFIX.patch deleted file mode 100644 index 245dcc2..0000000 --- a/sys-kernel/pinephone-pro-sources/files/1500_XATTR_USER_PREFIX.patch +++ /dev/null @@ -1,67 +0,0 @@ -From: Anthony G. Basile - -This patch adds support for a restricted user-controlled namespace on -tmpfs filesystem used to house PaX flags. The namespace must be of the -form user.pax.* and its value cannot exceed a size of 8 bytes. - -This is needed even on all Gentoo systems so that XATTR_PAX flags -are preserved for users who might build packages using portage on -a tmpfs system with a non-hardened kernel and then switch to a -hardened kernel with XATTR_PAX enabled. - -The namespace is added to any user with Extended Attribute support -enabled for tmpfs. Users who do not enable xattrs will not have -the XATTR_PAX flags preserved. - -diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h -index 1590c49..5eab462 100644 ---- a/include/uapi/linux/xattr.h -+++ b/include/uapi/linux/xattr.h -@@ -73,5 +73,9 @@ - #define XATTR_POSIX_ACL_DEFAULT "posix_acl_default" - #define XATTR_NAME_POSIX_ACL_DEFAULT XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_DEFAULT - -+/* User namespace */ -+#define XATTR_PAX_PREFIX XATTR_USER_PREFIX "pax." 
-+#define XATTR_PAX_FLAGS_SUFFIX "flags" -+#define XATTR_NAME_PAX_FLAGS XATTR_PAX_PREFIX XATTR_PAX_FLAGS_SUFFIX - - #endif /* _UAPI_LINUX_XATTR_H */ ---- a/mm/shmem.c 2020-05-04 15:30:27.042035334 -0400 -+++ b/mm/shmem.c 2020-05-04 15:34:57.013881725 -0400 -@@ -3238,6 +3238,14 @@ static int shmem_xattr_handler_set(const - struct shmem_inode_info *info = SHMEM_I(inode); - - name = xattr_full_name(handler, name); -+ -+ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { -+ if (strcmp(name, XATTR_NAME_PAX_FLAGS)) -+ return -EOPNOTSUPP; -+ if (size > 8) -+ return -EINVAL; -+ } -+ - return simple_xattr_set(&info->xattrs, name, value, size, flags, NULL); - } - -@@ -3253,6 +3261,12 @@ static const struct xattr_handler shmem_ - .set = shmem_xattr_handler_set, - }; - -+static const struct xattr_handler shmem_user_xattr_handler = { -+ .prefix = XATTR_USER_PREFIX, -+ .get = shmem_xattr_handler_get, -+ .set = shmem_xattr_handler_set, -+}; -+ - static const struct xattr_handler *shmem_xattr_handlers[] = { - #ifdef CONFIG_TMPFS_POSIX_ACL - &posix_acl_access_xattr_handler, -@@ -3260,6 +3274,7 @@ static const struct xattr_handler *shmem - #endif - &shmem_security_xattr_handler, - &shmem_trusted_xattr_handler, -+ &shmem_user_xattr_handler, - NULL - }; - diff --git a/sys-kernel/pinephone-pro-sources/files/1510_fs-enable-link-security-restrictions-by-default.patch b/sys-kernel/pinephone-pro-sources/files/1510_fs-enable-link-security-restrictions-by-default.patch deleted file mode 100644 index e8c3015..0000000 --- a/sys-kernel/pinephone-pro-sources/files/1510_fs-enable-link-security-restrictions-by-default.patch +++ /dev/null @@ -1,17 +0,0 @@ ---- a/fs/namei.c 2022-01-23 13:02:27.876558299 -0500 -+++ b/fs/namei.c 2022-03-06 12:47:39.375719693 -0500 -@@ -1020,10 +1020,10 @@ static inline void put_link(struct namei - path_put(&last->link); - } - --static int sysctl_protected_symlinks __read_mostly; --static int sysctl_protected_hardlinks __read_mostly; --static int sysctl_protected_fifos __read_mostly; --static int sysctl_protected_regular __read_mostly; -+static int sysctl_protected_symlinks __read_mostly = 1; -+static int sysctl_protected_hardlinks __read_mostly = 1; -+int sysctl_protected_fifos __read_mostly = 1; -+int sysctl_protected_regular __read_mostly = 1; - - #ifdef CONFIG_SYSCTL - static struct ctl_table namei_sysctls[] = { diff --git a/sys-kernel/pinephone-pro-sources/files/1700_sparc-address-warray-bound-warnings.patch b/sys-kernel/pinephone-pro-sources/files/1700_sparc-address-warray-bound-warnings.patch deleted file mode 100644 index f939355..0000000 --- a/sys-kernel/pinephone-pro-sources/files/1700_sparc-address-warray-bound-warnings.patch +++ /dev/null @@ -1,17 +0,0 @@ ---- a/arch/sparc/mm/init_64.c 2022-05-24 16:48:40.749677491 -0400 -+++ b/arch/sparc/mm/init_64.c 2022-05-24 16:55:15.511356945 -0400 -@@ -3052,11 +3052,11 @@ static inline resource_size_t compute_ke - static void __init kernel_lds_init(void) - { - code_resource.start = compute_kern_paddr(_text); -- code_resource.end = compute_kern_paddr(_etext - 1); -+ code_resource.end = compute_kern_paddr(_etext) - 1; - data_resource.start = compute_kern_paddr(_etext); -- data_resource.end = compute_kern_paddr(_edata - 1); -+ data_resource.end = compute_kern_paddr(_edata) - 1; - bss_resource.start = compute_kern_paddr(__bss_start); -- bss_resource.end = compute_kern_paddr(_end - 1); -+ bss_resource.end = compute_kern_paddr(_end) - 1; - } - - static int __init report_memory(void) diff --git 
a/sys-kernel/pinephone-pro-sources/files/2000_BT-Check-key-sizes-only-if-Secure-Simple-Pairing-enabled.patch b/sys-kernel/pinephone-pro-sources/files/2000_BT-Check-key-sizes-only-if-Secure-Simple-Pairing-enabled.patch deleted file mode 100644 index 394ad48..0000000 --- a/sys-kernel/pinephone-pro-sources/files/2000_BT-Check-key-sizes-only-if-Secure-Simple-Pairing-enabled.patch +++ /dev/null @@ -1,37 +0,0 @@ -The encryption is only mandatory to be enforced when both sides are using -Secure Simple Pairing and this means the key size check makes only sense -in that case. - -On legacy Bluetooth 2.0 and earlier devices like mice the encryption was -optional and thus causing an issue if the key size check is not bound to -using Secure Simple Pairing. - -Fixes: d5bb334a8e17 ("Bluetooth: Align minimum encryption key size for LE and BR/EDR connections") -Signed-off-by: Marcel Holtmann -Cc: stable@vger.kernel.org ---- - net/bluetooth/hci_conn.c | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c -index 3cf0764d5793..7516cdde3373 100644 ---- a/net/bluetooth/hci_conn.c -+++ b/net/bluetooth/hci_conn.c -@@ -1272,8 +1272,13 @@ int hci_conn_check_link_mode(struct hci_conn *conn) - return 0; - } - -- if (hci_conn_ssp_enabled(conn) && -- !test_bit(HCI_CONN_ENCRYPT, &conn->flags)) -+ /* If Secure Simple Pairing is not enabled, then legacy connection -+ * setup is used and no encryption or key sizes can be enforced. -+ */ -+ if (!hci_conn_ssp_enabled(conn)) -+ return 1; -+ -+ if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags)) - return 0; - - /* The minimum encryption key size needs to be enforced by the --- -2.20.1 diff --git a/sys-kernel/pinephone-pro-sources/files/2900_tmp513-Fix-build-issue-by-selecting-CONFIG_REG.patch b/sys-kernel/pinephone-pro-sources/files/2900_tmp513-Fix-build-issue-by-selecting-CONFIG_REG.patch deleted file mode 100644 index 4335685..0000000 --- a/sys-kernel/pinephone-pro-sources/files/2900_tmp513-Fix-build-issue-by-selecting-CONFIG_REG.patch +++ /dev/null @@ -1,30 +0,0 @@ -From dc328d75a6f37f4ff11a81ae16b1ec88c3197640 Mon Sep 17 00:00:00 2001 -From: Mike Pagano -Date: Mon, 23 Mar 2020 08:20:06 -0400 -Subject: [PATCH 1/1] This driver requires REGMAP_I2C to build. Select it by - default in Kconfig. Reported at gentoo bugzilla: - https://bugs.gentoo.org/710790 -Cc: mpagano@gentoo.org - -Reported-by: Phil Stracchino - -Signed-off-by: Mike Pagano ---- - drivers/hwmon/Kconfig | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig -index 47ac20aee06f..530b4f29ba85 100644 ---- a/drivers/hwmon/Kconfig -+++ b/drivers/hwmon/Kconfig -@@ -1769,6 +1769,7 @@ config SENSORS_TMP421 - config SENSORS_TMP513 - tristate "Texas Instruments TMP513 and compatibles" - depends on I2C -+ select REGMAP_I2C - help - If you say yes here you get support for Texas Instruments TMP512, - and TMP513 temperature and power supply sensor chips. 
--- -2.24.1 - diff --git a/sys-kernel/pinephone-pro-sources/files/2920_sign-file-patch-for-libressl.patch b/sys-kernel/pinephone-pro-sources/files/2920_sign-file-patch-for-libressl.patch deleted file mode 100644 index e6ec017..0000000 --- a/sys-kernel/pinephone-pro-sources/files/2920_sign-file-patch-for-libressl.patch +++ /dev/null @@ -1,16 +0,0 @@ ---- a/scripts/sign-file.c 2020-05-20 18:47:21.282820662 -0400 -+++ b/scripts/sign-file.c 2020-05-20 18:48:37.991081899 -0400 -@@ -41,9 +41,10 @@ - * signing with anything other than SHA1 - so we're stuck with that if such is - * the case. - */ --#if defined(LIBRESSL_VERSION_NUMBER) || \ -- OPENSSL_VERSION_NUMBER < 0x10000000L || \ -- defined(OPENSSL_NO_CMS) -+#if defined(OPENSSL_NO_CMS) || \ -+ ( defined(LIBRESSL_VERSION_NUMBER) \ -+ && (LIBRESSL_VERSION_NUMBER < 0x3010000fL) ) || \ -+ OPENSSL_VERSION_NUMBER < 0x10000000L - #define USE_PKCS7 - #endif - #ifndef USE_PKCS7 diff --git a/sys-kernel/pinephone-pro-sources/files/3000_Support-printing-firmware-info.patch b/sys-kernel/pinephone-pro-sources/files/3000_Support-printing-firmware-info.patch deleted file mode 100644 index a630cfb..0000000 --- a/sys-kernel/pinephone-pro-sources/files/3000_Support-printing-firmware-info.patch +++ /dev/null @@ -1,14 +0,0 @@ ---- a/drivers/base/firmware_loader/main.c 2021-08-24 15:42:07.025482085 -0400 -+++ b/drivers/base/firmware_loader/main.c 2021-08-24 15:44:40.782975313 -0400 -@@ -809,6 +809,11 @@ _request_firmware(const struct firmware - - ret = _request_firmware_prepare(&fw, name, device, buf, size, - offset, opt_flags); -+ -+#ifdef CONFIG_GENTOO_PRINT_FIRMWARE_INFO -+ printk(KERN_NOTICE "Loading firmware: %s\n", name); -+#endif -+ - if (ret <= 0) /* error or already assigned */ - goto out; - diff --git a/sys-kernel/pinephone-pro-sources/files/4567_distro-Gentoo-Kconfig.patch b/sys-kernel/pinephone-pro-sources/files/4567_distro-Gentoo-Kconfig.patch deleted file mode 100644 index 0a38098..0000000 --- a/sys-kernel/pinephone-pro-sources/files/4567_distro-Gentoo-Kconfig.patch +++ /dev/null @@ -1,341 +0,0 @@ ---- a/Kconfig 2022-05-11 13:20:07.110347567 -0400 -+++ b/Kconfig 2022-05-11 13:21:12.127174393 -0400 -@@ -30,3 +30,5 @@ source "lib/Kconfig" - source "lib/Kconfig.debug" - - source "Documentation/Kconfig" -+ -+source "distro/Kconfig" ---- /dev/null 2022-05-10 13:47:17.750578524 -0400 -+++ b/distro/Kconfig 2022-05-11 13:21:20.540529032 -0400 -@@ -0,0 +1,290 @@ -+menu "Gentoo Linux" -+ -+config GENTOO_LINUX -+ bool "Gentoo Linux support" -+ -+ default y -+ -+ select CPU_FREQ_DEFAULT_GOV_SCHEDUTIL -+ -+ help -+ In order to boot Gentoo Linux a minimal set of config settings needs to -+ be enabled in the kernel; to avoid the users from having to enable them -+ manually as part of a Gentoo Linux installation or a new clean config, -+ we enable these config settings by default for convenience. -+ -+ See the settings that become available for more details and fine-tuning. -+ -+config GENTOO_LINUX_UDEV -+ bool "Linux dynamic and persistent device naming (userspace devfs) support" -+ -+ depends on GENTOO_LINUX -+ default y if GENTOO_LINUX -+ -+ select DEVTMPFS -+ select TMPFS -+ select UNIX -+ -+ select MMU -+ select SHMEM -+ -+ help -+ In order to boot Gentoo Linux a minimal set of config settings needs to -+ be enabled in the kernel; to avoid the users from having to enable them -+ manually as part of a Gentoo Linux installation or a new clean config, -+ we enable these config settings by default for convenience. 
-+ -+ Currently this only selects TMPFS, DEVTMPFS and their dependencies. -+ TMPFS is enabled to maintain a tmpfs file system at /dev/shm, /run and -+ /sys/fs/cgroup; DEVTMPFS to maintain a devtmpfs file system at /dev. -+ -+ Some of these are critical files that need to be available early in the -+ boot process; if not available, it causes sysfs and udev to malfunction. -+ -+ To ensure Gentoo Linux boots, it is best to leave this setting enabled; -+ if you run a custom setup, you could consider whether to disable this. -+ -+config GENTOO_LINUX_PORTAGE -+ bool "Select options required by Portage features" -+ -+ depends on GENTOO_LINUX -+ default y if GENTOO_LINUX -+ -+ select CGROUPS -+ select NAMESPACES -+ select IPC_NS -+ select NET_NS -+ select PID_NS -+ select SYSVIPC -+ select USER_NS -+ select UTS_NS -+ -+ help -+ This enables options required by various Portage FEATURES. -+ Currently this selects: -+ -+ CGROUPS (required for FEATURES=cgroup) -+ IPC_NS (required for FEATURES=ipc-sandbox) -+ NET_NS (required for FEATURES=network-sandbox) -+ PID_NS (required for FEATURES=pid-sandbox) -+ SYSVIPC (required by IPC_NS) -+ -+ -+ It is highly recommended that you leave this enabled as these FEATURES -+ are, or will soon be, enabled by default. -+ -+menu "Support for init systems, system and service managers" -+ visible if GENTOO_LINUX -+ -+config GENTOO_LINUX_INIT_SCRIPT -+ bool "OpenRC, runit and other script based systems and managers" -+ -+ default y if GENTOO_LINUX -+ -+ depends on GENTOO_LINUX -+ -+ select BINFMT_SCRIPT -+ select CGROUPS -+ select EPOLL -+ select FILE_LOCKING -+ select INOTIFY_USER -+ select SIGNALFD -+ select TIMERFD -+ -+ help -+ The init system is the first thing that loads after the kernel booted. -+ -+ These config settings allow you to select which init systems to support; -+ instead of having to select all the individual settings all over the -+ place, these settings allows you to select all the settings at once. -+ -+ This particular setting enables all the known requirements for OpenRC, -+ runit and similar script based systems and managers. -+ -+ If you are unsure about this, it is best to leave this setting enabled. -+ -+config GENTOO_LINUX_INIT_SYSTEMD -+ bool "systemd" -+ -+ default n -+ -+ depends on GENTOO_LINUX && GENTOO_LINUX_UDEV -+ -+ select AUTOFS_FS -+ select BLK_DEV_BSG -+ select BPF_SYSCALL -+ select CGROUP_BPF -+ select CGROUPS -+ select CRYPTO_HMAC -+ select CRYPTO_SHA256 -+ select CRYPTO_USER_API_HASH -+ select DEVPTS_MULTIPLE_INSTANCES -+ select DMIID if X86_32 || X86_64 || X86 -+ select EPOLL -+ select FANOTIFY -+ select FHANDLE -+ select FILE_LOCKING -+ select INOTIFY_USER -+ select IPV6 -+ select KCMP -+ select NET -+ select NET_NS -+ select PROC_FS -+ select SECCOMP if HAVE_ARCH_SECCOMP -+ select SECCOMP_FILTER if HAVE_ARCH_SECCOMP_FILTER -+ select SIGNALFD -+ select SYSFS -+ select TIMERFD -+ select TMPFS_POSIX_ACL -+ select TMPFS_XATTR -+ -+ select ANON_INODES -+ select BLOCK -+ select EVENTFD -+ select FSNOTIFY -+ select INET -+ select NLATTR -+ -+ help -+ The init system is the first thing that loads after the kernel booted. -+ -+ These config settings allow you to select which init systems to support; -+ instead of having to select all the individual settings all over the -+ place, these settings allows you to select all the settings at once. -+ -+ This particular setting enables all the known requirements for systemd; -+ it also enables suggested optional settings, as the package suggests to. 
-+ -+endmenu -+ -+menuconfig GENTOO_KERNEL_SELF_PROTECTION -+ bool "Kernel Self Protection Project" -+ depends on GENTOO_LINUX -+ help -+ Recommended Kernel settings based on the suggestions from the Kernel Self Protection Project -+ See: https://kernsec.org/wiki/index.php/Kernel_Self_Protection_Project/Recommended_Settings -+ Note, there may be additional settings for which the CONFIG_ setting is invisible in menuconfig due -+ to unmet dependencies. Search for GENTOO_KERNEL_SELF_PROTECTION_COMMON and search for -+ GENTOO_KERNEL_SELF_PROTECTION_{X86_64, ARM64, X86_32, ARM} for dependency information on your -+ specific architecture. -+ Note 2: Please see the URL above for numeric settings, e.g. CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 -+ for X86_64 -+ -+if GENTOO_KERNEL_SELF_PROTECTION -+config GENTOO_KERNEL_SELF_PROTECTION_COMMON -+ bool "Enable Kernel Self Protection Project Recommendations" -+ -+ depends on GENTOO_LINUX && !ACPI_CUSTOM_METHOD && !COMPAT_BRK && !PROC_KCORE && !COMPAT_VDSO && !KEXEC && !HIBERNATION && !LEGACY_PTYS && !X86_X32 && !MODIFY_LDT_SYSCALL && GCC_PLUGINS && !IOMMU_DEFAULT_DMA_LAZY && !IOMMU_DEFAULT_PASSTHROUGH && IOMMU_DEFAULT_DMA_STRICT -+ -+ select BUG -+ select STRICT_KERNEL_RWX -+ select DEBUG_WX -+ select STACKPROTECTOR -+ select STACKPROTECTOR_STRONG -+ select STRICT_DEVMEM if DEVMEM=y -+ select IO_STRICT_DEVMEM if DEVMEM=y -+ select SYN_COOKIES -+ select DEBUG_CREDENTIALS -+ select DEBUG_NOTIFIERS -+ select DEBUG_LIST -+ select DEBUG_SG -+ select HARDENED_USERCOPY if HAVE_HARDENED_USERCOPY_ALLOCATOR=y -+ select KFENCE if HAVE_ARCH_KFENCE && (!SLAB || SLUB) -+ select RANDOMIZE_KSTACK_OFFSET_DEFAULT if HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET && (INIT_STACK_NONE || !CC_IS_CLANG || CLANG_VERSION>=140000) -+ select SCHED_CORE if SCHED_SMT -+ select BUG_ON_DATA_CORRUPTION -+ select SCHED_STACK_END_CHECK -+ select SECCOMP if HAVE_ARCH_SECCOMP -+ select SECCOMP_FILTER if HAVE_ARCH_SECCOMP_FILTER -+ select SECURITY_YAMA -+ select SLAB_FREELIST_RANDOM -+ select SLAB_FREELIST_HARDENED -+ select SHUFFLE_PAGE_ALLOCATOR -+ select SLUB_DEBUG -+ select PAGE_POISONING -+ select PAGE_POISONING_NO_SANITY -+ select PAGE_POISONING_ZERO -+ select INIT_ON_ALLOC_DEFAULT_ON -+ select INIT_ON_FREE_DEFAULT_ON -+ select REFCOUNT_FULL -+ select FORTIFY_SOURCE -+ select SECURITY_DMESG_RESTRICT -+ select PANIC_ON_OOPS -+ select GCC_PLUGIN_LATENT_ENTROPY -+ select GCC_PLUGIN_STRUCTLEAK -+ select GCC_PLUGIN_STRUCTLEAK_BYREF_ALL -+ select GCC_PLUGIN_RANDSTRUCT -+ select GCC_PLUGIN_RANDSTRUCT_PERFORMANCE -+ select ZERO_CALL_USED_REGS if CC_HAS_ZERO_CALL_USED_REGS -+ -+ help -+ Search for GENTOO_KERNEL_SELF_PROTECTION_{X86_64, ARM64, X86_32, ARM} for dependency -+ information on your specific architecture. Note 2: Please see the URL above for -+ numeric settings, e.g. 
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 for X86_64 -+ -+config GENTOO_KERNEL_SELF_PROTECTION_X86_64 -+ bool "X86_64 KSPP Settings" if GENTOO_KERNEL_SELF_PROTECTION_COMMON -+ -+ depends on !X86_MSR && X86_64 && GENTOO_KERNEL_SELF_PROTECTION -+ default n -+ -+ select RANDOMIZE_BASE -+ select RANDOMIZE_MEMORY -+ select RELOCATABLE -+ select LEGACY_VSYSCALL_NONE -+ select PAGE_TABLE_ISOLATION -+ select GCC_PLUGIN_STACKLEAK -+ select VMAP_STACK -+ -+ -+config GENTOO_KERNEL_SELF_PROTECTION_ARM64 -+ bool "ARM64 KSPP Settings" -+ -+ depends on ARM64 -+ default n -+ -+ select RANDOMIZE_BASE -+ select RELOCATABLE -+ select ARM64_SW_TTBR0_PAN -+ select CONFIG_UNMAP_KERNEL_AT_EL0 -+ select GCC_PLUGIN_STACKLEAK -+ select VMAP_STACK -+ -+config GENTOO_KERNEL_SELF_PROTECTION_X86_32 -+ bool "X86_32 KSPP Settings" -+ -+ depends on !X86_MSR && !MODIFY_LDT_SYSCALL && !M486 && X86_32 -+ default n -+ -+ select HIGHMEM64G -+ select X86_PAE -+ select RANDOMIZE_BASE -+ select RELOCATABLE -+ select PAGE_TABLE_ISOLATION -+ -+config GENTOO_KERNEL_SELF_PROTECTION_ARM -+ bool "ARM KSPP Settings" -+ -+ depends on !OABI_COMPAT && ARM -+ default n -+ -+ select VMSPLIT_3G -+ select STRICT_MEMORY_RWX -+ select CPU_SW_DOMAIN_PAN -+ -+endif -+ -+config GENTOO_PRINT_FIRMWARE_INFO -+ bool "Print firmware information that the kernel attempts to load" -+ -+ depends on GENTOO_LINUX -+ default y -+ -+ help -+ Enable this option to print information about firmware that the kernel -+ is attempting to load. This information can be accessible via the -+ dmesg command-line utility -+ -+ See the settings that become available for more details and fine-tuning. -+ -+endmenu -diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig -index 9e921fc72..f29bc13fa 100644 ---- a/security/selinux/Kconfig -+++ b/security/selinux/Kconfig -@@ -26,6 +26,7 @@ config SECURITY_SELINUX_BOOTPARAM - config SECURITY_SELINUX_DISABLE - bool "NSA SELinux runtime disable" - depends on SECURITY_SELINUX -+ depends on !GENTOO_KERNEL_SELF_PROTECTION - select SECURITY_WRITABLE_HOOKS - default n - help --- -2.31.1 - -From bd3ff0b16792c18c0614c2b95e148943209f460a Mon Sep 17 00:00:00 2001 -From: Georgy Yakovlev -Date: Tue, 8 Jun 2021 13:59:57 -0700 -Subject: [PATCH 2/2] set DEFAULT_MMAP_MIN_ADDR by default - ---- - mm/Kconfig | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/mm/Kconfig b/mm/Kconfig -index 24c045b24..e13fc740c 100644 ---- a/mm/Kconfig -+++ b/mm/Kconfig -@@ -321,6 +321,8 @@ config KSM - config DEFAULT_MMAP_MIN_ADDR - int "Low address space to protect from user allocation" - depends on MMU -+ default 65536 if ( X86_64 || X86_32 || PPC64 || IA64 ) && GENTOO_KERNEL_SELF_PROTECTION -+ default 32768 if ( ARM64 || ARM ) && GENTOO_KERNEL_SELF_PROTECTION - default 4096 - help - This is the portion of low virtual memory which should be protected --- -2.31.1 -``` diff --git a/sys-kernel/pinephone-pro-sources/files/5010_enable-cpu-optimizations-universal.patch b/sys-kernel/pinephone-pro-sources/files/5010_enable-cpu-optimizations-universal.patch deleted file mode 100644 index b9c03cb..0000000 --- a/sys-kernel/pinephone-pro-sources/files/5010_enable-cpu-optimizations-universal.patch +++ /dev/null @@ -1,675 +0,0 @@ -From b5892719c43f739343c628e3d357471a3bdaa368 Mon Sep 17 00:00:00 2001 -From: graysky -Date: Tue, 15 Mar 2022 05:58:43 -0400 -Subject: [PATCH] more uarches for kernel 5.17+ -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -FEATURES -This patch adds additional CPU options to the Linux kernel 
accessible under: - Processor type and features ---> - Processor family ---> - -With the release of gcc 11.1 and clang 12.0, several generic 64-bit levels are -offered which are good for supported Intel or AMD CPUs: -• x86-64-v2 -• x86-64-v3 -• x86-64-v4 - -Users of glibc 2.33 and above can see which level is supported by current -hardware by running: - /lib/ld-linux-x86-64.so.2 --help | grep supported - -Alternatively, compare the flags from /proc/cpuinfo to this list.[1] - -CPU-specific microarchitectures include: -• AMD Improved K8-family -• AMD K10-family -• AMD Family 10h (Barcelona) -• AMD Family 14h (Bobcat) -• AMD Family 16h (Jaguar) -• AMD Family 15h (Bulldozer) -• AMD Family 15h (Piledriver) -• AMD Family 15h (Steamroller) -• AMD Family 15h (Excavator) -• AMD Family 17h (Zen) -• AMD Family 17h (Zen 2) -• AMD Family 19h (Zen 3)† -• Intel Silvermont low-power processors -• Intel Goldmont low-power processors (Apollo Lake and Denverton) -• Intel Goldmont Plus low-power processors (Gemini Lake) -• Intel 1st Gen Core i3/i5/i7 (Nehalem) -• Intel 1.5 Gen Core i3/i5/i7 (Westmere) -• Intel 2nd Gen Core i3/i5/i7 (Sandybridge) -• Intel 3rd Gen Core i3/i5/i7 (Ivybridge) -• Intel 4th Gen Core i3/i5/i7 (Haswell) -• Intel 5th Gen Core i3/i5/i7 (Broadwell) -• Intel 6th Gen Core i3/i5/i7 (Skylake) -• Intel 6th Gen Core i7/i9 (Skylake X) -• Intel 8th Gen Core i3/i5/i7 (Cannon Lake) -• Intel 10th Gen Core i7/i9 (Ice Lake) -• Intel Xeon (Cascade Lake) -• Intel Xeon (Cooper Lake)* -• Intel 3rd Gen 10nm++ i3/i5/i7/i9-family (Tiger Lake)* -• Intel 3rd Gen 10nm++ Xeon (Sapphire Rapids)‡ -• Intel 11th Gen i3/i5/i7/i9-family (Rocket Lake)‡ -• Intel 12th Gen i3/i5/i7/i9-family (Alder Lake)‡ - -Notes: If not otherwise noted, gcc >=9.1 is required for support. - *Requires gcc >=10.1 or clang >=10.0 - †Required gcc >=10.3 or clang >=12.0 - ‡Required gcc >=11.1 or clang >=12.0 - -It also offers to compile passing the 'native' option which, "selects the CPU -to generate code for at compilation time by determining the processor type of -the compiling machine. Using -march=native enables all instruction subsets -supported by the local machine and will produce code optimized for the local -machine under the constraints of the selected instruction set."[2] - -Users of Intel CPUs should select the 'Intel-Native' option and users of AMD -CPUs should select the 'AMD-Native' option. - -MINOR NOTES RELATING TO INTEL ATOM PROCESSORS -This patch also changes -march=atom to -march=bonnell in accordance with the -gcc v4.9 changes. Upstream is using the deprecated -match=atom flags when I -believe it should use the newer -march=bonnell flag for atom processors.[3] - -It is not recommended to compile on Atom-CPUs with the 'native' option.[4] The -recommendation is to use the 'atom' option instead. - -BENEFITS -Small but real speed increases are measurable using a make endpoint comparing -a generic kernel to one built with one of the respective microarchs. - -See the following experimental evidence supporting this statement: -https://github.com/graysky2/kernel_gcc_patch - -REQUIREMENTS -linux version 5.17+ -gcc version >=9.0 or clang version >=9.0 - -ACKNOWLEDGMENTS -This patch builds on the seminal work by Jeroen.[5] - -REFERENCES -1. https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/77566eb03bc6a326811cb7e9 -2. https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html#index-x86-Options -3. https://bugzilla.kernel.org/show_bug.cgi?id=77461 -4. https://github.com/graysky2/kernel_gcc_patch/issues/15 -5. 
http://www.linuxforge.net/docs/linux/linux-gcc.php - -Signed-off-by: graysky ---- - arch/x86/Kconfig.cpu | 332 ++++++++++++++++++++++++++++++-- - arch/x86/Makefile | 40 +++- - arch/x86/include/asm/vermagic.h | 66 +++++++ - 3 files changed, 424 insertions(+), 14 deletions(-) - -diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu -index 542377cd419d..22b919cdb6d1 100644 ---- a/arch/x86/Kconfig.cpu -+++ b/arch/x86/Kconfig.cpu -@@ -157,7 +157,7 @@ config MPENTIUM4 - - - config MK6 -- bool "K6/K6-II/K6-III" -+ bool "AMD K6/K6-II/K6-III" - depends on X86_32 - help - Select this for an AMD K6-family processor. Enables use of -@@ -165,7 +165,7 @@ config MK6 - flags to GCC. - - config MK7 -- bool "Athlon/Duron/K7" -+ bool "AMD Athlon/Duron/K7" - depends on X86_32 - help - Select this for an AMD Athlon K7-family processor. Enables use of -@@ -173,12 +173,98 @@ config MK7 - flags to GCC. - - config MK8 -- bool "Opteron/Athlon64/Hammer/K8" -+ bool "AMD Opteron/Athlon64/Hammer/K8" - help - Select this for an AMD Opteron or Athlon64 Hammer-family processor. - Enables use of some extended instructions, and passes appropriate - optimization flags to GCC. - -+config MK8SSE3 -+ bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3" -+ help -+ Select this for improved AMD Opteron or Athlon64 Hammer-family processors. -+ Enables use of some extended instructions, and passes appropriate -+ optimization flags to GCC. -+ -+config MK10 -+ bool "AMD 61xx/7x50/PhenomX3/X4/II/K10" -+ help -+ Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50, -+ Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor. -+ Enables use of some extended instructions, and passes appropriate -+ optimization flags to GCC. -+ -+config MBARCELONA -+ bool "AMD Barcelona" -+ help -+ Select this for AMD Family 10h Barcelona processors. -+ -+ Enables -march=barcelona -+ -+config MBOBCAT -+ bool "AMD Bobcat" -+ help -+ Select this for AMD Family 14h Bobcat processors. -+ -+ Enables -march=btver1 -+ -+config MJAGUAR -+ bool "AMD Jaguar" -+ help -+ Select this for AMD Family 16h Jaguar processors. -+ -+ Enables -march=btver2 -+ -+config MBULLDOZER -+ bool "AMD Bulldozer" -+ help -+ Select this for AMD Family 15h Bulldozer processors. -+ -+ Enables -march=bdver1 -+ -+config MPILEDRIVER -+ bool "AMD Piledriver" -+ help -+ Select this for AMD Family 15h Piledriver processors. -+ -+ Enables -march=bdver2 -+ -+config MSTEAMROLLER -+ bool "AMD Steamroller" -+ help -+ Select this for AMD Family 15h Steamroller processors. -+ -+ Enables -march=bdver3 -+ -+config MEXCAVATOR -+ bool "AMD Excavator" -+ help -+ Select this for AMD Family 15h Excavator processors. -+ -+ Enables -march=bdver4 -+ -+config MZEN -+ bool "AMD Zen" -+ help -+ Select this for AMD Family 17h Zen processors. -+ -+ Enables -march=znver1 -+ -+config MZEN2 -+ bool "AMD Zen 2" -+ help -+ Select this for AMD Family 17h Zen 2 processors. -+ -+ Enables -march=znver2 -+ -+config MZEN3 -+ bool "AMD Zen 3" -+ depends on (CC_IS_GCC && GCC_VERSION >= 100300) || (CC_IS_CLANG && CLANG_VERSION >= 120000) -+ help -+ Select this for AMD Family 19h Zen 3 processors. -+ -+ Enables -march=znver3 -+ - config MCRUSOE - bool "Crusoe" - depends on X86_32 -@@ -270,7 +356,7 @@ config MPSC - in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. - - config MCORE2 -- bool "Core 2/newer Xeon" -+ bool "Intel Core 2" - help - - Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and -@@ -278,6 +364,8 @@ config MCORE2 - family in /proc/cpuinfo. 
Newer ones have 6 and older ones 15 - (not a typo) - -+ Enables -march=core2 -+ - config MATOM - bool "Intel Atom" - help -@@ -287,6 +375,182 @@ config MATOM - accordingly optimized code. Use a recent GCC with specific Atom - support in order to fully benefit from selecting this option. - -+config MNEHALEM -+ bool "Intel Nehalem" -+ select X86_P6_NOP -+ help -+ -+ Select this for 1st Gen Core processors in the Nehalem family. -+ -+ Enables -march=nehalem -+ -+config MWESTMERE -+ bool "Intel Westmere" -+ select X86_P6_NOP -+ help -+ -+ Select this for the Intel Westmere formerly Nehalem-C family. -+ -+ Enables -march=westmere -+ -+config MSILVERMONT -+ bool "Intel Silvermont" -+ select X86_P6_NOP -+ help -+ -+ Select this for the Intel Silvermont platform. -+ -+ Enables -march=silvermont -+ -+config MGOLDMONT -+ bool "Intel Goldmont" -+ select X86_P6_NOP -+ help -+ -+ Select this for the Intel Goldmont platform including Apollo Lake and Denverton. -+ -+ Enables -march=goldmont -+ -+config MGOLDMONTPLUS -+ bool "Intel Goldmont Plus" -+ select X86_P6_NOP -+ help -+ -+ Select this for the Intel Goldmont Plus platform including Gemini Lake. -+ -+ Enables -march=goldmont-plus -+ -+config MSANDYBRIDGE -+ bool "Intel Sandy Bridge" -+ select X86_P6_NOP -+ help -+ -+ Select this for 2nd Gen Core processors in the Sandy Bridge family. -+ -+ Enables -march=sandybridge -+ -+config MIVYBRIDGE -+ bool "Intel Ivy Bridge" -+ select X86_P6_NOP -+ help -+ -+ Select this for 3rd Gen Core processors in the Ivy Bridge family. -+ -+ Enables -march=ivybridge -+ -+config MHASWELL -+ bool "Intel Haswell" -+ select X86_P6_NOP -+ help -+ -+ Select this for 4th Gen Core processors in the Haswell family. -+ -+ Enables -march=haswell -+ -+config MBROADWELL -+ bool "Intel Broadwell" -+ select X86_P6_NOP -+ help -+ -+ Select this for 5th Gen Core processors in the Broadwell family. -+ -+ Enables -march=broadwell -+ -+config MSKYLAKE -+ bool "Intel Skylake" -+ select X86_P6_NOP -+ help -+ -+ Select this for 6th Gen Core processors in the Skylake family. -+ -+ Enables -march=skylake -+ -+config MSKYLAKEX -+ bool "Intel Skylake X" -+ select X86_P6_NOP -+ help -+ -+ Select this for 6th Gen Core processors in the Skylake X family. -+ -+ Enables -march=skylake-avx512 -+ -+config MCANNONLAKE -+ bool "Intel Cannon Lake" -+ select X86_P6_NOP -+ help -+ -+ Select this for 8th Gen Core processors -+ -+ Enables -march=cannonlake -+ -+config MICELAKE -+ bool "Intel Ice Lake" -+ select X86_P6_NOP -+ help -+ -+ Select this for 10th Gen Core processors in the Ice Lake family. -+ -+ Enables -march=icelake-client -+ -+config MCASCADELAKE -+ bool "Intel Cascade Lake" -+ select X86_P6_NOP -+ help -+ -+ Select this for Xeon processors in the Cascade Lake family. -+ -+ Enables -march=cascadelake -+ -+config MCOOPERLAKE -+ bool "Intel Cooper Lake" -+ depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000) -+ select X86_P6_NOP -+ help -+ -+ Select this for Xeon processors in the Cooper Lake family. -+ -+ Enables -march=cooperlake -+ -+config MTIGERLAKE -+ bool "Intel Tiger Lake" -+ depends on (CC_IS_GCC && GCC_VERSION > 100100) || (CC_IS_CLANG && CLANG_VERSION >= 100000) -+ select X86_P6_NOP -+ help -+ -+ Select this for third-generation 10 nm process processors in the Tiger Lake family. 
-+ -+ Enables -march=tigerlake -+ -+config MSAPPHIRERAPIDS -+ bool "Intel Sapphire Rapids" -+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) -+ select X86_P6_NOP -+ help -+ -+ Select this for third-generation 10 nm process processors in the Sapphire Rapids family. -+ -+ Enables -march=sapphirerapids -+ -+config MROCKETLAKE -+ bool "Intel Rocket Lake" -+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) -+ select X86_P6_NOP -+ help -+ -+ Select this for eleventh-generation processors in the Rocket Lake family. -+ -+ Enables -march=rocketlake -+ -+config MALDERLAKE -+ bool "Intel Alder Lake" -+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) -+ select X86_P6_NOP -+ help -+ -+ Select this for twelfth-generation processors in the Alder Lake family. -+ -+ Enables -march=alderlake -+ - config GENERIC_CPU - bool "Generic-x86-64" - depends on X86_64 -@@ -294,6 +558,50 @@ config GENERIC_CPU - Generic x86-64 CPU. - Run equally well on all x86-64 CPUs. - -+config GENERIC_CPU2 -+ bool "Generic-x86-64-v2" -+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) -+ depends on X86_64 -+ help -+ Generic x86-64 CPU. -+ Run equally well on all x86-64 CPUs with min support of x86-64-v2. -+ -+config GENERIC_CPU3 -+ bool "Generic-x86-64-v3" -+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) -+ depends on X86_64 -+ help -+ Generic x86-64-v3 CPU with v3 instructions. -+ Run equally well on all x86-64 CPUs with min support of x86-64-v3. -+ -+config GENERIC_CPU4 -+ bool "Generic-x86-64-v4" -+ depends on (CC_IS_GCC && GCC_VERSION > 110000) || (CC_IS_CLANG && CLANG_VERSION >= 120000) -+ depends on X86_64 -+ help -+ Generic x86-64 CPU with v4 instructions. -+ Run equally well on all x86-64 CPUs with min support of x86-64-v4. -+ -+config MNATIVE_INTEL -+ bool "Intel-Native optimizations autodetected by the compiler" -+ help -+ -+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects -+ the optimum settings to use based on your processor. Do NOT use this -+ for AMD CPUs. Intel Only! -+ -+ Enables -march=native -+ -+config MNATIVE_AMD -+ bool "AMD-Native optimizations autodetected by the compiler" -+ help -+ -+ Clang 3.8, GCC 4.2 and above support -march=native, which automatically detects -+ the optimum settings to use based on your processor. Do NOT use this -+ for Intel CPUs. AMD Only! 
-+ -+ Enables -march=native -+ - endchoice - - config X86_GENERIC -@@ -318,7 +626,7 @@ config X86_INTERNODE_CACHE_SHIFT - config X86_L1_CACHE_SHIFT - int - default "7" if MPENTIUM4 || MPSC -- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU -+ default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD || X86_GENERIC || GENERIC_CPU || GENERIC_CPU2 || GENERIC_CPU3 || GENERIC_CPU4 - default "4" if MELAN || M486SX || M486 || MGEODEGX1 - default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX - -@@ -336,11 +644,11 @@ config X86_ALIGNMENT_16 - - config X86_INTEL_USERCOPY - def_bool y -- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 -+ depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL - - config X86_USE_PPRO_CHECKSUM - def_bool y -- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM -+ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD - - # - # P6_NOPs are a relatively minor optimization that require a family >= -@@ -356,26 +664,26 @@ config X86_USE_PPRO_CHECKSUM - config X86_P6_NOP - def_bool y - depends on X86_64 -- depends on (MCORE2 || MPENTIUM4 || MPSC) -+ depends on (MCORE2 || MPENTIUM4 || MPSC || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL) - - config X86_TSC - def_bool y -- depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 -+ depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || 
MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD) || X86_64 - - config X86_CMPXCHG64 - def_bool y -- depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 -+ depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD - - # this should be set for all -march=.. options where the compiler - # generates cmov. - config X86_CMOV - def_bool y -- depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) -+ depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD) - - config X86_MINIMUM_CPU_FAMILY - int - default "64" if X86_64 -- default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8) -+ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MROCKETLAKE || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD) - default "5" if X86_32 && X86_CMPXCHG64 - default "4" - -diff --git a/arch/x86/Makefile b/arch/x86/Makefile -index e84cdd409b64..7d3bbf060079 100644 ---- a/arch/x86/Makefile -+++ b/arch/x86/Makefile -@@ -131,8 +131,44 @@ else - # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) - cflags-$(CONFIG_MK8) 
+= -march=k8 - cflags-$(CONFIG_MPSC) += -march=nocona -- cflags-$(CONFIG_MCORE2) += -march=core2 -- cflags-$(CONFIG_MATOM) += -march=atom -+ cflags-$(CONFIG_MK8SSE3) += -march=k8-sse3 -+ cflags-$(CONFIG_MK10) += -march=amdfam10 -+ cflags-$(CONFIG_MBARCELONA) += -march=barcelona -+ cflags-$(CONFIG_MBOBCAT) += -march=btver1 -+ cflags-$(CONFIG_MJAGUAR) += -march=btver2 -+ cflags-$(CONFIG_MBULLDOZER) += -march=bdver1 -+ cflags-$(CONFIG_MPILEDRIVER) += -march=bdver2 -mno-tbm -+ cflags-$(CONFIG_MSTEAMROLLER) += -march=bdver3 -mno-tbm -+ cflags-$(CONFIG_MEXCAVATOR) += -march=bdver4 -mno-tbm -+ cflags-$(CONFIG_MZEN) += -march=znver1 -+ cflags-$(CONFIG_MZEN2) += -march=znver2 -+ cflags-$(CONFIG_MZEN3) += -march=znver3 -+ cflags-$(CONFIG_MNATIVE_INTEL) += -march=native -+ cflags-$(CONFIG_MNATIVE_AMD) += -march=native -+ cflags-$(CONFIG_MATOM) += -march=bonnell -+ cflags-$(CONFIG_MCORE2) += -march=core2 -+ cflags-$(CONFIG_MNEHALEM) += -march=nehalem -+ cflags-$(CONFIG_MWESTMERE) += -march=westmere -+ cflags-$(CONFIG_MSILVERMONT) += -march=silvermont -+ cflags-$(CONFIG_MGOLDMONT) += -march=goldmont -+ cflags-$(CONFIG_MGOLDMONTPLUS) += -march=goldmont-plus -+ cflags-$(CONFIG_MSANDYBRIDGE) += -march=sandybridge -+ cflags-$(CONFIG_MIVYBRIDGE) += -march=ivybridge -+ cflags-$(CONFIG_MHASWELL) += -march=haswell -+ cflags-$(CONFIG_MBROADWELL) += -march=broadwell -+ cflags-$(CONFIG_MSKYLAKE) += -march=skylake -+ cflags-$(CONFIG_MSKYLAKEX) += -march=skylake-avx512 -+ cflags-$(CONFIG_MCANNONLAKE) += -march=cannonlake -+ cflags-$(CONFIG_MICELAKE) += -march=icelake-client -+ cflags-$(CONFIG_MCASCADELAKE) += -march=cascadelake -+ cflags-$(CONFIG_MCOOPERLAKE) += -march=cooperlake -+ cflags-$(CONFIG_MTIGERLAKE) += -march=tigerlake -+ cflags-$(CONFIG_MSAPPHIRERAPIDS) += -march=sapphirerapids -+ cflags-$(CONFIG_MROCKETLAKE) += -march=rocketlake -+ cflags-$(CONFIG_MALDERLAKE) += -march=alderlake -+ cflags-$(CONFIG_GENERIC_CPU2) += -march=x86-64-v2 -+ cflags-$(CONFIG_GENERIC_CPU3) += -march=x86-64-v3 -+ cflags-$(CONFIG_GENERIC_CPU4) += -march=x86-64-v4 - cflags-$(CONFIG_GENERIC_CPU) += -mtune=generic - KBUILD_CFLAGS += $(cflags-y) - -diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h -index 75884d2cdec3..4e6a08d4c7e5 100644 ---- a/arch/x86/include/asm/vermagic.h -+++ b/arch/x86/include/asm/vermagic.h -@@ -17,6 +17,48 @@ - #define MODULE_PROC_FAMILY "586MMX " - #elif defined CONFIG_MCORE2 - #define MODULE_PROC_FAMILY "CORE2 " -+#elif defined CONFIG_MNATIVE_INTEL -+#define MODULE_PROC_FAMILY "NATIVE_INTEL " -+#elif defined CONFIG_MNATIVE_AMD -+#define MODULE_PROC_FAMILY "NATIVE_AMD " -+#elif defined CONFIG_MNEHALEM -+#define MODULE_PROC_FAMILY "NEHALEM " -+#elif defined CONFIG_MWESTMERE -+#define MODULE_PROC_FAMILY "WESTMERE " -+#elif defined CONFIG_MSILVERMONT -+#define MODULE_PROC_FAMILY "SILVERMONT " -+#elif defined CONFIG_MGOLDMONT -+#define MODULE_PROC_FAMILY "GOLDMONT " -+#elif defined CONFIG_MGOLDMONTPLUS -+#define MODULE_PROC_FAMILY "GOLDMONTPLUS " -+#elif defined CONFIG_MSANDYBRIDGE -+#define MODULE_PROC_FAMILY "SANDYBRIDGE " -+#elif defined CONFIG_MIVYBRIDGE -+#define MODULE_PROC_FAMILY "IVYBRIDGE " -+#elif defined CONFIG_MHASWELL -+#define MODULE_PROC_FAMILY "HASWELL " -+#elif defined CONFIG_MBROADWELL -+#define MODULE_PROC_FAMILY "BROADWELL " -+#elif defined CONFIG_MSKYLAKE -+#define MODULE_PROC_FAMILY "SKYLAKE " -+#elif defined CONFIG_MSKYLAKEX -+#define MODULE_PROC_FAMILY "SKYLAKEX " -+#elif defined CONFIG_MCANNONLAKE -+#define MODULE_PROC_FAMILY "CANNONLAKE " -+#elif defined 
CONFIG_MICELAKE -+#define MODULE_PROC_FAMILY "ICELAKE " -+#elif defined CONFIG_MCASCADELAKE -+#define MODULE_PROC_FAMILY "CASCADELAKE " -+#elif defined CONFIG_MCOOPERLAKE -+#define MODULE_PROC_FAMILY "COOPERLAKE " -+#elif defined CONFIG_MTIGERLAKE -+#define MODULE_PROC_FAMILY "TIGERLAKE " -+#elif defined CONFIG_MSAPPHIRERAPIDS -+#define MODULE_PROC_FAMILY "SAPPHIRERAPIDS " -+#elif defined CONFIG_ROCKETLAKE -+#define MODULE_PROC_FAMILY "ROCKETLAKE " -+#elif defined CONFIG_MALDERLAKE -+#define MODULE_PROC_FAMILY "ALDERLAKE " - #elif defined CONFIG_MATOM - #define MODULE_PROC_FAMILY "ATOM " - #elif defined CONFIG_M686 -@@ -35,6 +77,30 @@ - #define MODULE_PROC_FAMILY "K7 " - #elif defined CONFIG_MK8 - #define MODULE_PROC_FAMILY "K8 " -+#elif defined CONFIG_MK8SSE3 -+#define MODULE_PROC_FAMILY "K8SSE3 " -+#elif defined CONFIG_MK10 -+#define MODULE_PROC_FAMILY "K10 " -+#elif defined CONFIG_MBARCELONA -+#define MODULE_PROC_FAMILY "BARCELONA " -+#elif defined CONFIG_MBOBCAT -+#define MODULE_PROC_FAMILY "BOBCAT " -+#elif defined CONFIG_MBULLDOZER -+#define MODULE_PROC_FAMILY "BULLDOZER " -+#elif defined CONFIG_MPILEDRIVER -+#define MODULE_PROC_FAMILY "PILEDRIVER " -+#elif defined CONFIG_MSTEAMROLLER -+#define MODULE_PROC_FAMILY "STEAMROLLER " -+#elif defined CONFIG_MJAGUAR -+#define MODULE_PROC_FAMILY "JAGUAR " -+#elif defined CONFIG_MEXCAVATOR -+#define MODULE_PROC_FAMILY "EXCAVATOR " -+#elif defined CONFIG_MZEN -+#define MODULE_PROC_FAMILY "ZEN " -+#elif defined CONFIG_MZEN2 -+#define MODULE_PROC_FAMILY "ZEN2 " -+#elif defined CONFIG_MZEN3 -+#define MODULE_PROC_FAMILY "ZEN3 " - #elif defined CONFIG_MELAN - #define MODULE_PROC_FAMILY "ELAN " - #elif defined CONFIG_MCRUSOE --- -2.35.1 - diff --git a/sys-kernel/pinephone-pro-sources/files/5020_BMQ-and-PDS-io-scheduler-v5.19-r0.patch b/sys-kernel/pinephone-pro-sources/files/5020_BMQ-and-PDS-io-scheduler-v5.19-r0.patch deleted file mode 100644 index 610cfe8..0000000 --- a/sys-kernel/pinephone-pro-sources/files/5020_BMQ-and-PDS-io-scheduler-v5.19-r0.patch +++ /dev/null @@ -1,9956 +0,0 @@ -diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index cc3ea8febc62..ab4c5a35b999 100644 ---- a/Documentation/admin-guide/kernel-parameters.txt -+++ b/Documentation/admin-guide/kernel-parameters.txt -@@ -5299,6 +5299,12 @@ - sa1100ir [NET] - See drivers/net/irda/sa1100_ir.c. - -+ sched_timeslice= -+ [KNL] Time slice in ms for Project C BMQ/PDS scheduler. -+ Format: integer 2, 4 -+ Default: 4 -+ See Documentation/scheduler/sched-BMQ.txt -+ - sched_verbose [KNL] Enables verbose scheduler debug messages. - - schedstats= [KNL,X86] Enable or disable scheduled statistics. -diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst -index ddccd1077462..e24781970a3d 100644 ---- a/Documentation/admin-guide/sysctl/kernel.rst -+++ b/Documentation/admin-guide/sysctl/kernel.rst -@@ -1524,3 +1524,13 @@ is 10 seconds. - - The softlockup threshold is (``2 * watchdog_thresh``). Setting this - tunable to zero will disable lockup detection altogether. -+ -+yield_type: -+=========== -+ -+BMQ/PDS CPU scheduler only. This determines what type of yield calls -+to sched_yield will perform. -+ -+ 0 - No yield. -+ 1 - Deboost and requeue task. (default) -+ 2 - Set run queue skip task. 
-diff --git a/Documentation/scheduler/sched-BMQ.txt b/Documentation/scheduler/sched-BMQ.txt -new file mode 100644 -index 000000000000..05c84eec0f31 ---- /dev/null -+++ b/Documentation/scheduler/sched-BMQ.txt -@@ -0,0 +1,110 @@ -+ BitMap queue CPU Scheduler -+ -------------------------- -+ -+CONTENT -+======== -+ -+ Background -+ Design -+ Overview -+ Task policy -+ Priority management -+ BitMap Queue -+ CPU Assignment and Migration -+ -+ -+Background -+========== -+ -+BitMap Queue CPU scheduler, referred to as BMQ from here on, is an evolution -+of previous Priority and Deadline based Skiplist multiple queue scheduler(PDS), -+and inspired by Zircon scheduler. The goal of it is to keep the scheduler code -+simple, while efficiency and scalable for interactive tasks, such as desktop, -+movie playback and gaming etc. -+ -+Design -+====== -+ -+Overview -+-------- -+ -+BMQ use per CPU run queue design, each CPU(logical) has it's own run queue, -+each CPU is responsible for scheduling the tasks that are putting into it's -+run queue. -+ -+The run queue is a set of priority queues. Note that these queues are fifo -+queue for non-rt tasks or priority queue for rt tasks in data structure. See -+BitMap Queue below for details. BMQ is optimized for non-rt tasks in the fact -+that most applications are non-rt tasks. No matter the queue is fifo or -+priority, In each queue is an ordered list of runnable tasks awaiting execution -+and the data structures are the same. When it is time for a new task to run, -+the scheduler simply looks the lowest numbered queueue that contains a task, -+and runs the first task from the head of that queue. And per CPU idle task is -+also in the run queue, so the scheduler can always find a task to run on from -+its run queue. -+ -+Each task will assigned the same timeslice(default 4ms) when it is picked to -+start running. Task will be reinserted at the end of the appropriate priority -+queue when it uses its whole timeslice. When the scheduler selects a new task -+from the priority queue it sets the CPU's preemption timer for the remainder of -+the previous timeslice. When that timer fires the scheduler will stop execution -+on that task, select another task and start over again. -+ -+If a task blocks waiting for a shared resource then it's taken out of its -+priority queue and is placed in a wait queue for the shared resource. When it -+is unblocked it will be reinserted in the appropriate priority queue of an -+eligible CPU. -+ -+Task policy -+----------- -+ -+BMQ supports DEADLINE, FIFO, RR, NORMAL, BATCH and IDLE task policy like the -+mainline CFS scheduler. But BMQ is heavy optimized for non-rt task, that's -+NORMAL/BATCH/IDLE policy tasks. Below is the implementation detail of each -+policy. -+ -+DEADLINE -+ It is squashed as priority 0 FIFO task. -+ -+FIFO/RR -+ All RT tasks share one single priority queue in BMQ run queue designed. The -+complexity of insert operation is O(n). BMQ is not designed for system runs -+with major rt policy tasks. -+ -+NORMAL/BATCH/IDLE -+ BATCH and IDLE tasks are treated as the same policy. They compete CPU with -+NORMAL policy tasks, but they just don't boost. To control the priority of -+NORMAL/BATCH/IDLE tasks, simply use nice level. -+ -+ISO -+ ISO policy is not supported in BMQ. Please use nice level -20 NORMAL policy -+task instead. -+ -+Priority management -+------------------- -+ -+RT tasks have priority from 0-99. For non-rt tasks, there are three different -+factors used to determine the effective priority of a task. 
The effective -+priority being what is used to determine which queue it will be in. -+ -+The first factor is simply the task’s static priority. Which is assigned from -+task's nice level, within [-20, 19] in userland's point of view and [0, 39] -+internally. -+ -+The second factor is the priority boost. This is a value bounded between -+[-MAX_PRIORITY_ADJ, MAX_PRIORITY_ADJ] used to offset the base priority, it is -+modified by the following cases: -+ -+*When a thread has used up its entire timeslice, always deboost its boost by -+increasing by one. -+*When a thread gives up cpu control(voluntary or non-voluntary) to reschedule, -+and its switch-in time(time after last switch and run) below the thredhold -+based on its priority boost, will boost its boost by decreasing by one buti is -+capped at 0 (won’t go negative). -+ -+The intent in this system is to ensure that interactive threads are serviced -+quickly. These are usually the threads that interact directly with the user -+and cause user-perceivable latency. These threads usually do little work and -+spend most of their time blocked awaiting another user event. So they get the -+priority boost from unblocking while background threads that do most of the -+processing receive the priority penalty for using their entire timeslice. -diff --git a/fs/proc/base.c b/fs/proc/base.c -index 8dfa36a99c74..46397c606e01 100644 ---- a/fs/proc/base.c -+++ b/fs/proc/base.c -@@ -479,7 +479,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, - seq_puts(m, "0 0 0\n"); - else - seq_printf(m, "%llu %llu %lu\n", -- (unsigned long long)task->se.sum_exec_runtime, -+ (unsigned long long)tsk_seruntime(task), - (unsigned long long)task->sched_info.run_delay, - task->sched_info.pcount); - -diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h -index 8874f681b056..59eb72bf7d5f 100644 ---- a/include/asm-generic/resource.h -+++ b/include/asm-generic/resource.h -@@ -23,7 +23,7 @@ - [RLIMIT_LOCKS] = { RLIM_INFINITY, RLIM_INFINITY }, \ - [RLIMIT_SIGPENDING] = { 0, 0 }, \ - [RLIMIT_MSGQUEUE] = { MQ_BYTES_MAX, MQ_BYTES_MAX }, \ -- [RLIMIT_NICE] = { 0, 0 }, \ -+ [RLIMIT_NICE] = { 30, 30 }, \ - [RLIMIT_RTPRIO] = { 0, 0 }, \ - [RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \ - } -diff --git a/include/linux/sched.h b/include/linux/sched.h -index c46f3a63b758..7c65e6317d97 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -751,8 +751,14 @@ struct task_struct { - unsigned int ptrace; - - #ifdef CONFIG_SMP -- int on_cpu; - struct __call_single_node wake_entry; -+#endif -+#if defined(CONFIG_SMP) || defined(CONFIG_SCHED_ALT) -+ int on_cpu; -+#endif -+ -+#ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_ALT - unsigned int wakee_flips; - unsigned long wakee_flip_decay_ts; - struct task_struct *last_wakee; -@@ -766,6 +772,7 @@ struct task_struct { - */ - int recent_used_cpu; - int wake_cpu; -+#endif /* !CONFIG_SCHED_ALT */ - #endif - int on_rq; - -@@ -774,6 +781,20 @@ struct task_struct { - int normal_prio; - unsigned int rt_priority; - -+#ifdef CONFIG_SCHED_ALT -+ u64 last_ran; -+ s64 time_slice; -+ int sq_idx; -+ struct list_head sq_node; -+#ifdef CONFIG_SCHED_BMQ -+ int boost_prio; -+#endif /* CONFIG_SCHED_BMQ */ -+#ifdef CONFIG_SCHED_PDS -+ u64 deadline; -+#endif /* CONFIG_SCHED_PDS */ -+ /* sched_clock time spent running */ -+ u64 sched_time; -+#else /* !CONFIG_SCHED_ALT */ - struct sched_entity se; - struct sched_rt_entity rt; - struct sched_dl_entity dl; -@@ -784,6 +805,7 @@ struct task_struct { - unsigned long 
core_cookie; - unsigned int core_occupation; - #endif -+#endif /* !CONFIG_SCHED_ALT */ - - #ifdef CONFIG_CGROUP_SCHED - struct task_group *sched_task_group; -@@ -1517,6 +1539,15 @@ struct task_struct { - */ - }; - -+#ifdef CONFIG_SCHED_ALT -+#define tsk_seruntime(t) ((t)->sched_time) -+/* replace the uncertian rt_timeout with 0UL */ -+#define tsk_rttimeout(t) (0UL) -+#else /* CFS */ -+#define tsk_seruntime(t) ((t)->se.sum_exec_runtime) -+#define tsk_rttimeout(t) ((t)->rt.timeout) -+#endif /* !CONFIG_SCHED_ALT */ -+ - static inline struct pid *task_pid(struct task_struct *task) - { - return task->thread_pid; -diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h -index 7c83d4d5a971..fa30f98cb2be 100644 ---- a/include/linux/sched/deadline.h -+++ b/include/linux/sched/deadline.h -@@ -1,5 +1,24 @@ - /* SPDX-License-Identifier: GPL-2.0 */ - -+#ifdef CONFIG_SCHED_ALT -+ -+static inline int dl_task(struct task_struct *p) -+{ -+ return 0; -+} -+ -+#ifdef CONFIG_SCHED_BMQ -+#define __tsk_deadline(p) (0UL) -+#endif -+ -+#ifdef CONFIG_SCHED_PDS -+#define __tsk_deadline(p) ((((u64) ((p)->prio))<<56) | (p)->deadline) -+#endif -+ -+#else -+ -+#define __tsk_deadline(p) ((p)->dl.deadline) -+ - /* - * SCHED_DEADLINE tasks has negative priorities, reflecting - * the fact that any of them has higher prio than RT and -@@ -21,6 +40,7 @@ static inline int dl_task(struct task_struct *p) - { - return dl_prio(p->prio); - } -+#endif /* CONFIG_SCHED_ALT */ - - static inline bool dl_time_before(u64 a, u64 b) - { -diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h -index ab83d85e1183..6af9ae681116 100644 ---- a/include/linux/sched/prio.h -+++ b/include/linux/sched/prio.h -@@ -18,6 +18,32 @@ - #define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) - #define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) - -+#ifdef CONFIG_SCHED_ALT -+ -+/* Undefine MAX_PRIO and DEFAULT_PRIO */ -+#undef MAX_PRIO -+#undef DEFAULT_PRIO -+ -+/* +/- priority levels from the base priority */ -+#ifdef CONFIG_SCHED_BMQ -+#define MAX_PRIORITY_ADJ (7) -+ -+#define MIN_NORMAL_PRIO (MAX_RT_PRIO) -+#define MAX_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH) -+#define DEFAULT_PRIO (MIN_NORMAL_PRIO + NICE_WIDTH / 2) -+#endif -+ -+#ifdef CONFIG_SCHED_PDS -+#define MAX_PRIORITY_ADJ (0) -+ -+#define MIN_NORMAL_PRIO (128) -+#define NORMAL_PRIO_NUM (64) -+#define MAX_PRIO (MIN_NORMAL_PRIO + NORMAL_PRIO_NUM) -+#define DEFAULT_PRIO (MAX_PRIO - NICE_WIDTH / 2) -+#endif -+ -+#endif /* CONFIG_SCHED_ALT */ -+ - /* - * Convert user-nice values [ -20 ... 0 ... 
19 ] - * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], -diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h -index e5af028c08b4..0a7565d0d3cf 100644 ---- a/include/linux/sched/rt.h -+++ b/include/linux/sched/rt.h -@@ -24,8 +24,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) - - if (policy == SCHED_FIFO || policy == SCHED_RR) - return true; -+#ifndef CONFIG_SCHED_ALT - if (policy == SCHED_DEADLINE) - return true; -+#endif - return false; - } - -diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h -index 56cffe42abbc..e020fc572b22 100644 ---- a/include/linux/sched/topology.h -+++ b/include/linux/sched/topology.h -@@ -233,7 +233,8 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu) - - #endif /* !CONFIG_SMP */ - --#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) -+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) && \ -+ !defined(CONFIG_SCHED_ALT) - extern void rebuild_sched_domains_energy(void); - #else - static inline void rebuild_sched_domains_energy(void) -diff --git a/init/Kconfig b/init/Kconfig -index c7900e8975f1..d2b593e3807d 100644 ---- a/init/Kconfig -+++ b/init/Kconfig -@@ -812,6 +812,7 @@ menu "Scheduler features" - config UCLAMP_TASK - bool "Enable utilization clamping for RT/FAIR tasks" - depends on CPU_FREQ_GOV_SCHEDUTIL -+ depends on !SCHED_ALT - help - This feature enables the scheduler to track the clamped utilization - of each CPU based on RUNNABLE tasks scheduled on that CPU. -@@ -858,6 +859,35 @@ config UCLAMP_BUCKETS_COUNT - - If in doubt, use the default value. - -+menuconfig SCHED_ALT -+ bool "Alternative CPU Schedulers" -+ default y -+ help -+ This feature enable alternative CPU scheduler" -+ -+if SCHED_ALT -+ -+choice -+ prompt "Alternative CPU Scheduler" -+ default SCHED_BMQ -+ -+config SCHED_BMQ -+ bool "BMQ CPU scheduler" -+ help -+ The BitMap Queue CPU scheduler for excellent interactivity and -+ responsiveness on the desktop and solid scalability on normal -+ hardware and commodity servers. -+ -+config SCHED_PDS -+ bool "PDS CPU scheduler" -+ help -+ The Priority and Deadline based Skip list multiple queue CPU -+ Scheduler. -+ -+endchoice -+ -+endif -+ - endmenu - - # -@@ -911,6 +941,7 @@ config NUMA_BALANCING - depends on ARCH_SUPPORTS_NUMA_BALANCING - depends on !ARCH_WANT_NUMA_VARIABLE_LOCALITY - depends on SMP && NUMA && MIGRATION && !PREEMPT_RT -+ depends on !SCHED_ALT - help - This option adds support for automatic NUMA aware memory/task placement. - The mechanism is quite primitive and is based on migrating memory when -@@ -1003,6 +1034,7 @@ config FAIR_GROUP_SCHED - depends on CGROUP_SCHED - default CGROUP_SCHED - -+if !SCHED_ALT - config CFS_BANDWIDTH - bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED" - depends on FAIR_GROUP_SCHED -@@ -1025,6 +1057,7 @@ config RT_GROUP_SCHED - realtime bandwidth for them. - See Documentation/scheduler/sched-rt-group.rst for more information. 
- -+endif #!SCHED_ALT - endif #CGROUP_SCHED - - config UCLAMP_TASK_GROUP -@@ -1268,6 +1301,7 @@ config CHECKPOINT_RESTORE - - config SCHED_AUTOGROUP - bool "Automatic process group scheduling" -+ depends on !SCHED_ALT - select CGROUPS - select CGROUP_SCHED - select FAIR_GROUP_SCHED -diff --git a/init/init_task.c b/init/init_task.c -index 73cc8f03511a..2d0bad762895 100644 ---- a/init/init_task.c -+++ b/init/init_task.c -@@ -75,9 +75,15 @@ struct task_struct init_task - .stack = init_stack, - .usage = REFCOUNT_INIT(2), - .flags = PF_KTHREAD, -+#ifdef CONFIG_SCHED_ALT -+ .prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, -+ .static_prio = DEFAULT_PRIO, -+ .normal_prio = DEFAULT_PRIO + MAX_PRIORITY_ADJ, -+#else - .prio = MAX_PRIO - 20, - .static_prio = MAX_PRIO - 20, - .normal_prio = MAX_PRIO - 20, -+#endif - .policy = SCHED_NORMAL, - .cpus_ptr = &init_task.cpus_mask, - .user_cpus_ptr = NULL, -@@ -88,6 +94,17 @@ struct task_struct init_task - .restart_block = { - .fn = do_no_restart_syscall, - }, -+#ifdef CONFIG_SCHED_ALT -+ .sq_node = LIST_HEAD_INIT(init_task.sq_node), -+#ifdef CONFIG_SCHED_BMQ -+ .boost_prio = 0, -+ .sq_idx = 15, -+#endif -+#ifdef CONFIG_SCHED_PDS -+ .deadline = 0, -+#endif -+ .time_slice = HZ, -+#else - .se = { - .group_node = LIST_HEAD_INIT(init_task.se.group_node), - }, -@@ -95,6 +112,7 @@ struct task_struct init_task - .run_list = LIST_HEAD_INIT(init_task.rt.run_list), - .time_slice = RR_TIMESLICE, - }, -+#endif - .tasks = LIST_HEAD_INIT(init_task.tasks), - #ifdef CONFIG_SMP - .pushable_tasks = PLIST_NODE_INIT(init_task.pushable_tasks, MAX_PRIO), -diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt -index c2f1fd95a821..41654679b1b2 100644 ---- a/kernel/Kconfig.preempt -+++ b/kernel/Kconfig.preempt -@@ -117,7 +117,7 @@ config PREEMPT_DYNAMIC - - config SCHED_CORE - bool "Core Scheduling for SMT" -- depends on SCHED_SMT -+ depends on SCHED_SMT && !SCHED_ALT - help - This option permits Core Scheduling, a means of coordinated task - selection across SMT siblings. When enabled -- see -diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c -index 71a418858a5e..7e3016873db1 100644 ---- a/kernel/cgroup/cpuset.c -+++ b/kernel/cgroup/cpuset.c -@@ -704,7 +704,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) - return ret; - } - --#ifdef CONFIG_SMP -+#if defined(CONFIG_SMP) && !defined(CONFIG_SCHED_ALT) - /* - * Helper routine for generate_sched_domains(). - * Do cpusets a, b have overlapping effective cpus_allowed masks? 
-@@ -1100,7 +1100,7 @@ static void rebuild_sched_domains_locked(void) - /* Have scheduler rebuild the domains */ - partition_and_rebuild_sched_domains(ndoms, doms, attr); - } --#else /* !CONFIG_SMP */ -+#else /* !CONFIG_SMP || CONFIG_SCHED_ALT */ - static void rebuild_sched_domains_locked(void) - { - } -diff --git a/kernel/delayacct.c b/kernel/delayacct.c -index 164ed9ef77a3..c974a84b056f 100644 ---- a/kernel/delayacct.c -+++ b/kernel/delayacct.c -@@ -150,7 +150,7 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) - */ - t1 = tsk->sched_info.pcount; - t2 = tsk->sched_info.run_delay; -- t3 = tsk->se.sum_exec_runtime; -+ t3 = tsk_seruntime(tsk); - - d->cpu_count += t1; - -diff --git a/kernel/exit.c b/kernel/exit.c -index 64c938ce36fe..a353f7ef5392 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -124,7 +124,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->curr_target = next_thread(tsk); - } - -- add_device_randomness((const void*) &tsk->se.sum_exec_runtime, -+ add_device_randomness((const void*) &tsk_seruntime(tsk), - sizeof(unsigned long long)); - - /* -@@ -145,7 +145,7 @@ static void __exit_signal(struct task_struct *tsk) - sig->inblock += task_io_get_inblock(tsk); - sig->oublock += task_io_get_oublock(tsk); - task_io_accounting_add(&sig->ioac, &tsk->ioac); -- sig->sum_sched_runtime += tsk->se.sum_exec_runtime; -+ sig->sum_sched_runtime += tsk_seruntime(tsk); - sig->nr_threads--; - __unhash_process(tsk, group_dead); - write_sequnlock(&sig->stats_lock); -diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index 7779ee8abc2a..5b9893cdfb1b 100644 ---- a/kernel/locking/rtmutex.c -+++ b/kernel/locking/rtmutex.c -@@ -300,21 +300,25 @@ static __always_inline void - waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task) - { - waiter->prio = __waiter_prio(task); -- waiter->deadline = task->dl.deadline; -+ waiter->deadline = __tsk_deadline(task); - } - - /* - * Only use with rt_mutex_waiter_{less,equal}() - */ - #define task_to_waiter(p) \ -- &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline } -+ &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = __tsk_deadline(p) } - - static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, - struct rt_mutex_waiter *right) - { -+#ifdef CONFIG_SCHED_PDS -+ return (left->deadline < right->deadline); -+#else - if (left->prio < right->prio) - return 1; - -+#ifndef CONFIG_SCHED_BMQ - /* - * If both waiters have dl_prio(), we check the deadlines of the - * associated tasks. -@@ -323,16 +327,22 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, - */ - if (dl_prio(left->prio)) - return dl_time_before(left->deadline, right->deadline); -+#endif - - return 0; -+#endif - } - - static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, - struct rt_mutex_waiter *right) - { -+#ifdef CONFIG_SCHED_PDS -+ return (left->deadline == right->deadline); -+#else - if (left->prio != right->prio) - return 0; - -+#ifndef CONFIG_SCHED_BMQ - /* - * If both waiters have dl_prio(), we check the deadlines of the - * associated tasks. 
-@@ -341,8 +351,10 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, - */ - if (dl_prio(left->prio)) - return left->deadline == right->deadline; -+#endif - - return 1; -+#endif - } - - static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter, -diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile -index 976092b7bd45..31d587c16ec1 100644 ---- a/kernel/sched/Makefile -+++ b/kernel/sched/Makefile -@@ -28,7 +28,12 @@ endif - # These compilation units have roughly the same size and complexity - so their - # build parallelizes well and finishes roughly at once: - # -+ifdef CONFIG_SCHED_ALT -+obj-y += alt_core.o -+obj-$(CONFIG_SCHED_DEBUG) += alt_debug.o -+else - obj-y += core.o - obj-y += fair.o -+endif - obj-y += build_policy.o - obj-y += build_utility.o -diff --git a/kernel/sched/alt_core.c b/kernel/sched/alt_core.c -new file mode 100644 -index 000000000000..d0ab41c4d9ad ---- /dev/null -+++ b/kernel/sched/alt_core.c -@@ -0,0 +1,7807 @@ -+/* -+ * kernel/sched/alt_core.c -+ * -+ * Core alternative kernel scheduler code and related syscalls -+ * -+ * Copyright (C) 1991-2002 Linus Torvalds -+ * -+ * 2009-08-13 Brainfuck deadline scheduling policy by Con Kolivas deletes -+ * a whole lot of those previous things. -+ * 2017-09-06 Priority and Deadline based Skip list multiple queue kernel -+ * scheduler by Alfred Chen. -+ * 2019-02-20 BMQ(BitMap Queue) kernel scheduler by Alfred Chen. -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include -+ -+#define CREATE_TRACE_POINTS -+#include -+#undef CREATE_TRACE_POINTS -+ -+#include "sched.h" -+ -+#include "pelt.h" -+ -+#include "../../fs/io-wq.h" -+#include "../smpboot.h" -+ -+/* -+ * Export tracepoints that act as a bare tracehook (ie: have no trace event -+ * associated with them) to allow external modules to probe them. -+ */ -+EXPORT_TRACEPOINT_SYMBOL_GPL(pelt_irq_tp); -+ -+#ifdef CONFIG_SCHED_DEBUG -+#define sched_feat(x) (1) -+/* -+ * Print a warning if need_resched is set for the given duration (if -+ * LATENCY_WARN is enabled). -+ * -+ * If sysctl_resched_latency_warn_once is set, only one warning will be shown -+ * per boot. 
-+ */ -+__read_mostly int sysctl_resched_latency_warn_ms = 100; -+__read_mostly int sysctl_resched_latency_warn_once = 1; -+#else -+#define sched_feat(x) (0) -+#endif /* CONFIG_SCHED_DEBUG */ -+ -+#define ALT_SCHED_VERSION "v5.19-r0" -+ -+/* rt_prio(prio) defined in include/linux/sched/rt.h */ -+#define rt_task(p) rt_prio((p)->prio) -+#define rt_policy(policy) ((policy) == SCHED_FIFO || (policy) == SCHED_RR) -+#define task_has_rt_policy(p) (rt_policy((p)->policy)) -+ -+#define STOP_PRIO (MAX_RT_PRIO - 1) -+ -+/* Default time slice is 4 in ms, can be set via kernel parameter "sched_timeslice" */ -+u64 sched_timeslice_ns __read_mostly = (4 << 20); -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq, int idx); -+ -+#ifdef CONFIG_SCHED_BMQ -+#include "bmq.h" -+#endif -+#ifdef CONFIG_SCHED_PDS -+#include "pds.h" -+#endif -+ -+static int __init sched_timeslice(char *str) -+{ -+ int timeslice_ms; -+ -+ get_option(&str, ×lice_ms); -+ if (2 != timeslice_ms) -+ timeslice_ms = 4; -+ sched_timeslice_ns = timeslice_ms << 20; -+ sched_timeslice_imp(timeslice_ms); -+ -+ return 0; -+} -+early_param("sched_timeslice", sched_timeslice); -+ -+/* Reschedule if less than this many μs left */ -+#define RESCHED_NS (100 << 10) -+ -+/** -+ * sched_yield_type - Choose what sort of yield sched_yield will perform. -+ * 0: No yield. -+ * 1: Deboost and requeue task. (default) -+ * 2: Set rq skip task. -+ */ -+int sched_yield_type __read_mostly = 1; -+ -+#ifdef CONFIG_SMP -+static cpumask_t sched_rq_pending_mask ____cacheline_aligned_in_smp; -+ -+DEFINE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); -+DEFINE_PER_CPU(cpumask_t *, sched_cpu_topo_end_mask); -+ -+#ifdef CONFIG_SCHED_SMT -+DEFINE_STATIC_KEY_FALSE(sched_smt_present); -+EXPORT_SYMBOL_GPL(sched_smt_present); -+#endif -+ -+/* -+ * Keep a unique ID per domain (we use the first CPUs number in the cpumask of -+ * the domain), this allows us to quickly tell if two cpus are in the same cache -+ * domain, see cpus_share_cache(). 
-+ */ -+DEFINE_PER_CPU(int, sd_llc_id); -+#endif /* CONFIG_SMP */ -+ -+static DEFINE_MUTEX(sched_hotcpu_mutex); -+ -+DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+ -+#ifndef prepare_arch_switch -+# define prepare_arch_switch(next) do { } while (0) -+#endif -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+#ifdef CONFIG_SCHED_SMT -+static cpumask_t sched_sg_idle_mask ____cacheline_aligned_in_smp; -+#endif -+static cpumask_t sched_rq_watermark[SCHED_QUEUE_BITS] ____cacheline_aligned_in_smp; -+ -+/* sched_queue related functions */ -+static inline void sched_queue_init(struct sched_queue *q) -+{ -+ int i; -+ -+ bitmap_zero(q->bitmap, SCHED_QUEUE_BITS); -+ for(i = 0; i < SCHED_BITS; i++) -+ INIT_LIST_HEAD(&q->heads[i]); -+} -+ -+/* -+ * Init idle task and put into queue structure of rq -+ * IMPORTANT: may be called multiple times for a single cpu -+ */ -+static inline void sched_queue_init_idle(struct sched_queue *q, -+ struct task_struct *idle) -+{ -+ idle->sq_idx = IDLE_TASK_SCHED_PRIO; -+ INIT_LIST_HEAD(&q->heads[idle->sq_idx]); -+ list_add(&idle->sq_node, &q->heads[idle->sq_idx]); -+} -+ -+/* water mark related functions */ -+static inline void update_sched_rq_watermark(struct rq *rq) -+{ -+ unsigned long watermark = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); -+ unsigned long last_wm = rq->watermark; -+ unsigned long i; -+ int cpu; -+ -+ if (watermark == last_wm) -+ return; -+ -+ rq->watermark = watermark; -+ cpu = cpu_of(rq); -+ if (watermark < last_wm) { -+ for (i = last_wm; i > watermark; i--) -+ cpumask_clear_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i); -+#ifdef CONFIG_SCHED_SMT -+ if (static_branch_likely(&sched_smt_present) && -+ IDLE_TASK_SCHED_PRIO == last_wm) -+ cpumask_andnot(&sched_sg_idle_mask, -+ &sched_sg_idle_mask, cpu_smt_mask(cpu)); -+#endif -+ return; -+ } -+ /* last_wm < watermark */ -+ for (i = watermark; i > last_wm; i--) -+ cpumask_set_cpu(cpu, sched_rq_watermark + SCHED_QUEUE_BITS - i); -+#ifdef CONFIG_SCHED_SMT -+ if (static_branch_likely(&sched_smt_present) && -+ IDLE_TASK_SCHED_PRIO == watermark) { -+ cpumask_t tmp; -+ -+ cpumask_and(&tmp, cpu_smt_mask(cpu), sched_rq_watermark); -+ if (cpumask_equal(&tmp, cpu_smt_mask(cpu))) -+ cpumask_or(&sched_sg_idle_mask, -+ &sched_sg_idle_mask, cpu_smt_mask(cpu)); -+ } -+#endif -+} -+ -+/* -+ * This routine assume that the idle task always in queue -+ */ -+static inline struct task_struct *sched_rq_first_task(struct rq *rq) -+{ -+ unsigned long idx = find_first_bit(rq->queue.bitmap, SCHED_QUEUE_BITS); -+ const struct list_head *head = &rq->queue.heads[sched_prio2idx(idx, rq)]; -+ -+ return list_first_entry(head, struct task_struct, sq_node); -+} -+ -+static inline struct task_struct * -+sched_rq_next_task(struct task_struct *p, struct rq *rq) -+{ -+ unsigned long idx = p->sq_idx; -+ struct list_head *head = &rq->queue.heads[idx]; -+ -+ if (list_is_last(&p->sq_node, head)) { -+ idx = find_next_bit(rq->queue.bitmap, SCHED_QUEUE_BITS, -+ sched_idx2prio(idx, rq) + 1); -+ head = &rq->queue.heads[sched_prio2idx(idx, rq)]; -+ -+ return list_first_entry(head, struct task_struct, sq_node); -+ } -+ -+ return list_next_entry(p, sq_node); -+} -+ -+static inline struct task_struct *rq_runnable_task(struct rq *rq) -+{ -+ struct task_struct *next = sched_rq_first_task(rq); -+ -+ if (unlikely(next == rq->skip)) -+ next = sched_rq_next_task(next, rq); -+ -+ return next; -+} -+ -+/* -+ * Serialization rules: -+ * -+ * Lock order: -+ * -+ * p->pi_lock -+ * 
rq->lock -+ * hrtimer_cpu_base->lock (hrtimer_start() for bandwidth controls) -+ * -+ * rq1->lock -+ * rq2->lock where: rq1 < rq2 -+ * -+ * Regular state: -+ * -+ * Normal scheduling state is serialized by rq->lock. __schedule() takes the -+ * local CPU's rq->lock, it optionally removes the task from the runqueue and -+ * always looks at the local rq data structures to find the most eligible task -+ * to run next. -+ * -+ * Task enqueue is also under rq->lock, possibly taken from another CPU. -+ * Wakeups from another LLC domain might use an IPI to transfer the enqueue to -+ * the local CPU to avoid bouncing the runqueue state around [ see -+ * ttwu_queue_wakelist() ] -+ * -+ * Task wakeup, specifically wakeups that involve migration, are horribly -+ * complicated to avoid having to take two rq->locks. -+ * -+ * Special state: -+ * -+ * System-calls and anything external will use task_rq_lock() which acquires -+ * both p->pi_lock and rq->lock. As a consequence the state they change is -+ * stable while holding either lock: -+ * -+ * - sched_setaffinity()/ -+ * set_cpus_allowed_ptr(): p->cpus_ptr, p->nr_cpus_allowed -+ * - set_user_nice(): p->se.load, p->*prio -+ * - __sched_setscheduler(): p->sched_class, p->policy, p->*prio, -+ * p->se.load, p->rt_priority, -+ * p->dl.dl_{runtime, deadline, period, flags, bw, density} -+ * - sched_setnuma(): p->numa_preferred_nid -+ * - sched_move_task()/ -+ * cpu_cgroup_fork(): p->sched_task_group -+ * - uclamp_update_active() p->uclamp* -+ * -+ * p->state <- TASK_*: -+ * -+ * is changed locklessly using set_current_state(), __set_current_state() or -+ * set_special_state(), see their respective comments, or by -+ * try_to_wake_up(). This latter uses p->pi_lock to serialize against -+ * concurrent self. -+ * -+ * p->on_rq <- { 0, 1 = TASK_ON_RQ_QUEUED, 2 = TASK_ON_RQ_MIGRATING }: -+ * -+ * is set by activate_task() and cleared by deactivate_task(), under -+ * rq->lock. Non-zero indicates the task is runnable, the special -+ * ON_RQ_MIGRATING state is used for migration without holding both -+ * rq->locks. It indicates task_cpu() is not stable, see task_rq_lock(). -+ * -+ * p->on_cpu <- { 0, 1 }: -+ * -+ * is set by prepare_task() and cleared by finish_task() such that it will be -+ * set before p is scheduled-in and cleared after p is scheduled-out, both -+ * under rq->lock. Non-zero indicates the task is running on its CPU. -+ * -+ * [ The astute reader will observe that it is possible for two tasks on one -+ * CPU to have ->on_cpu = 1 at the same time. ] -+ * -+ * task_cpu(p): is changed by set_task_cpu(), the rules are: -+ * -+ * - Don't call set_task_cpu() on a blocked task: -+ * -+ * We don't care what CPU we're not running on, this simplifies hotplug, -+ * the CPU assignment of blocked tasks isn't required to be valid. -+ * -+ * - for try_to_wake_up(), called under p->pi_lock: -+ * -+ * This allows try_to_wake_up() to only take one rq->lock, see its comment. 
-+ * -+ * - for migration called under rq->lock: -+ * [ see task_on_rq_migrating() in task_rq_lock() ] -+ * -+ * o move_queued_task() -+ * o detach_task() -+ * -+ * - for migration called under double_rq_lock(): -+ * -+ * o __migrate_swap_task() -+ * o push_rt_task() / pull_rt_task() -+ * o push_dl_task() / pull_dl_task() -+ * o dl_task_offline_migration() -+ * -+ */ -+ -+/* -+ * Context: p->pi_lock -+ */ -+static inline struct rq -+*__task_access_lock(struct task_struct *p, raw_spinlock_t **plock) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock(&rq->lock); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ *plock = NULL; -+ return rq; -+ } -+ } -+} -+ -+static inline void -+__task_access_unlock(struct task_struct *p, raw_spinlock_t *lock) -+{ -+ if (NULL != lock) -+ raw_spin_unlock(lock); -+} -+ -+static inline struct rq -+*task_access_lock_irqsave(struct task_struct *p, raw_spinlock_t **plock, -+ unsigned long *flags) -+{ -+ struct rq *rq; -+ for (;;) { -+ rq = task_rq(p); -+ if (p->on_cpu || task_on_rq_queued(p)) { -+ raw_spin_lock_irqsave(&rq->lock, *flags); -+ if (likely((p->on_cpu || task_on_rq_queued(p)) -+ && rq == task_rq(p))) { -+ *plock = &rq->lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&rq->lock, *flags); -+ } else if (task_on_rq_migrating(p)) { -+ do { -+ cpu_relax(); -+ } while (unlikely(task_on_rq_migrating(p))); -+ } else { -+ raw_spin_lock_irqsave(&p->pi_lock, *flags); -+ if (likely(!p->on_cpu && !p->on_rq && -+ rq == task_rq(p))) { -+ *plock = &p->pi_lock; -+ return rq; -+ } -+ raw_spin_unlock_irqrestore(&p->pi_lock, *flags); -+ } -+ } -+} -+ -+static inline void -+task_access_unlock_irqrestore(struct task_struct *p, raw_spinlock_t *lock, -+ unsigned long *flags) -+{ -+ raw_spin_unlock_irqrestore(lock, *flags); -+} -+ -+/* -+ * __task_rq_lock - lock the rq @p resides on. -+ */ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ lockdep_assert_held(&p->pi_lock); -+ -+ for (;;) { -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) -+ return rq; -+ raw_spin_unlock(&rq->lock); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+/* -+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. -+ */ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ for (;;) { -+ raw_spin_lock_irqsave(&p->pi_lock, rf->flags); -+ rq = task_rq(p); -+ raw_spin_lock(&rq->lock); -+ /* -+ * move_queued_task() task_rq_lock() -+ * -+ * ACQUIRE (rq->lock) -+ * [S] ->on_rq = MIGRATING [L] rq = task_rq() -+ * WMB (__set_task_cpu()) ACQUIRE (rq->lock); -+ * [S] ->cpu = new_cpu [L] task_rq() -+ * [L] ->on_rq -+ * RELEASE (rq->lock) -+ * -+ * If we observe the old CPU in task_rq_lock(), the acquire of -+ * the old rq->lock will fully serialize against the stores. -+ * -+ * If we observe the new CPU in task_rq_lock(), the address -+ * dependency headed by '[L] rq = task_rq()' and the acquire -+ * will pair with the WMB to ensure we then also see migrating. 
-+ */ -+ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { -+ return rq; -+ } -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+ -+ while (unlikely(task_on_rq_migrating(p))) -+ cpu_relax(); -+ } -+} -+ -+static inline void -+rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock_irqsave(&rq->lock, rf->flags); -+} -+ -+static inline void -+rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irqrestore(&rq->lock, rf->flags); -+} -+ -+void raw_spin_rq_lock_nested(struct rq *rq, int subclass) -+{ -+ raw_spinlock_t *lock; -+ -+ /* Matches synchronize_rcu() in __sched_core_enable() */ -+ preempt_disable(); -+ -+ for (;;) { -+ lock = __rq_lockp(rq); -+ raw_spin_lock_nested(lock, subclass); -+ if (likely(lock == __rq_lockp(rq))) { -+ /* preempt_count *MUST* be > 1 */ -+ preempt_enable_no_resched(); -+ return; -+ } -+ raw_spin_unlock(lock); -+ } -+} -+ -+void raw_spin_rq_unlock(struct rq *rq) -+{ -+ raw_spin_unlock(rq_lockp(rq)); -+} -+ -+/* -+ * RQ-clock updating methods: -+ */ -+ -+static void update_rq_clock_task(struct rq *rq, s64 delta) -+{ -+/* -+ * In theory, the compile should just see 0 here, and optimize out the call -+ * to sched_rt_avg_update. But I don't trust it... -+ */ -+ s64 __maybe_unused steal = 0, irq_delta = 0; -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; -+ -+ /* -+ * Since irq_time is only updated on {soft,}irq_exit, we might run into -+ * this case when a previous update_rq_clock() happened inside a -+ * {soft,}irq region. -+ * -+ * When this happens, we stop ->clock_task and only update the -+ * prev_irq_time stamp to account for the part that fit, so that a next -+ * update will consume the rest. This ensures ->clock_task is -+ * monotonic. -+ * -+ * It does however cause some slight miss-attribution of {soft,}irq -+ * time, a more accurate solution would be to update the irq_time using -+ * the current rq->clock timestamp, except that would require using -+ * atomic ops. 
-+ */ -+ if (irq_delta > delta) -+ irq_delta = delta; -+ -+ rq->prev_irq_time += irq_delta; -+ delta -= irq_delta; -+#endif -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ if (static_key_false((¶virt_steal_rq_enabled))) { -+ steal = paravirt_steal_clock(cpu_of(rq)); -+ steal -= rq->prev_steal_time_rq; -+ -+ if (unlikely(steal > delta)) -+ steal = delta; -+ -+ rq->prev_steal_time_rq += steal; -+ delta -= steal; -+ } -+#endif -+ -+ rq->clock_task += delta; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ if ((irq_delta + steal)) -+ update_irq_load_avg(rq, irq_delta + steal); -+#endif -+} -+ -+static inline void update_rq_clock(struct rq *rq) -+{ -+ s64 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; -+ -+ if (unlikely(delta <= 0)) -+ return; -+ rq->clock += delta; -+ update_rq_time_edge(rq); -+ update_rq_clock_task(rq, delta); -+} -+ -+/* -+ * RQ Load update routine -+ */ -+#define RQ_LOAD_HISTORY_BITS (sizeof(s32) * 8ULL) -+#define RQ_UTIL_SHIFT (8) -+#define RQ_LOAD_HISTORY_TO_UTIL(l) (((l) >> (RQ_LOAD_HISTORY_BITS - 1 - RQ_UTIL_SHIFT)) & 0xff) -+ -+#define LOAD_BLOCK(t) ((t) >> 17) -+#define LOAD_HALF_BLOCK(t) ((t) >> 16) -+#define BLOCK_MASK(t) ((t) & ((0x01 << 18) - 1)) -+#define LOAD_BLOCK_BIT(b) (1UL << (RQ_LOAD_HISTORY_BITS - 1 - (b))) -+#define CURRENT_LOAD_BIT LOAD_BLOCK_BIT(0) -+ -+static inline void rq_load_update(struct rq *rq) -+{ -+ u64 time = rq->clock; -+ u64 delta = min(LOAD_BLOCK(time) - LOAD_BLOCK(rq->load_stamp), -+ RQ_LOAD_HISTORY_BITS - 1); -+ u64 prev = !!(rq->load_history & CURRENT_LOAD_BIT); -+ u64 curr = !!rq->nr_running; -+ -+ if (delta) { -+ rq->load_history = rq->load_history >> delta; -+ -+ if (delta < RQ_UTIL_SHIFT) { -+ rq->load_block += (~BLOCK_MASK(rq->load_stamp)) * prev; -+ if (!!LOAD_HALF_BLOCK(rq->load_block) ^ curr) -+ rq->load_history ^= LOAD_BLOCK_BIT(delta); -+ } -+ -+ rq->load_block = BLOCK_MASK(time) * prev; -+ } else { -+ rq->load_block += (time - rq->load_stamp) * prev; -+ } -+ if (prev ^ curr) -+ rq->load_history ^= CURRENT_LOAD_BIT; -+ rq->load_stamp = time; -+} -+ -+unsigned long rq_load_util(struct rq *rq, unsigned long max) -+{ -+ return RQ_LOAD_HISTORY_TO_UTIL(rq->load_history) * (max >> RQ_UTIL_SHIFT); -+} -+ -+#ifdef CONFIG_SMP -+unsigned long sched_cpu_util(int cpu, unsigned long max) -+{ -+ return rq_load_util(cpu_rq(cpu), max); -+} -+#endif /* CONFIG_SMP */ -+ -+#ifdef CONFIG_CPU_FREQ -+/** -+ * cpufreq_update_util - Take a note about CPU utilization changes. -+ * @rq: Runqueue to carry out the update for. -+ * @flags: Update reason flags. -+ * -+ * This function is called by the scheduler on the CPU whose utilization is -+ * being updated. -+ * -+ * It can only be called from RCU-sched read-side critical sections. -+ * -+ * The way cpufreq is currently arranged requires it to evaluate the CPU -+ * performance state (frequency/voltage) on a regular basis to prevent it from -+ * being stuck in a completely inadequate performance level for too long. -+ * That is not guaranteed to happen if the updates are only triggered from CFS -+ * and DL, though, because they may not be coming in if only RT tasks are -+ * active all the time (or there are RT tasks only). -+ * -+ * As a workaround for that issue, this function is called periodically by the -+ * RT sched class to trigger extra cpufreq updates to prevent it from stalling, -+ * but that really is a band-aid. Going forward it should be replaced with -+ * solutions targeted more specifically at RT tasks. 
-+ */ -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) -+{ -+ struct update_util_data *data; -+ -+#ifdef CONFIG_SMP -+ rq_load_update(rq); -+#endif -+ data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, -+ cpu_of(rq))); -+ if (data) -+ data->func(data, rq_clock(rq), flags); -+} -+#else -+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) -+{ -+#ifdef CONFIG_SMP -+ rq_load_update(rq); -+#endif -+} -+#endif /* CONFIG_CPU_FREQ */ -+ -+#ifdef CONFIG_NO_HZ_FULL -+/* -+ * Tick may be needed by tasks in the runqueue depending on their policy and -+ * requirements. If tick is needed, lets send the target an IPI to kick it out -+ * of nohz mode if necessary. -+ */ -+static inline void sched_update_tick_dependency(struct rq *rq) -+{ -+ int cpu = cpu_of(rq); -+ -+ if (!tick_nohz_full_cpu(cpu)) -+ return; -+ -+ if (rq->nr_running < 2) -+ tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED); -+ else -+ tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED); -+} -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_update_tick_dependency(struct rq *rq) { } -+#endif -+ -+bool sched_task_on_rq(struct task_struct *p) -+{ -+ return task_on_rq_queued(p); -+} -+ -+unsigned long get_wchan(struct task_struct *p) -+{ -+ unsigned long ip = 0; -+ unsigned int state; -+ -+ if (!p || p == current) -+ return 0; -+ -+ /* Only get wchan if task is blocked and we can keep it that way. */ -+ raw_spin_lock_irq(&p->pi_lock); -+ state = READ_ONCE(p->__state); -+ smp_rmb(); /* see try_to_wake_up() */ -+ if (state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq) -+ ip = __get_wchan(p); -+ raw_spin_unlock_irq(&p->pi_lock); -+ -+ return ip; -+} -+ -+/* -+ * Add/Remove/Requeue task to/from the runqueue routines -+ * Context: rq->lock -+ */ -+#define __SCHED_DEQUEUE_TASK(p, rq, flags) \ -+ psi_dequeue(p, flags & DEQUEUE_SLEEP); \ -+ sched_info_dequeue(rq, p); \ -+ \ -+ list_del(&p->sq_node); \ -+ if (list_empty(&rq->queue.heads[p->sq_idx])) \ -+ clear_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); -+ -+#define __SCHED_ENQUEUE_TASK(p, rq, flags) \ -+ sched_info_enqueue(rq, p); \ -+ psi_enqueue(p, flags); \ -+ \ -+ p->sq_idx = task_sched_prio_idx(p, rq); \ -+ list_add_tail(&p->sq_node, &rq->queue.heads[p->sq_idx]); \ -+ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); -+ -+static inline void dequeue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ /*printk(KERN_INFO "sched: dequeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ -+ WARN_ONCE(task_rq(p) != rq, "sched: dequeue task reside on cpu%d from cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ -+ __SCHED_DEQUEUE_TASK(p, rq, flags); -+ --rq->nr_running; -+#ifdef CONFIG_SMP -+ if (1 == rq->nr_running) -+ cpumask_clear_cpu(cpu_of(rq), &sched_rq_pending_mask); -+#endif -+ -+ sched_update_tick_dependency(rq); -+} -+ -+static inline void enqueue_task(struct task_struct *p, struct rq *rq, int flags) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ /*printk(KERN_INFO "sched: enqueue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ -+ WARN_ONCE(task_rq(p) != rq, "sched: enqueue task reside on cpu%d to cpu%d\n", -+ task_cpu(p), cpu_of(rq)); -+ -+ __SCHED_ENQUEUE_TASK(p, rq, flags); -+ update_sched_rq_watermark(rq); -+ ++rq->nr_running; -+#ifdef CONFIG_SMP -+ if (2 == rq->nr_running) -+ cpumask_set_cpu(cpu_of(rq), &sched_rq_pending_mask); -+#endif -+ -+ sched_update_tick_dependency(rq); -+} -+ -+static inline void requeue_task(struct task_struct *p, struct rq *rq, int 
idx) -+{ -+ lockdep_assert_held(&rq->lock); -+ /*printk(KERN_INFO "sched: requeue(%d) %px %016llx\n", cpu_of(rq), p, p->priodl);*/ -+ WARN_ONCE(task_rq(p) != rq, "sched: cpu[%d] requeue task reside on cpu%d\n", -+ cpu_of(rq), task_cpu(p)); -+ -+ list_del(&p->sq_node); -+ list_add_tail(&p->sq_node, &rq->queue.heads[idx]); -+ if (idx != p->sq_idx) { -+ if (list_empty(&rq->queue.heads[p->sq_idx])) -+ clear_bit(sched_idx2prio(p->sq_idx, rq), -+ rq->queue.bitmap); -+ p->sq_idx = idx; -+ set_bit(sched_idx2prio(p->sq_idx, rq), rq->queue.bitmap); -+ update_sched_rq_watermark(rq); -+ } -+} -+ -+/* -+ * cmpxchg based fetch_or, macro so it works for different integer types -+ */ -+#define fetch_or(ptr, mask) \ -+ ({ \ -+ typeof(ptr) _ptr = (ptr); \ -+ typeof(mask) _mask = (mask); \ -+ typeof(*_ptr) _old, _val = *_ptr; \ -+ \ -+ for (;;) { \ -+ _old = cmpxchg(_ptr, _val, _val | _mask); \ -+ if (_old == _val) \ -+ break; \ -+ _val = _old; \ -+ } \ -+ _old; \ -+}) -+ -+#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) -+/* -+ * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, -+ * this avoids any races wrt polling state changes and thereby avoids -+ * spurious IPIs. -+ */ -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG); -+} -+ -+/* -+ * Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set. -+ * -+ * If this returns true, then the idle task promises to call -+ * sched_ttwu_pending() and reschedule soon. -+ */ -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ struct thread_info *ti = task_thread_info(p); -+ typeof(ti->flags) old, val = READ_ONCE(ti->flags); -+ -+ for (;;) { -+ if (!(val & _TIF_POLLING_NRFLAG)) -+ return false; -+ if (val & _TIF_NEED_RESCHED) -+ return true; -+ old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED); -+ if (old == val) -+ break; -+ val = old; -+ } -+ return true; -+} -+ -+#else -+static bool set_nr_and_not_polling(struct task_struct *p) -+{ -+ set_tsk_need_resched(p); -+ return true; -+} -+ -+#ifdef CONFIG_SMP -+static bool set_nr_if_polling(struct task_struct *p) -+{ -+ return false; -+} -+#endif -+#endif -+ -+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ struct wake_q_node *node = &task->wake_q; -+ -+ /* -+ * Atomically grab the task, if ->wake_q is !nil already it means -+ * it's already queued (either by us or someone else) and will get the -+ * wakeup due to that. -+ * -+ * In order to ensure that a pending wakeup will observe our pending -+ * state, even in the failed case, an explicit smp_mb() must be used. -+ */ -+ smp_mb__before_atomic(); -+ if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))) -+ return false; -+ -+ /* -+ * The head is context local, there can be no concurrency. -+ */ -+ *head->lastp = node; -+ head->lastp = &node->next; -+ return true; -+} -+ -+/** -+ * wake_q_add() - queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. 
-+ */ -+void wake_q_add(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (__wake_q_add(head, task)) -+ get_task_struct(task); -+} -+ -+/** -+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking. -+ * @head: the wake_q_head to add @task to -+ * @task: the task to queue for 'later' wakeup -+ * -+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the -+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come -+ * instantly. -+ * -+ * This function must be used as-if it were wake_up_process(); IOW the task -+ * must be ready to be woken at this location. -+ * -+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers -+ * that already hold reference to @task can call the 'safe' version and trust -+ * wake_q to do the right thing depending whether or not the @task is already -+ * queued for wakeup. -+ */ -+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task) -+{ -+ if (!__wake_q_add(head, task)) -+ put_task_struct(task); -+} -+ -+void wake_up_q(struct wake_q_head *head) -+{ -+ struct wake_q_node *node = head->first; -+ -+ while (node != WAKE_Q_TAIL) { -+ struct task_struct *task; -+ -+ task = container_of(node, struct task_struct, wake_q); -+ /* task can safely be re-inserted now: */ -+ node = node->next; -+ task->wake_q.next = NULL; -+ -+ /* -+ * wake_up_process() executes a full barrier, which pairs with -+ * the queueing in wake_q_add() so as not to miss wakeups. -+ */ -+ wake_up_process(task); -+ put_task_struct(task); -+ } -+} -+ -+/* -+ * resched_curr - mark rq's current task 'to be rescheduled now'. -+ * -+ * On UP this means the setting of the need_resched flag, on SMP it -+ * might also involve a cross-CPU call to trigger the scheduler on -+ * the target CPU. -+ */ -+void resched_curr(struct rq *rq) -+{ -+ struct task_struct *curr = rq->curr; -+ int cpu; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ if (test_tsk_need_resched(curr)) -+ return; -+ -+ cpu = cpu_of(rq); -+ if (cpu == smp_processor_id()) { -+ set_tsk_need_resched(curr); -+ set_preempt_need_resched(); -+ return; -+ } -+ -+ if (set_nr_and_not_polling(curr)) -+ smp_send_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+void resched_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (cpu_online(cpu) || cpu == smp_processor_id()) -+ resched_curr(cpu_rq(cpu)); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+} -+ -+#ifdef CONFIG_SMP -+#ifdef CONFIG_NO_HZ_COMMON -+void nohz_balance_enter_idle(int cpu) {} -+ -+void select_nohz_load_balancer(int stop_tick) {} -+ -+void set_cpu_sd_state_idle(void) {} -+ -+/* -+ * In the semi idle case, use the nearest busy CPU for migrating timers -+ * from an idle CPU. This is good for power-savings. -+ * -+ * We don't do similar optimization for completely idle system, as -+ * selecting an idle CPU will add more delays to the timers than intended -+ * (as that CPU's timer base may not be uptodate wrt jiffies etc). 
-+ */ -+int get_nohz_timer_target(void) -+{ -+ int i, cpu = smp_processor_id(), default_cpu = -1; -+ struct cpumask *mask; -+ const struct cpumask *hk_mask; -+ -+ if (housekeeping_cpu(cpu, HK_TYPE_TIMER)) { -+ if (!idle_cpu(cpu)) -+ return cpu; -+ default_cpu = cpu; -+ } -+ -+ hk_mask = housekeeping_cpumask(HK_TYPE_TIMER); -+ -+ for (mask = per_cpu(sched_cpu_topo_masks, cpu) + 1; -+ mask < per_cpu(sched_cpu_topo_end_mask, cpu); mask++) -+ for_each_cpu_and(i, mask, hk_mask) -+ if (!idle_cpu(i)) -+ return i; -+ -+ if (default_cpu == -1) -+ default_cpu = housekeeping_any_cpu(HK_TYPE_TIMER); -+ cpu = default_cpu; -+ -+ return cpu; -+} -+ -+/* -+ * When add_timer_on() enqueues a timer into the timer wheel of an -+ * idle CPU then this timer might expire before the next timer event -+ * which is scheduled to wake up that CPU. In case of a completely -+ * idle system the next event might even be infinite time into the -+ * future. wake_up_idle_cpu() ensures that the CPU is woken up and -+ * leaves the inner idle loop so the newly added timer is taken into -+ * account when the CPU goes back to idle and evaluates the timer -+ * wheel for the next timer event. -+ */ -+static inline void wake_up_idle_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (cpu == smp_processor_id()) -+ return; -+ -+ if (set_nr_and_not_polling(rq->idle)) -+ smp_send_reschedule(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+static inline bool wake_up_full_nohz_cpu(int cpu) -+{ -+ /* -+ * We just need the target to call irq_exit() and re-evaluate -+ * the next tick. The nohz full kick at least implies that. -+ * If needed we can still optimize that later with an -+ * empty IRQ. -+ */ -+ if (cpu_is_offline(cpu)) -+ return true; /* Don't try to wake offline CPUs. */ -+ if (tick_nohz_full_cpu(cpu)) { -+ if (cpu != smp_processor_id() || -+ tick_nohz_tick_stopped()) -+ tick_nohz_full_kick_cpu(cpu); -+ return true; -+ } -+ -+ return false; -+} -+ -+void wake_up_nohz_cpu(int cpu) -+{ -+ if (!wake_up_full_nohz_cpu(cpu)) -+ wake_up_idle_cpu(cpu); -+} -+ -+static void nohz_csd_func(void *info) -+{ -+ struct rq *rq = info; -+ int cpu = cpu_of(rq); -+ unsigned int flags; -+ -+ /* -+ * Release the rq::nohz_csd. -+ */ -+ flags = atomic_fetch_andnot(NOHZ_KICK_MASK, nohz_flags(cpu)); -+ WARN_ON(!(flags & NOHZ_KICK_MASK)); -+ -+ rq->idle_balance = idle_cpu(cpu); -+ if (rq->idle_balance && !need_resched()) { -+ rq->nohz_idle_balance = flags; -+ raise_softirq_irqoff(SCHED_SOFTIRQ); -+ } -+} -+ -+#endif /* CONFIG_NO_HZ_COMMON */ -+#endif /* CONFIG_SMP */ -+ -+static inline void check_preempt_curr(struct rq *rq) -+{ -+ if (sched_rq_first_task(rq) != rq->curr) -+ resched_curr(rq); -+} -+ -+#ifdef CONFIG_SCHED_HRTICK -+/* -+ * Use HR-timers to deliver accurate preemption points. -+ */ -+ -+static void hrtick_clear(struct rq *rq) -+{ -+ if (hrtimer_active(&rq->hrtick_timer)) -+ hrtimer_cancel(&rq->hrtick_timer); -+} -+ -+/* -+ * High-resolution timer tick. -+ * Runs from hardirq context with interrupts disabled. 
-+ */ -+static enum hrtimer_restart hrtick(struct hrtimer *timer) -+{ -+ struct rq *rq = container_of(timer, struct rq, hrtick_timer); -+ -+ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); -+ -+ raw_spin_lock(&rq->lock); -+ resched_curr(rq); -+ raw_spin_unlock(&rq->lock); -+ -+ return HRTIMER_NORESTART; -+} -+ -+/* -+ * Use hrtick when: -+ * - enabled by features -+ * - hrtimer is actually high res -+ */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ /** -+ * Alt schedule FW doesn't support sched_feat yet -+ if (!sched_feat(HRTICK)) -+ return 0; -+ */ -+ if (!cpu_active(cpu_of(rq))) -+ return 0; -+ return hrtimer_is_hres_active(&rq->hrtick_timer); -+} -+ -+#ifdef CONFIG_SMP -+ -+static void __hrtick_restart(struct rq *rq) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ ktime_t time = rq->hrtick_time; -+ -+ hrtimer_start(timer, time, HRTIMER_MODE_ABS_PINNED_HARD); -+} -+ -+/* -+ * called from hardirq (IPI) context -+ */ -+static void __hrtick_start(void *arg) -+{ -+ struct rq *rq = arg; -+ -+ raw_spin_lock(&rq->lock); -+ __hrtick_restart(rq); -+ raw_spin_unlock(&rq->lock); -+} -+ -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ struct hrtimer *timer = &rq->hrtick_timer; -+ s64 delta; -+ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense and can cause timer DoS. -+ */ -+ delta = max_t(s64, delay, 10000LL); -+ -+ rq->hrtick_time = ktime_add_ns(timer->base->get_time(), delta); -+ -+ if (rq == this_rq()) -+ __hrtick_restart(rq); -+ else -+ smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); -+} -+ -+#else -+/* -+ * Called to set the hrtick timer state. -+ * -+ * called with rq->lock held and irqs disabled -+ */ -+void hrtick_start(struct rq *rq, u64 delay) -+{ -+ /* -+ * Don't schedule slices shorter than 10000ns, that just -+ * doesn't make sense. Rely on vruntime for fairness. -+ */ -+ delay = max_t(u64, delay, 10000LL); -+ hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), -+ HRTIMER_MODE_REL_PINNED_HARD); -+} -+#endif /* CONFIG_SMP */ -+ -+static void hrtick_rq_init(struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ INIT_CSD(&rq->hrtick_csd, __hrtick_start, rq); -+#endif -+ -+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); -+ rq->hrtick_timer.function = hrtick; -+} -+#else /* CONFIG_SCHED_HRTICK */ -+static inline int hrtick_enabled(struct rq *rq) -+{ -+ return 0; -+} -+ -+static inline void hrtick_clear(struct rq *rq) -+{ -+} -+ -+static inline void hrtick_rq_init(struct rq *rq) -+{ -+} -+#endif /* CONFIG_SCHED_HRTICK */ -+ -+static inline int __normal_prio(int policy, int rt_prio, int static_prio) -+{ -+ return rt_policy(policy) ? (MAX_RT_PRIO - 1 - rt_prio) : -+ static_prio + MAX_PRIORITY_ADJ; -+} -+ -+/* -+ * Calculate the expected normal priority: i.e. priority -+ * without taking RT-inheritance into account. Might be -+ * boosted by interactivity modifiers. Changes upon fork, -+ * setprio syscalls, and whenever the interactivity -+ * estimator recalculates. -+ */ -+static inline int normal_prio(struct task_struct *p) -+{ -+ return __normal_prio(p->policy, p->rt_priority, p->static_prio); -+} -+ -+/* -+ * Calculate the current priority, i.e. the priority -+ * taken into account by the scheduler. This value might -+ * be boosted by RT tasks as it will be RT if the task got -+ * RT-boosted. If not then it returns p->normal_prio. 
-+ */ -+static int effective_prio(struct task_struct *p) -+{ -+ p->normal_prio = normal_prio(p); -+ /* -+ * If we are RT tasks or we were boosted to RT priority, -+ * keep the priority unchanged. Otherwise, update priority -+ * to the normal priority: -+ */ -+ if (!rt_prio(p->prio)) -+ return p->normal_prio; -+ return p->prio; -+} -+ -+/* -+ * activate_task - move a task to the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static void activate_task(struct task_struct *p, struct rq *rq) -+{ -+ enqueue_task(p, rq, ENQUEUE_WAKEUP); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+ -+ /* -+ * If in_iowait is set, the code below may not trigger any cpufreq -+ * utilization updates, so do it here explicitly with the IOWAIT flag -+ * passed. -+ */ -+ cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT * p->in_iowait); -+} -+ -+/* -+ * deactivate_task - remove a task from the runqueue. -+ * -+ * Context: rq->lock -+ */ -+static inline void deactivate_task(struct task_struct *p, struct rq *rq) -+{ -+ dequeue_task(p, rq, DEQUEUE_SLEEP); -+ p->on_rq = 0; -+ cpufreq_update_util(rq, 0); -+} -+ -+static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * After ->cpu is set up to a new value, task_access_lock(p, ...) can be -+ * successfully executed on another CPU. We must ensure that updates of -+ * per-task data have been completed by this moment. -+ */ -+ smp_wmb(); -+ -+ WRITE_ONCE(task_thread_info(p)->cpu, cpu); -+#endif -+} -+ -+static inline bool is_migration_disabled(struct task_struct *p) -+{ -+#ifdef CONFIG_SMP -+ return p->migration_disabled; -+#else -+ return false; -+#endif -+} -+ -+#define SCA_CHECK 0x01 -+#define SCA_USER 0x08 -+ -+#ifdef CONFIG_SMP -+ -+void set_task_cpu(struct task_struct *p, unsigned int new_cpu) -+{ -+#ifdef CONFIG_SCHED_DEBUG -+ unsigned int state = READ_ONCE(p->__state); -+ -+ /* -+ * We should never call set_task_cpu() on a blocked task, -+ * ttwu() will sort out the placement. -+ */ -+ WARN_ON_ONCE(state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq); -+ -+#ifdef CONFIG_LOCKDEP -+ /* -+ * The caller should hold either p->pi_lock or rq->lock, when changing -+ * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. -+ * -+ * sched_move_task() holds both and thus holding either pins the cgroup, -+ * see task_group(). -+ */ -+ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || -+ lockdep_is_held(&task_rq(p)->lock))); -+#endif -+ /* -+ * Clearly, migrating tasks to offline CPUs is a fairly daft thing. -+ */ -+ WARN_ON_ONCE(!cpu_online(new_cpu)); -+ -+ WARN_ON_ONCE(is_migration_disabled(p)); -+#endif -+ if (task_cpu(p) == new_cpu) -+ return; -+ trace_sched_migrate_task(p, new_cpu); -+ rseq_migrate(p); -+ perf_event_task_migrate(p); -+ -+ __set_task_cpu(p, new_cpu); -+} -+ -+#define MDF_FORCE_ENABLED 0x80 -+ -+static void -+__do_set_cpus_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ /* -+ * This here violates the locking rules for affinity, since we're only -+ * supposed to change these variables while holding both rq->lock and -+ * p->pi_lock. -+ * -+ * HOWEVER, it magically works, because ttwu() is the only code that -+ * accesses these variables under p->pi_lock and only does so after -+ * smp_cond_load_acquire(&p->on_cpu, !VAL), and we're in __schedule() -+ * before finish_task(). -+ * -+ * XXX do further audits, this smells like something putrid. 
-+ */ -+ SCHED_WARN_ON(!p->on_cpu); -+ p->cpus_ptr = new_mask; -+} -+ -+void migrate_disable(void) -+{ -+ struct task_struct *p = current; -+ int cpu; -+ -+ if (p->migration_disabled) { -+ p->migration_disabled++; -+ return; -+ } -+ -+ preempt_disable(); -+ cpu = smp_processor_id(); -+ if (cpumask_test_cpu(cpu, &p->cpus_mask)) { -+ cpu_rq(cpu)->nr_pinned++; -+ p->migration_disabled = 1; -+ p->migration_flags &= ~MDF_FORCE_ENABLED; -+ -+ /* -+ * Violates locking rules! see comment in __do_set_cpus_ptr(). -+ */ -+ if (p->cpus_ptr == &p->cpus_mask) -+ __do_set_cpus_ptr(p, cpumask_of(cpu)); -+ } -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(migrate_disable); -+ -+void migrate_enable(void) -+{ -+ struct task_struct *p = current; -+ -+ if (0 == p->migration_disabled) -+ return; -+ -+ if (p->migration_disabled > 1) { -+ p->migration_disabled--; -+ return; -+ } -+ -+ if (WARN_ON_ONCE(!p->migration_disabled)) -+ return; -+ -+ /* -+ * Ensure stop_task runs either before or after this, and that -+ * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule(). -+ */ -+ preempt_disable(); -+ /* -+ * Assumption: current should be running on allowed cpu -+ */ -+ WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &p->cpus_mask)); -+ if (p->cpus_ptr != &p->cpus_mask) -+ __do_set_cpus_ptr(p, &p->cpus_mask); -+ /* -+ * Mustn't clear migration_disabled() until cpus_ptr points back at the -+ * regular cpus_mask, otherwise things that race (eg. -+ * select_fallback_rq) get confused. -+ */ -+ barrier(); -+ p->migration_disabled = 0; -+ this_rq()->nr_pinned--; -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(migrate_enable); -+ -+static inline bool rq_has_pinned_tasks(struct rq *rq) -+{ -+ return rq->nr_pinned; -+} -+ -+/* -+ * Per-CPU kthreads are allowed to run on !active && online CPUs, see -+ * __set_cpus_allowed_ptr() and select_fallback_rq(). -+ */ -+static inline bool is_cpu_allowed(struct task_struct *p, int cpu) -+{ -+ /* When not in the task's cpumask, no point in looking further. */ -+ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) -+ return false; -+ -+ /* migrate_disabled() must be allowed to finish. */ -+ if (is_migration_disabled(p)) -+ return cpu_online(cpu); -+ -+ /* Non kernel threads are not allowed during either online or offline. */ -+ if (!(p->flags & PF_KTHREAD)) -+ return cpu_active(cpu) && task_cpu_possible(cpu, p); -+ -+ /* KTHREAD_IS_PER_CPU is always allowed. */ -+ if (kthread_is_per_cpu(p)) -+ return cpu_online(cpu); -+ -+ /* Regular kernel threads don't get to stay during offline. */ -+ if (cpu_dying(cpu)) -+ return false; -+ -+ /* But are allowed during online. */ -+ return cpu_online(cpu); -+} -+ -+/* -+ * This is how migration works: -+ * -+ * 1) we invoke migration_cpu_stop() on the target CPU using -+ * stop_one_cpu(). -+ * 2) stopper starts to run (implicitly forcing the migrated thread -+ * off the CPU) -+ * 3) it checks whether the migrated task is still in the wrong runqueue. -+ * 4) if it's in the wrong runqueue then the migration thread removes -+ * it and puts it into the right queue. -+ * 5) stopper completes and stop_one_cpu() returns and the migration -+ * is done. -+ */ -+ -+/* -+ * move_queued_task - move a queued task to new rq. -+ * -+ * Returns (locked) new rq. Old rq's lock is released. 
-+ */ -+static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int -+ new_cpu) -+{ -+ lockdep_assert_held(&rq->lock); -+ -+ WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING); -+ dequeue_task(p, rq, 0); -+ update_sched_rq_watermark(rq); -+ set_task_cpu(p, new_cpu); -+ raw_spin_unlock(&rq->lock); -+ -+ rq = cpu_rq(new_cpu); -+ -+ raw_spin_lock(&rq->lock); -+ BUG_ON(task_cpu(p) != new_cpu); -+ sched_task_sanity_check(p, rq); -+ enqueue_task(p, rq, 0); -+ p->on_rq = TASK_ON_RQ_QUEUED; -+ check_preempt_curr(rq); -+ -+ return rq; -+} -+ -+struct migration_arg { -+ struct task_struct *task; -+ int dest_cpu; -+}; -+ -+/* -+ * Move (not current) task off this CPU, onto the destination CPU. We're doing -+ * this because either it can't run here any more (set_cpus_allowed() -+ * away from this CPU, or CPU going down), or because we're -+ * attempting to rebalance this task on exec (sched_exec). -+ * -+ * So we race with normal scheduler movements, but that's OK, as long -+ * as the task is no longer on this CPU. -+ */ -+static struct rq *__migrate_task(struct rq *rq, struct task_struct *p, int -+ dest_cpu) -+{ -+ /* Affinity changed (again). */ -+ if (!is_cpu_allowed(p, dest_cpu)) -+ return rq; -+ -+ update_rq_clock(rq); -+ return move_queued_task(rq, p, dest_cpu); -+} -+ -+/* -+ * migration_cpu_stop - this will be executed by a highprio stopper thread -+ * and performs thread migration by bumping thread off CPU then -+ * 'pushing' onto another runqueue. -+ */ -+static int migration_cpu_stop(void *data) -+{ -+ struct migration_arg *arg = data; -+ struct task_struct *p = arg->task; -+ struct rq *rq = this_rq(); -+ unsigned long flags; -+ -+ /* -+ * The original target CPU might have gone down and we might -+ * be on another CPU but it doesn't matter. -+ */ -+ local_irq_save(flags); -+ /* -+ * We need to explicitly wake pending tasks before running -+ * __migrate_task() such that we will not miss enforcing cpus_ptr -+ * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. -+ */ -+ flush_smp_call_function_queue(); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ /* -+ * If task_rq(p) != rq, it cannot be migrated here, because we're -+ * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because -+ * we're holding p->pi_lock. 
-+ */ -+ if (task_rq(p) == rq && task_on_rq_queued(p)) -+ rq = __migrate_task(rq, p, arg->dest_cpu); -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ return 0; -+} -+ -+static inline void -+set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ cpumask_copy(&p->cpus_mask, new_mask); -+ p->nr_cpus_allowed = cpumask_weight(new_mask); -+} -+ -+static void -+__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ lockdep_assert_held(&p->pi_lock); -+ set_cpus_allowed_common(p, new_mask); -+} -+ -+void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ __do_set_cpus_allowed(p, new_mask); -+} -+ -+int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, -+ int node) -+{ -+ if (!src->user_cpus_ptr) -+ return 0; -+ -+ dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node); -+ if (!dst->user_cpus_ptr) -+ return -ENOMEM; -+ -+ cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); -+ return 0; -+} -+ -+static inline struct cpumask *clear_user_cpus_ptr(struct task_struct *p) -+{ -+ struct cpumask *user_mask = NULL; -+ -+ swap(p->user_cpus_ptr, user_mask); -+ -+ return user_mask; -+} -+ -+void release_user_cpus_ptr(struct task_struct *p) -+{ -+ kfree(clear_user_cpus_ptr(p)); -+} -+ -+#endif -+ -+/** -+ * task_curr - is this task currently executing on a CPU? -+ * @p: the task in question. -+ * -+ * Return: 1 if the task is currently executing. 0 otherwise. -+ */ -+inline int task_curr(const struct task_struct *p) -+{ -+ return cpu_curr(task_cpu(p)) == p; -+} -+ -+#ifdef CONFIG_SMP -+/* -+ * wait_task_inactive - wait for a thread to unschedule. -+ * -+ * If @match_state is nonzero, it's the @p->state value just checked and -+ * not expected to change. If it changes, i.e. @p might have woken up, -+ * then return zero. When we succeed in waiting for @p to be off its CPU, -+ * we return a positive number (its total switch count). If a second call -+ * a short while later returns the same number, the caller can be sure that -+ * @p has remained unscheduled the whole time. -+ * -+ * The caller must ensure that the task *will* unschedule sometime soon, -+ * else this function might spin for a *long* time. This function can't -+ * be called with interrupts off, or it may introduce deadlock with -+ * smp_call_function() if an IPI is sent by the same process we are -+ * waiting to become inactive. -+ */ -+unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state) -+{ -+ unsigned long flags; -+ bool running, on_rq; -+ unsigned long ncsw; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ for (;;) { -+ rq = task_rq(p); -+ -+ /* -+ * If the task is actively running on another CPU -+ * still, just relax and busy-wait without holding -+ * any locks. -+ * -+ * NOTE! Since we don't hold any locks, it's not -+ * even sure that "rq" stays as the right runqueue! -+ * But we don't care, since this will return false -+ * if the runqueue has changed and p is actually now -+ * running somewhere else! -+ */ -+ while (task_running(p) && p == rq->curr) { -+ if (match_state && unlikely(READ_ONCE(p->__state) != match_state)) -+ return 0; -+ cpu_relax(); -+ } -+ -+ /* -+ * Ok, time to look more closely! We need the rq -+ * lock now, to be *sure*. If we're wrong, we'll -+ * just go back and repeat. 
-+ */ -+ task_access_lock_irqsave(p, &lock, &flags); -+ trace_sched_wait_task(p); -+ running = task_running(p); -+ on_rq = p->on_rq; -+ ncsw = 0; -+ if (!match_state || READ_ONCE(p->__state) == match_state) -+ ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ /* -+ * If it changed from the expected state, bail out now. -+ */ -+ if (unlikely(!ncsw)) -+ break; -+ -+ /* -+ * Was it really running after all now that we -+ * checked with the proper locks actually held? -+ * -+ * Oops. Go back and try again.. -+ */ -+ if (unlikely(running)) { -+ cpu_relax(); -+ continue; -+ } -+ -+ /* -+ * It's not enough that it's not actively running, -+ * it must be off the runqueue _entirely_, and not -+ * preempted! -+ * -+ * So if it was still runnable (but just not actively -+ * running right now), it's preempted, and we should -+ * yield - it could be a while. -+ */ -+ if (unlikely(on_rq)) { -+ ktime_t to = NSEC_PER_SEC / HZ; -+ -+ set_current_state(TASK_UNINTERRUPTIBLE); -+ schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD); -+ continue; -+ } -+ -+ /* -+ * Ahh, all good. It wasn't running, and it wasn't -+ * runnable, which means that it will never become -+ * running in the future either. We're all done! -+ */ -+ break; -+ } -+ -+ return ncsw; -+} -+ -+/*** -+ * kick_process - kick a running thread to enter/exit the kernel -+ * @p: the to-be-kicked thread -+ * -+ * Cause a process which is running on another CPU to enter -+ * kernel-mode, without any delay. (to get signals handled.) -+ * -+ * NOTE: this function doesn't have to take the runqueue lock, -+ * because all it wants to ensure is that the remote task enters -+ * the kernel. If the IPI races and the task has been migrated -+ * to another CPU then no harm is done and the purpose has been -+ * achieved as well. -+ */ -+void kick_process(struct task_struct *p) -+{ -+ int cpu; -+ -+ preempt_disable(); -+ cpu = task_cpu(p); -+ if ((cpu != smp_processor_id()) && task_curr(p)) -+ smp_send_reschedule(cpu); -+ preempt_enable(); -+} -+EXPORT_SYMBOL_GPL(kick_process); -+ -+/* -+ * ->cpus_ptr is protected by both rq->lock and p->pi_lock -+ * -+ * A few notes on cpu_active vs cpu_online: -+ * -+ * - cpu_active must be a subset of cpu_online -+ * -+ * - on CPU-up we allow per-CPU kthreads on the online && !active CPU, -+ * see __set_cpus_allowed_ptr(). At this point the newly online -+ * CPU isn't yet part of the sched domains, and balancing will not -+ * see it. -+ * -+ * - on cpu-down we clear cpu_active() to mask the sched domains and -+ * avoid the load balancer to place new tasks on the to be removed -+ * CPU. Existing tasks will remain running there and will be taken -+ * off. -+ * -+ * This means that fallback selection must not select !active CPUs. -+ * And can assume that any active CPU must be online. Conversely -+ * select_task_rq() below may allow selection of !active CPUs in order -+ * to satisfy the above rules. -+ */ -+static int select_fallback_rq(int cpu, struct task_struct *p) -+{ -+ int nid = cpu_to_node(cpu); -+ const struct cpumask *nodemask = NULL; -+ enum { cpuset, possible, fail } state = cpuset; -+ int dest_cpu; -+ -+ /* -+ * If the node that the CPU is on has been offlined, cpu_to_node() -+ * will return -1. There is no CPU on the node, and we should -+ * select the CPU on the other node. -+ */ -+ if (nid != -1) { -+ nodemask = cpumask_of_node(nid); -+ -+ /* Look for allowed, online CPU in same node. 
*/ -+ for_each_cpu(dest_cpu, nodemask) { -+ if (is_cpu_allowed(p, dest_cpu)) -+ return dest_cpu; -+ } -+ } -+ -+ for (;;) { -+ /* Any allowed, online CPU? */ -+ for_each_cpu(dest_cpu, p->cpus_ptr) { -+ if (!is_cpu_allowed(p, dest_cpu)) -+ continue; -+ goto out; -+ } -+ -+ /* No more Mr. Nice Guy. */ -+ switch (state) { -+ case cpuset: -+ if (cpuset_cpus_allowed_fallback(p)) { -+ state = possible; -+ break; -+ } -+ fallthrough; -+ case possible: -+ /* -+ * XXX When called from select_task_rq() we only -+ * hold p->pi_lock and again violate locking order. -+ * -+ * More yuck to audit. -+ */ -+ do_set_cpus_allowed(p, task_cpu_possible_mask(p)); -+ state = fail; -+ break; -+ -+ case fail: -+ BUG(); -+ break; -+ } -+ } -+ -+out: -+ if (state != cpuset) { -+ /* -+ * Don't tell them about moving exiting tasks or -+ * kernel threads (both mm NULL), since they never -+ * leave kernel. -+ */ -+ if (p->mm && printk_ratelimit()) { -+ printk_deferred("process %d (%s) no longer affine to cpu%d\n", -+ task_pid_nr(p), p->comm, cpu); -+ } -+ } -+ -+ return dest_cpu; -+} -+ -+static inline int select_task_rq(struct task_struct *p) -+{ -+ cpumask_t chk_mask, tmp; -+ -+ if (unlikely(!cpumask_and(&chk_mask, p->cpus_ptr, cpu_active_mask))) -+ return select_fallback_rq(task_cpu(p), p); -+ -+ if ( -+#ifdef CONFIG_SCHED_SMT -+ cpumask_and(&tmp, &chk_mask, &sched_sg_idle_mask) || -+#endif -+ cpumask_and(&tmp, &chk_mask, sched_rq_watermark) || -+ cpumask_and(&tmp, &chk_mask, -+ sched_rq_watermark + SCHED_QUEUE_BITS - 1 - task_sched_prio(p))) -+ return best_mask_cpu(task_cpu(p), &tmp); -+ -+ return best_mask_cpu(task_cpu(p), &chk_mask); -+} -+ -+void sched_set_stop_task(int cpu, struct task_struct *stop) -+{ -+ static struct lock_class_key stop_pi_lock; -+ struct sched_param stop_param = { .sched_priority = STOP_PRIO }; -+ struct sched_param start_param = { .sched_priority = 0 }; -+ struct task_struct *old_stop = cpu_rq(cpu)->stop; -+ -+ if (stop) { -+ /* -+ * Make it appear like a SCHED_FIFO task, its something -+ * userspace knows about and won't get confused about. -+ * -+ * Also, it will make PI more or less work without too -+ * much confusion -- but then, stop work should not -+ * rely on PI working anyway. -+ */ -+ sched_setscheduler_nocheck(stop, SCHED_FIFO, &stop_param); -+ -+ /* -+ * The PI code calls rt_mutex_setprio() with ->pi_lock held to -+ * adjust the effective priority of a task. As a result, -+ * rt_mutex_setprio() can trigger (RT) balancing operations, -+ * which can then trigger wakeups of the stop thread to push -+ * around the current task. -+ * -+ * The stop task itself will never be part of the PI-chain, it -+ * never blocks, therefore that ->pi_lock recursion is safe. -+ * Tell lockdep about this by placing the stop->pi_lock in its -+ * own class. -+ */ -+ lockdep_set_class(&stop->pi_lock, &stop_pi_lock); -+ } -+ -+ cpu_rq(cpu)->stop = stop; -+ -+ if (old_stop) { -+ /* -+ * Reset it back to a normal scheduling policy so that -+ * it can die in pieces. -+ */ -+ sched_setscheduler_nocheck(old_stop, SCHED_NORMAL, &start_param); -+ } -+} -+ -+static int affine_move_task(struct rq *rq, struct task_struct *p, int dest_cpu, -+ raw_spinlock_t *lock, unsigned long irq_flags) -+{ -+ /* Can the task run on the task's current CPU? 
If so, we're done */ -+ if (!cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) { -+ if (p->migration_disabled) { -+ if (likely(p->cpus_ptr != &p->cpus_mask)) -+ __do_set_cpus_ptr(p, &p->cpus_mask); -+ p->migration_disabled = 0; -+ p->migration_flags |= MDF_FORCE_ENABLED; -+ /* When p is migrate_disabled, rq->lock should be held */ -+ rq->nr_pinned--; -+ } -+ -+ if (task_running(p) || READ_ONCE(p->__state) == TASK_WAKING) { -+ struct migration_arg arg = { p, dest_cpu }; -+ -+ /* Need help from migration thread: drop lock and wait. */ -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); -+ stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); -+ return 0; -+ } -+ if (task_on_rq_queued(p)) { -+ /* -+ * OK, since we're going to drop the lock immediately -+ * afterwards anyway. -+ */ -+ update_rq_clock(rq); -+ rq = move_queued_task(rq, p, dest_cpu); -+ lock = &rq->lock; -+ } -+ } -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); -+ return 0; -+} -+ -+static int __set_cpus_allowed_ptr_locked(struct task_struct *p, -+ const struct cpumask *new_mask, -+ u32 flags, -+ struct rq *rq, -+ raw_spinlock_t *lock, -+ unsigned long irq_flags) -+{ -+ const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p); -+ const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ bool kthread = p->flags & PF_KTHREAD; -+ struct cpumask *user_mask = NULL; -+ int dest_cpu; -+ int ret = 0; -+ -+ if (kthread || is_migration_disabled(p)) { -+ /* -+ * Kernel threads are allowed on online && !active CPUs, -+ * however, during cpu-hot-unplug, even these might get pushed -+ * away if not KTHREAD_IS_PER_CPU. -+ * -+ * Specifically, migration_disabled() tasks must not fail the -+ * cpumask_any_and_distribute() pick below, esp. so on -+ * SCA_MIGRATE_ENABLE, otherwise we'll not call -+ * set_cpus_allowed_common() and actually reset p->cpus_ptr. -+ */ -+ cpu_valid_mask = cpu_online_mask; -+ } -+ -+ if (!kthread && !cpumask_subset(new_mask, cpu_allowed_mask)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ /* -+ * Must re-check here, to close a race against __kthread_bind(), -+ * sched_setaffinity() is not guaranteed to observe the flag. -+ */ -+ if ((flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ if (cpumask_equal(&p->cpus_mask, new_mask)) -+ goto out; -+ -+ dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); -+ if (dest_cpu >= nr_cpu_ids) { -+ ret = -EINVAL; -+ goto out; -+ } -+ -+ __do_set_cpus_allowed(p, new_mask); -+ -+ if (flags & SCA_USER) -+ user_mask = clear_user_cpus_ptr(p); -+ -+ ret = affine_move_task(rq, p, dest_cpu, lock, irq_flags); -+ -+ kfree(user_mask); -+ -+ return ret; -+ -+out: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); -+ -+ return ret; -+} -+ -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. 
-+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, u32 flags) -+{ -+ unsigned long irq_flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, irq_flags); -+ rq = __task_access_lock(p, &lock); -+ -+ return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, lock, irq_flags); -+} -+ -+int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) -+{ -+ return __set_cpus_allowed_ptr(p, new_mask, 0); -+} -+EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); -+ -+/* -+ * Change a given task's CPU affinity to the intersection of its current -+ * affinity mask and @subset_mask, writing the resulting mask to @new_mask -+ * and pointing @p->user_cpus_ptr to a copy of the old mask. -+ * If the resulting mask is empty, leave the affinity unchanged and return -+ * -EINVAL. -+ */ -+static int restrict_cpus_allowed_ptr(struct task_struct *p, -+ struct cpumask *new_mask, -+ const struct cpumask *subset_mask) -+{ -+ struct cpumask *user_mask = NULL; -+ unsigned long irq_flags; -+ raw_spinlock_t *lock; -+ struct rq *rq; -+ int err; -+ -+ if (!p->user_cpus_ptr) { -+ user_mask = kmalloc(cpumask_size(), GFP_KERNEL); -+ if (!user_mask) -+ return -ENOMEM; -+ } -+ -+ raw_spin_lock_irqsave(&p->pi_lock, irq_flags); -+ rq = __task_access_lock(p, &lock); -+ -+ if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) { -+ err = -EINVAL; -+ goto err_unlock; -+ } -+ -+ /* -+ * We're about to butcher the task affinity, so keep track of what -+ * the user asked for in case we're able to restore it later on. -+ */ -+ if (user_mask) { -+ cpumask_copy(user_mask, p->cpus_ptr); -+ p->user_cpus_ptr = user_mask; -+ } -+ -+ /*return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, &rf);*/ -+ return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, lock, irq_flags); -+ -+err_unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, irq_flags); -+ kfree(user_mask); -+ return err; -+} -+ -+/* -+ * Restrict the CPU affinity of task @p so that it is a subset of -+ * task_cpu_possible_mask() and point @p->user_cpu_ptr to a copy of the -+ * old affinity mask. If the resulting mask is empty, we warn and walk -+ * up the cpuset hierarchy until we find a suitable mask. -+ */ -+void force_compatible_cpus_allowed_ptr(struct task_struct *p) -+{ -+ cpumask_var_t new_mask; -+ const struct cpumask *override_mask = task_cpu_possible_mask(p); -+ -+ alloc_cpumask_var(&new_mask, GFP_KERNEL); -+ -+ /* -+ * __migrate_task() can fail silently in the face of concurrent -+ * offlining of the chosen destination CPU, so take the hotplug -+ * lock to ensure that the migration succeeds. -+ */ -+ cpus_read_lock(); -+ if (!cpumask_available(new_mask)) -+ goto out_set_mask; -+ -+ if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask)) -+ goto out_free_mask; -+ -+ /* -+ * We failed to find a valid subset of the affinity mask for the -+ * task, so override it based on its cpuset hierarchy. 
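For orientation on what the affinity machinery above ultimately services: userspace reaches set_cpus_allowed_ptr()/__set_cpus_allowed_ptr() through the sched_setaffinity(2) syscall path. A minimal userspace sketch follows (illustrative only, not part of the patch hunk; assumes Linux with glibc and _GNU_SOURCE):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    cpu_set_t set;

    /* Pin the calling thread to CPU 0; the kernel side of this request
     * ends up in the __set_cpus_allowed_ptr() machinery above. */
    CPU_ZERO(&set);
    CPU_SET(0, &set);
    if (sched_setaffinity(0, sizeof(set), &set)) {
        perror("sched_setaffinity");
        return EXIT_FAILURE;
    }

    /* Read the mask back to confirm the new affinity. */
    CPU_ZERO(&set);
    if (sched_getaffinity(0, sizeof(set), &set)) {
        perror("sched_getaffinity");
        return EXIT_FAILURE;
    }
    printf("now runnable on %d CPU(s)\n", CPU_COUNT(&set));
    return 0;
}

After the call, CPU_COUNT() reports 1, mirroring the narrowed cpus_mask the kernel code above installs.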
-+ */ -+ cpuset_cpus_allowed(p, new_mask); -+ override_mask = new_mask; -+ -+out_set_mask: -+ if (printk_ratelimit()) { -+ printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n", -+ task_pid_nr(p), p->comm, -+ cpumask_pr_args(override_mask)); -+ } -+ -+ WARN_ON(set_cpus_allowed_ptr(p, override_mask)); -+out_free_mask: -+ cpus_read_unlock(); -+ free_cpumask_var(new_mask); -+} -+ -+static int -+__sched_setaffinity(struct task_struct *p, const struct cpumask *mask); -+ -+/* -+ * Restore the affinity of a task @p which was previously restricted by a -+ * call to force_compatible_cpus_allowed_ptr(). This will clear (and free) -+ * @p->user_cpus_ptr. -+ * -+ * It is the caller's responsibility to serialise this with any calls to -+ * force_compatible_cpus_allowed_ptr(@p). -+ */ -+void relax_compatible_cpus_allowed_ptr(struct task_struct *p) -+{ -+ struct cpumask *user_mask = p->user_cpus_ptr; -+ unsigned long flags; -+ -+ /* -+ * Try to restore the old affinity mask. If this fails, then -+ * we free the mask explicitly to avoid it being inherited across -+ * a subsequent fork(). -+ */ -+ if (!user_mask || !__sched_setaffinity(p, user_mask)) -+ return; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ user_mask = clear_user_cpus_ptr(p); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ kfree(user_mask); -+} -+ -+#else /* CONFIG_SMP */ -+ -+static inline int select_task_rq(struct task_struct *p) -+{ -+ return 0; -+} -+ -+static inline int -+__set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, u32 flags) -+{ -+ return set_cpus_allowed_ptr(p, new_mask); -+} -+ -+static inline bool rq_has_pinned_tasks(struct rq *rq) -+{ -+ return false; -+} -+ -+#endif /* !CONFIG_SMP */ -+ -+static void -+ttwu_stat(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq; -+ -+ if (!schedstat_enabled()) -+ return; -+ -+ rq = this_rq(); -+ -+#ifdef CONFIG_SMP -+ if (cpu == rq->cpu) { -+ __schedstat_inc(rq->ttwu_local); -+ __schedstat_inc(p->stats.nr_wakeups_local); -+ } else { -+ /** Alt schedule FW ToDo: -+ * How to do ttwu_wake_remote -+ */ -+ } -+#endif /* CONFIG_SMP */ -+ -+ __schedstat_inc(rq->ttwu_count); -+ __schedstat_inc(p->stats.nr_wakeups); -+} -+ -+/* -+ * Mark the task runnable and perform wakeup-preemption. -+ */ -+static inline void -+ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ check_preempt_curr(rq); -+ WRITE_ONCE(p->__state, TASK_RUNNING); -+ trace_sched_wakeup(p); -+} -+ -+static inline void -+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) -+{ -+ if (p->sched_contributes_to_load) -+ rq->nr_uninterruptible--; -+ -+ if ( -+#ifdef CONFIG_SMP -+ !(wake_flags & WF_MIGRATED) && -+#endif -+ p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+ activate_task(p, rq); -+ ttwu_do_wakeup(rq, p, 0); -+} -+ -+/* -+ * Consider @p being inside a wait loop: -+ * -+ * for (;;) { -+ * set_current_state(TASK_UNINTERRUPTIBLE); -+ * -+ * if (CONDITION) -+ * break; -+ * -+ * schedule(); -+ * } -+ * __set_current_state(TASK_RUNNING); -+ * -+ * between set_current_state() and schedule(). In this case @p is still -+ * runnable, so all that needs doing is change p->state back to TASK_RUNNING in -+ * an atomic manner. -+ * -+ * By taking task_rq(p)->lock we serialize against schedule(), if @p->on_rq -+ * then schedule() must still happen and p->state can be changed to -+ * TASK_RUNNING. 
Otherwise we lost the race, schedule() has happened, and we -+ * need to do a full wakeup with enqueue. -+ * -+ * Returns: %true when the wakeup is done, -+ * %false otherwise. -+ */ -+static int ttwu_runnable(struct task_struct *p, int wake_flags) -+{ -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ int ret = 0; -+ -+ rq = __task_access_lock(p, &lock); -+ if (task_on_rq_queued(p)) { -+ /* check_preempt_curr() may use rq clock */ -+ update_rq_clock(rq); -+ ttwu_do_wakeup(rq, p, wake_flags); -+ ret = 1; -+ } -+ __task_access_unlock(p, lock); -+ -+ return ret; -+} -+ -+#ifdef CONFIG_SMP -+void sched_ttwu_pending(void *arg) -+{ -+ struct llist_node *llist = arg; -+ struct rq *rq = this_rq(); -+ struct task_struct *p, *t; -+ struct rq_flags rf; -+ -+ if (!llist) -+ return; -+ -+ /* -+ * rq::ttwu_pending racy indication of out-standing wakeups. -+ * Races such that false-negatives are possible, since they -+ * are shorter lived that false-positives would be. -+ */ -+ WRITE_ONCE(rq->ttwu_pending, 0); -+ -+ rq_lock_irqsave(rq, &rf); -+ update_rq_clock(rq); -+ -+ llist_for_each_entry_safe(p, t, llist, wake_entry.llist) { -+ if (WARN_ON_ONCE(p->on_cpu)) -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq))) -+ set_task_cpu(p, cpu_of(rq)); -+ -+ ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0); -+ } -+ -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+void send_call_function_single_ipi(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (!set_nr_if_polling(rq->idle)) -+ arch_send_call_function_single_ipi(cpu); -+ else -+ trace_sched_wake_idle_without_ipi(cpu); -+} -+ -+/* -+ * Queue a task on the target CPUs wake_list and wake the CPU via IPI if -+ * necessary. The wakee CPU on receipt of the IPI will queue the task -+ * via sched_ttwu_wakeup() for activation so the wakee incurs the cost -+ * of the wakeup instead of the waker. -+ */ -+static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED); -+ -+ WRITE_ONCE(rq->ttwu_pending, 1); -+ __smp_call_single_queue(cpu, &p->wake_entry.llist); -+} -+ -+static inline bool ttwu_queue_cond(int cpu, int wake_flags) -+{ -+ /* -+ * Do not complicate things with the async wake_list while the CPU is -+ * in hotplug state. -+ */ -+ if (!cpu_active(cpu)) -+ return false; -+ -+ /* -+ * If the CPU does not share cache, then queue the task on the -+ * remote rqs wakelist to avoid accessing remote data. -+ */ -+ if (!cpus_share_cache(smp_processor_id(), cpu)) -+ return true; -+ -+ /* -+ * If the task is descheduling and the only running task on the -+ * CPU then use the wakelist to offload the task activation to -+ * the soon-to-be-idle CPU as the current CPU is likely busy. -+ * nr_running is checked to avoid unnecessary task stacking. 
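The wait-loop / wakeup contract spelled out in the comment above (set_current_state(), re-check CONDITION, schedule(), with the waker serializing against the state change) has a familiar userspace analogue in the mutex-plus-condvar pattern, which avoids the same lost-wakeup race. A minimal sketch, assuming POSIX threads; it is an analogy only, not the kernel mechanism:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int condition;   /* the "CONDITION" from the comment above */

static void *waiter(void *arg)
{
    pthread_mutex_lock(&lock);
    while (!condition)              /* re-check after every wakeup */
        pthread_cond_wait(&cond, &lock);
    pthread_mutex_unlock(&lock);
    puts("waiter: condition observed");
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, waiter, NULL);

    /* Waker: publish the condition and the wakeup under the same lock,
     * so the waiter can never go to sleep after missing the store. */
    pthread_mutex_lock(&lock);
    condition = 1;
    pthread_cond_signal(&cond);
    pthread_mutex_unlock(&lock);

    pthread_join(t, NULL);
    return 0;
}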
-+ */ -+ if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) -+ return true; -+ -+ return false; -+} -+ -+static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ if (__is_defined(ALT_SCHED_TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) { -+ if (WARN_ON_ONCE(cpu == smp_processor_id())) -+ return false; -+ -+ sched_clock_cpu(cpu); /* Sync clocks across CPUs */ -+ __ttwu_queue_wakelist(p, cpu, wake_flags); -+ return true; -+ } -+ -+ return false; -+} -+ -+void wake_up_if_idle(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ rcu_read_lock(); -+ -+ if (!is_idle_task(rcu_dereference(rq->curr))) -+ goto out; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (is_idle_task(rq->curr)) -+ resched_curr(rq); -+ /* Else CPU is not idle, do nothing here */ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+out: -+ rcu_read_unlock(); -+} -+ -+bool cpus_share_cache(int this_cpu, int that_cpu) -+{ -+ if (this_cpu == that_cpu) -+ return true; -+ -+ return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); -+} -+#else /* !CONFIG_SMP */ -+ -+static inline bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) -+{ -+ return false; -+} -+ -+#endif /* CONFIG_SMP */ -+ -+static inline void ttwu_queue(struct task_struct *p, int cpu, int wake_flags) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (ttwu_queue_wakelist(p, cpu, wake_flags)) -+ return; -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ ttwu_do_activate(rq, p, wake_flags); -+ raw_spin_unlock(&rq->lock); -+} -+ -+/* -+ * Invoked from try_to_wake_up() to check whether the task can be woken up. -+ * -+ * The caller holds p::pi_lock if p != current or has preemption -+ * disabled when p == current. -+ * -+ * The rules of PREEMPT_RT saved_state: -+ * -+ * The related locking code always holds p::pi_lock when updating -+ * p::saved_state, which means the code is fully serialized in both cases. -+ * -+ * The lock wait and lock wakeups happen via TASK_RTLOCK_WAIT. No other -+ * bits set. This allows to distinguish all wakeup scenarios. -+ */ -+static __always_inline -+bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success) -+{ -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) { -+ WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) && -+ state != TASK_RTLOCK_WAIT); -+ } -+ -+ if (READ_ONCE(p->__state) & state) { -+ *success = 1; -+ return true; -+ } -+ -+#ifdef CONFIG_PREEMPT_RT -+ /* -+ * Saved state preserves the task state across blocking on -+ * an RT lock. If the state matches, set p::saved_state to -+ * TASK_RUNNING, but do not wake the task because it waits -+ * for a lock wakeup. Also indicate success because from -+ * the regular waker's point of view this has succeeded. -+ * -+ * After acquiring the lock the task will restore p::__state -+ * from p::saved_state which ensures that the regular -+ * wakeup is not lost. The restore will also set -+ * p::saved_state to TASK_RUNNING so any further tests will -+ * not result in false positives vs. @success -+ */ -+ if (p->saved_state & state) { -+ p->saved_state = TASK_RUNNING; -+ *success = 1; -+ } -+#endif -+ return false; -+} -+ -+/* -+ * Notes on Program-Order guarantees on SMP systems. -+ * -+ * MIGRATION -+ * -+ * The basic program-order guarantee on SMP systems is that when a task [t] -+ * migrates, all its activity on its old CPU [c0] happens-before any subsequent -+ * execution on its new CPU [c1]. 
-+ * -+ * For migration (of runnable tasks) this is provided by the following means: -+ * -+ * A) UNLOCK of the rq(c0)->lock scheduling out task t -+ * B) migration for t is required to synchronize *both* rq(c0)->lock and -+ * rq(c1)->lock (if not at the same time, then in that order). -+ * C) LOCK of the rq(c1)->lock scheduling in task -+ * -+ * Transitivity guarantees that B happens after A and C after B. -+ * Note: we only require RCpc transitivity. -+ * Note: the CPU doing B need not be c0 or c1 -+ * -+ * Example: -+ * -+ * CPU0 CPU1 CPU2 -+ * -+ * LOCK rq(0)->lock -+ * sched-out X -+ * sched-in Y -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(0)->lock // orders against CPU0 -+ * dequeue X -+ * UNLOCK rq(0)->lock -+ * -+ * LOCK rq(1)->lock -+ * enqueue X -+ * UNLOCK rq(1)->lock -+ * -+ * LOCK rq(1)->lock // orders against CPU2 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(1)->lock -+ * -+ * -+ * BLOCKING -- aka. SLEEP + WAKEUP -+ * -+ * For blocking we (obviously) need to provide the same guarantee as for -+ * migration. However the means are completely different as there is no lock -+ * chain to provide order. Instead we do: -+ * -+ * 1) smp_store_release(X->on_cpu, 0) -- finish_task() -+ * 2) smp_cond_load_acquire(!X->on_cpu) -- try_to_wake_up() -+ * -+ * Example: -+ * -+ * CPU0 (schedule) CPU1 (try_to_wake_up) CPU2 (schedule) -+ * -+ * LOCK rq(0)->lock LOCK X->pi_lock -+ * dequeue X -+ * sched-out X -+ * smp_store_release(X->on_cpu, 0); -+ * -+ * smp_cond_load_acquire(&X->on_cpu, !VAL); -+ * X->state = WAKING -+ * set_task_cpu(X,2) -+ * -+ * LOCK rq(2)->lock -+ * enqueue X -+ * X->state = RUNNING -+ * UNLOCK rq(2)->lock -+ * -+ * LOCK rq(2)->lock // orders against CPU1 -+ * sched-out Z -+ * sched-in X -+ * UNLOCK rq(2)->lock -+ * -+ * UNLOCK X->pi_lock -+ * UNLOCK rq(0)->lock -+ * -+ * -+ * However; for wakeups there is a second guarantee we must provide, namely we -+ * must observe the state that lead to our wakeup. That is, not only must our -+ * task observe its own prior state, it must also observe the stores prior to -+ * its wakeup. -+ * -+ * This means that any means of doing remote wakeups must order the CPU doing -+ * the wakeup against the CPU the task is going to end up running on. This, -+ * however, is already required for the regular Program-Order guarantee above, -+ * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire). -+ * -+ */ -+ -+/** -+ * try_to_wake_up - wake up a thread -+ * @p: the thread to be awakened -+ * @state: the mask of task states that can be woken -+ * @wake_flags: wake modifier flags (WF_*) -+ * -+ * Conceptually does: -+ * -+ * If (@state & @p->state) @p->state = TASK_RUNNING. -+ * -+ * If the task was not queued/runnable, also place it back on a runqueue. -+ * -+ * This function is atomic against schedule() which would dequeue the task. -+ * -+ * It issues a full memory barrier before accessing @p->state, see the comment -+ * with set_current_state(). -+ * -+ * Uses p->pi_lock to serialize against concurrent wake-ups. -+ * -+ * Relies on p->pi_lock stabilizing: -+ * - p->sched_class -+ * - p->cpus_ptr -+ * - p->sched_task_group -+ * in order to do migration, see its use of select_task_rq()/set_task_cpu(). -+ * -+ * Tries really hard to only take one task_rq(p)->lock for performance. -+ * Takes rq->lock in: -+ * - ttwu_runnable() -- old rq, unavoidable, see comment there; -+ * - ttwu_queue() -- new rq, for enqueue of the task; -+ * - psi_ttwu_dequeue() -- much sadness :-( accounting will kill us. 
-+ * -+ * As a consequence we race really badly with just about everything. See the -+ * many memory barriers and their comments for details. -+ * -+ * Return: %true if @p->state changes (an actual wakeup was done), -+ * %false otherwise. -+ */ -+static int try_to_wake_up(struct task_struct *p, unsigned int state, -+ int wake_flags) -+{ -+ unsigned long flags; -+ int cpu, success = 0; -+ -+ preempt_disable(); -+ if (p == current) { -+ /* -+ * We're waking current, this means 'p->on_rq' and 'task_cpu(p) -+ * == smp_processor_id()'. Together this means we can special -+ * case the whole 'p->on_rq && ttwu_runnable()' case below -+ * without taking any locks. -+ * -+ * In particular: -+ * - we rely on Program-Order guarantees for all the ordering, -+ * - we're serialized against set_special_state() by virtue of -+ * it disabling IRQs (this allows not taking ->pi_lock). -+ */ -+ if (!ttwu_state_match(p, state, &success)) -+ goto out; -+ -+ trace_sched_waking(p); -+ WRITE_ONCE(p->__state, TASK_RUNNING); -+ trace_sched_wakeup(p); -+ goto out; -+ } -+ -+ /* -+ * If we are going to wake up a thread waiting for CONDITION we -+ * need to ensure that CONDITION=1 done by the caller can not be -+ * reordered with p->state check below. This pairs with smp_store_mb() -+ * in set_current_state() that the waiting thread does. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ smp_mb__after_spinlock(); -+ if (!ttwu_state_match(p, state, &success)) -+ goto unlock; -+ -+ trace_sched_waking(p); -+ -+ /* -+ * Ensure we load p->on_rq _after_ p->state, otherwise it would -+ * be possible to, falsely, observe p->on_rq == 0 and get stuck -+ * in smp_cond_load_acquire() below. -+ * -+ * sched_ttwu_pending() try_to_wake_up() -+ * STORE p->on_rq = 1 LOAD p->state -+ * UNLOCK rq->lock -+ * -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * UNLOCK rq->lock -+ * -+ * [task p] -+ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ * -+ * A similar smb_rmb() lives in try_invoke_on_locked_down_task(). -+ */ -+ smp_rmb(); -+ if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) -+ goto unlock; -+ -+#ifdef CONFIG_SMP -+ /* -+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be -+ * possible to, falsely, observe p->on_cpu == 0. -+ * -+ * One must be running (->on_cpu == 1) in order to remove oneself -+ * from the runqueue. -+ * -+ * __schedule() (switch to task 'p') try_to_wake_up() -+ * STORE p->on_cpu = 1 LOAD p->on_rq -+ * UNLOCK rq->lock -+ * -+ * __schedule() (put 'p' to sleep) -+ * LOCK rq->lock smp_rmb(); -+ * smp_mb__after_spinlock(); -+ * STORE p->on_rq = 0 LOAD p->on_cpu -+ * -+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in -+ * __schedule(). See the comment for smp_mb__after_spinlock(). -+ * -+ * Form a control-dep-acquire with p->on_rq == 0 above, to ensure -+ * schedule()'s deactivate_task() has 'happened' and p will no longer -+ * care about it's own p->state. See the comment in __schedule(). -+ */ -+ smp_acquire__after_ctrl_dep(); -+ -+ /* -+ * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq -+ * == 0), which means we need to do an enqueue, change p->state to -+ * TASK_WAKING such that we can unlock p->pi_lock before doing the -+ * enqueue, such as ttwu_queue_wakelist(). 
-+ */ -+ WRITE_ONCE(p->__state, TASK_WAKING); -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, considering queueing p on the remote CPUs wake_list -+ * which potentially sends an IPI instead of spinning on p->on_cpu to -+ * let the waker make forward progress. This is safe because IRQs are -+ * disabled and the IPI will deliver after on_cpu is cleared. -+ * -+ * Ensure we load task_cpu(p) after p->on_cpu: -+ * -+ * set_task_cpu(p, cpu); -+ * STORE p->cpu = @cpu -+ * __schedule() (switch to task 'p') -+ * LOCK rq->lock -+ * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu) -+ * STORE p->on_cpu = 1 LOAD p->cpu -+ * -+ * to ensure we observe the correct CPU on which the task is currently -+ * scheduling. -+ */ -+ if (smp_load_acquire(&p->on_cpu) && -+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) -+ goto unlock; -+ -+ /* -+ * If the owning (remote) CPU is still in the middle of schedule() with -+ * this task as prev, wait until it's done referencing the task. -+ * -+ * Pairs with the smp_store_release() in finish_task(). -+ * -+ * This ensures that tasks getting woken will be fully ordered against -+ * their previous state and preserve Program Order. -+ */ -+ smp_cond_load_acquire(&p->on_cpu, !VAL); -+ -+ sched_task_ttwu(p); -+ -+ cpu = select_task_rq(p); -+ -+ if (cpu != task_cpu(p)) { -+ if (p->in_iowait) { -+ delayacct_blkio_end(p); -+ atomic_dec(&task_rq(p)->nr_iowait); -+ } -+ -+ wake_flags |= WF_MIGRATED; -+ psi_ttwu_dequeue(p); -+ set_task_cpu(p, cpu); -+ } -+#else -+ cpu = task_cpu(p); -+#endif /* CONFIG_SMP */ -+ -+ ttwu_queue(p, cpu, wake_flags); -+unlock: -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+out: -+ if (success) -+ ttwu_stat(p, task_cpu(p), wake_flags); -+ preempt_enable(); -+ -+ return success; -+} -+ -+/** -+ * task_call_func - Invoke a function on task in fixed state -+ * @p: Process for which the function is to be invoked, can be @current. -+ * @func: Function to invoke. -+ * @arg: Argument to function. -+ * -+ * Fix the task in it's current state by avoiding wakeups and or rq operations -+ * and call @func(@arg) on it. This function can use ->on_rq and task_curr() -+ * to work out what the state is, if required. Given that @func can be invoked -+ * with a runqueue lock held, it had better be quite lightweight. -+ * -+ * Returns: -+ * Whatever @func returns -+ */ -+int task_call_func(struct task_struct *p, task_call_f func, void *arg) -+{ -+ struct rq *rq = NULL; -+ unsigned int state; -+ struct rq_flags rf; -+ int ret; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, rf.flags); -+ -+ state = READ_ONCE(p->__state); -+ -+ /* -+ * Ensure we load p->on_rq after p->__state, otherwise it would be -+ * possible to, falsely, observe p->on_rq == 0. -+ * -+ * See try_to_wake_up() for a longer comment. -+ */ -+ smp_rmb(); -+ -+ /* -+ * Since pi->lock blocks try_to_wake_up(), we don't need rq->lock when -+ * the task is blocked. Make sure to check @state since ttwu() can drop -+ * locks at the end, see ttwu_queue_wakelist(). 
-+ */ -+ if (state == TASK_RUNNING || state == TASK_WAKING || p->on_rq) -+ rq = __task_rq_lock(p, &rf); -+ -+ /* -+ * At this point the task is pinned; either: -+ * - blocked and we're holding off wakeups (pi->lock) -+ * - woken, and we're holding off enqueue (rq->lock) -+ * - queued, and we're holding off schedule (rq->lock) -+ * - running, and we're holding off de-schedule (rq->lock) -+ * -+ * The called function (@func) can use: task_curr(), p->on_rq and -+ * p->__state to differentiate between these states. -+ */ -+ ret = func(p, arg); -+ -+ if (rq) -+ __task_rq_unlock(rq, &rf); -+ -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags); -+ return ret; -+} -+ -+/** -+ * wake_up_process - Wake up a specific process -+ * @p: The process to be woken up. -+ * -+ * Attempt to wake up the nominated process and move it to the set of runnable -+ * processes. -+ * -+ * Return: 1 if the process was woken up, 0 if it was already running. -+ * -+ * This function executes a full memory barrier before accessing the task state. -+ */ -+int wake_up_process(struct task_struct *p) -+{ -+ return try_to_wake_up(p, TASK_NORMAL, 0); -+} -+EXPORT_SYMBOL(wake_up_process); -+ -+int wake_up_state(struct task_struct *p, unsigned int state) -+{ -+ return try_to_wake_up(p, state, 0); -+} -+ -+/* -+ * Perform scheduler related setup for a newly forked process p. -+ * p is forked by current. -+ * -+ * __sched_fork() is basic setup used by init_idle() too: -+ */ -+static inline void __sched_fork(unsigned long clone_flags, struct task_struct *p) -+{ -+ p->on_rq = 0; -+ p->on_cpu = 0; -+ p->utime = 0; -+ p->stime = 0; -+ p->sched_time = 0; -+ -+#ifdef CONFIG_SCHEDSTATS -+ /* Even if schedstat is disabled, there should not be garbage */ -+ memset(&p->stats, 0, sizeof(p->stats)); -+#endif -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ INIT_HLIST_HEAD(&p->preempt_notifiers); -+#endif -+ -+#ifdef CONFIG_COMPACTION -+ p->capture_control = NULL; -+#endif -+#ifdef CONFIG_SMP -+ p->wake_entry.u_flags = CSD_TYPE_TTWU; -+#endif -+} -+ -+/* -+ * fork()/clone()-time setup: -+ */ -+int sched_fork(unsigned long clone_flags, struct task_struct *p) -+{ -+ __sched_fork(clone_flags, p); -+ /* -+ * We mark the process as NEW here. This guarantees that -+ * nobody will actually run it, and a signal or other external -+ * event cannot wake it up and insert it on the runqueue either. -+ */ -+ p->__state = TASK_NEW; -+ -+ /* -+ * Make sure we do not leak PI boosting priority to the child. -+ */ -+ p->prio = current->normal_prio; -+ -+ /* -+ * Revert to default priority/policy on fork if requested. -+ */ -+ if (unlikely(p->sched_reset_on_fork)) { -+ if (task_has_rt_policy(p)) { -+ p->policy = SCHED_NORMAL; -+ p->static_prio = NICE_TO_PRIO(0); -+ p->rt_priority = 0; -+ } else if (PRIO_TO_NICE(p->static_prio) < 0) -+ p->static_prio = NICE_TO_PRIO(0); -+ -+ p->prio = p->normal_prio = p->static_prio; -+ -+ /* -+ * We don't need the reset flag anymore after the fork. It has -+ * fulfilled its duty: -+ */ -+ p->sched_reset_on_fork = 0; -+ } -+ -+#ifdef CONFIG_SCHED_INFO -+ if (unlikely(sched_info_on())) -+ memset(&p->sched_info, 0, sizeof(p->sched_info)); -+#endif -+ init_task_preempt_count(p); -+ -+ return 0; -+} -+ -+void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ -+ /* -+ * Because we're not yet on the pid-hash, p->pi_lock isn't strictly -+ * required yet, but lockdep gets upset if rules are violated. 
-+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ /* -+ * Share the timeslice between parent and child, thus the -+ * total amount of pending timeslices in the system doesn't change, -+ * resulting in more scheduling fairness. -+ */ -+ rq = this_rq(); -+ raw_spin_lock(&rq->lock); -+ -+ rq->curr->time_slice /= 2; -+ p->time_slice = rq->curr->time_slice; -+#ifdef CONFIG_SCHED_HRTICK -+ hrtick_start(rq, rq->curr->time_slice); -+#endif -+ -+ if (p->time_slice < RESCHED_NS) { -+ p->time_slice = sched_timeslice_ns; -+ resched_curr(rq); -+ } -+ sched_task_fork(p, rq); -+ raw_spin_unlock(&rq->lock); -+ -+ rseq_migrate(p); -+ /* -+ * We're setting the CPU for the first time, we don't migrate, -+ * so use __set_task_cpu(). -+ */ -+ __set_task_cpu(p, smp_processor_id()); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+void sched_post_fork(struct task_struct *p) -+{ -+} -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+DEFINE_STATIC_KEY_FALSE(sched_schedstats); -+ -+static void set_schedstats(bool enabled) -+{ -+ if (enabled) -+ static_branch_enable(&sched_schedstats); -+ else -+ static_branch_disable(&sched_schedstats); -+} -+ -+void force_schedstat_enabled(void) -+{ -+ if (!schedstat_enabled()) { -+ pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n"); -+ static_branch_enable(&sched_schedstats); -+ } -+} -+ -+static int __init setup_schedstats(char *str) -+{ -+ int ret = 0; -+ if (!str) -+ goto out; -+ -+ if (!strcmp(str, "enable")) { -+ set_schedstats(true); -+ ret = 1; -+ } else if (!strcmp(str, "disable")) { -+ set_schedstats(false); -+ ret = 1; -+ } -+out: -+ if (!ret) -+ pr_warn("Unable to parse schedstats=\n"); -+ -+ return ret; -+} -+__setup("schedstats=", setup_schedstats); -+ -+#ifdef CONFIG_PROC_SYSCTL -+static int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, -+ size_t *lenp, loff_t *ppos) -+{ -+ struct ctl_table t; -+ int err; -+ int state = static_branch_likely(&sched_schedstats); -+ -+ if (write && !capable(CAP_SYS_ADMIN)) -+ return -EPERM; -+ -+ t = *table; -+ t.data = &state; -+ err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); -+ if (err < 0) -+ return err; -+ if (write) -+ set_schedstats(state); -+ return err; -+} -+ -+static struct ctl_table sched_core_sysctls[] = { -+ { -+ .procname = "sched_schedstats", -+ .data = NULL, -+ .maxlen = sizeof(unsigned int), -+ .mode = 0644, -+ .proc_handler = sysctl_schedstats, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_ONE, -+ }, -+ {} -+}; -+static int __init sched_core_sysctl_init(void) -+{ -+ register_sysctl_init("kernel", sched_core_sysctls); -+ return 0; -+} -+late_initcall(sched_core_sysctl_init); -+#endif /* CONFIG_PROC_SYSCTL */ -+#endif /* CONFIG_SCHEDSTATS */ -+ -+/* -+ * wake_up_new_task - wake up a newly created task for the first time. -+ * -+ * This function will do some initial scheduler statistics housekeeping -+ * that must be done for every newly created context, then puts the task -+ * on the runqueue and wakes it. -+ */ -+void wake_up_new_task(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ WRITE_ONCE(p->__state, TASK_RUNNING); -+ rq = cpu_rq(select_task_rq(p)); -+#ifdef CONFIG_SMP -+ rseq_migrate(p); -+ /* -+ * Fork balancing, do it here and not earlier because: -+ * - cpus_ptr can change in the fork path -+ * - any previously selected CPU might disappear through hotplug -+ * -+ * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, -+ * as we're not fully set-up yet. 
-+ */ -+ __set_task_cpu(p, cpu_of(rq)); -+#endif -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ activate_task(p, rq); -+ trace_sched_wakeup_new(p); -+ check_preempt_curr(rq); -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+ -+#ifdef CONFIG_PREEMPT_NOTIFIERS -+ -+static DEFINE_STATIC_KEY_FALSE(preempt_notifier_key); -+ -+void preempt_notifier_inc(void) -+{ -+ static_branch_inc(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_inc); -+ -+void preempt_notifier_dec(void) -+{ -+ static_branch_dec(&preempt_notifier_key); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_dec); -+ -+/** -+ * preempt_notifier_register - tell me when current is being preempted & rescheduled -+ * @notifier: notifier struct to register -+ */ -+void preempt_notifier_register(struct preempt_notifier *notifier) -+{ -+ if (!static_branch_unlikely(&preempt_notifier_key)) -+ WARN(1, "registering preempt_notifier while notifiers disabled\n"); -+ -+ hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_register); -+ -+/** -+ * preempt_notifier_unregister - no longer interested in preemption notifications -+ * @notifier: notifier struct to unregister -+ * -+ * This is *not* safe to call from within a preemption notifier. -+ */ -+void preempt_notifier_unregister(struct preempt_notifier *notifier) -+{ -+ hlist_del(¬ifier->link); -+} -+EXPORT_SYMBOL_GPL(preempt_notifier_unregister); -+ -+static void __fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_in(notifier, raw_smp_processor_id()); -+} -+ -+static __always_inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_in_preempt_notifiers(curr); -+} -+ -+static void -+__fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ struct preempt_notifier *notifier; -+ -+ hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) -+ notifier->ops->sched_out(notifier, next); -+} -+ -+static __always_inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+ if (static_branch_unlikely(&preempt_notifier_key)) -+ __fire_sched_out_preempt_notifiers(curr, next); -+} -+ -+#else /* !CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void fire_sched_in_preempt_notifiers(struct task_struct *curr) -+{ -+} -+ -+static inline void -+fire_sched_out_preempt_notifiers(struct task_struct *curr, -+ struct task_struct *next) -+{ -+} -+ -+#endif /* CONFIG_PREEMPT_NOTIFIERS */ -+ -+static inline void prepare_task(struct task_struct *next) -+{ -+ /* -+ * Claim the task as running, we do this before switching to it -+ * such that any running task will have this set. -+ * -+ * See the ttwu() WF_ON_CPU case and its ordering comment. -+ */ -+ WRITE_ONCE(next->on_cpu, 1); -+} -+ -+static inline void finish_task(struct task_struct *prev) -+{ -+#ifdef CONFIG_SMP -+ /* -+ * This must be the very last reference to @prev from this CPU. After -+ * p->on_cpu is cleared, the task can be moved to a different CPU. We -+ * must ensure this doesn't happen until the switch is completely -+ * finished. -+ * -+ * In particular, the load of prev->state in finish_task_switch() must -+ * happen before this. -+ * -+ * Pairs with the smp_cond_load_acquire() in try_to_wake_up(). 
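The finish_task() / try_to_wake_up() handshake referred to above, smp_store_release(&prev->on_cpu, 0) paired with smp_cond_load_acquire(&p->on_cpu, !VAL), is a plain release/acquire ordering. A standalone C11 sketch of the same pattern follows (illustrative only, using stdatomic in place of the kernel primitives; build with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int on_cpu = 1;   /* stands in for p->on_cpu */
static int task_state;          /* data published before the release */

static void *prev_cpu(void *arg)
{
    task_state = 42;    /* everything written before the release store ...   */
    /* ... is visible to whoever acquire-loads on_cpu == 0, just as with
     * smp_store_release(&prev->on_cpu, 0) in finish_task(). */
    atomic_store_explicit(&on_cpu, 0, memory_order_release);
    return NULL;
}

static void *waker(void *arg)
{
    /* Spin until the release, like smp_cond_load_acquire(&p->on_cpu, !VAL). */
    while (atomic_load_explicit(&on_cpu, memory_order_acquire))
        ;
    printf("waker sees task_state=%d\n", task_state);  /* always prints 42 */
    return NULL;
}

int main(void)
{
    pthread_t a, b;

    pthread_create(&b, NULL, waker, NULL);
    pthread_create(&a, NULL, prev_cpu, NULL);
    pthread_join(a, NULL);
    pthread_join(b, NULL);
    return 0;
}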
-+ */ -+ smp_store_release(&prev->on_cpu, 0); -+#else -+ prev->on_cpu = 0; -+#endif -+} -+ -+#ifdef CONFIG_SMP -+ -+static void do_balance_callbacks(struct rq *rq, struct callback_head *head) -+{ -+ void (*func)(struct rq *rq); -+ struct callback_head *next; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ while (head) { -+ func = (void (*)(struct rq *))head->func; -+ next = head->next; -+ head->next = NULL; -+ head = next; -+ -+ func(rq); -+ } -+} -+ -+static void balance_push(struct rq *rq); -+ -+/* -+ * balance_push_callback is a right abuse of the callback interface and plays -+ * by significantly different rules. -+ * -+ * Where the normal balance_callback's purpose is to be ran in the same context -+ * that queued it (only later, when it's safe to drop rq->lock again), -+ * balance_push_callback is specifically targeted at __schedule(). -+ * -+ * This abuse is tolerated because it places all the unlikely/odd cases behind -+ * a single test, namely: rq->balance_callback == NULL. -+ */ -+struct callback_head balance_push_callback = { -+ .next = NULL, -+ .func = (void (*)(struct callback_head *))balance_push, -+}; -+ -+static inline struct callback_head * -+__splice_balance_callbacks(struct rq *rq, bool split) -+{ -+ struct callback_head *head = rq->balance_callback; -+ -+ if (likely(!head)) -+ return NULL; -+ -+ lockdep_assert_rq_held(rq); -+ /* -+ * Must not take balance_push_callback off the list when -+ * splice_balance_callbacks() and balance_callbacks() are not -+ * in the same rq->lock section. -+ * -+ * In that case it would be possible for __schedule() to interleave -+ * and observe the list empty. -+ */ -+ if (split && head == &balance_push_callback) -+ head = NULL; -+ else -+ rq->balance_callback = NULL; -+ -+ return head; -+} -+ -+static inline struct callback_head *splice_balance_callbacks(struct rq *rq) -+{ -+ return __splice_balance_callbacks(rq, true); -+} -+ -+static void __balance_callbacks(struct rq *rq) -+{ -+ do_balance_callbacks(rq, __splice_balance_callbacks(rq, false)); -+} -+ -+static inline void balance_callbacks(struct rq *rq, struct callback_head *head) -+{ -+ unsigned long flags; -+ -+ if (unlikely(head)) { -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ do_balance_callbacks(rq, head); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ } -+} -+ -+#else -+ -+static inline void __balance_callbacks(struct rq *rq) -+{ -+} -+ -+static inline struct callback_head *splice_balance_callbacks(struct rq *rq) -+{ -+ return NULL; -+} -+ -+static inline void balance_callbacks(struct rq *rq, struct callback_head *head) -+{ -+} -+ -+#endif -+ -+static inline void -+prepare_lock_switch(struct rq *rq, struct task_struct *next) -+{ -+ /* -+ * Since the runqueue lock will be released by the next -+ * task (which is an invalid locking op but in the case -+ * of the scheduler it's an obvious special-case), so we -+ * do an early lockdep release here: -+ */ -+ spin_release(&rq->lock.dep_map, _THIS_IP_); -+#ifdef CONFIG_DEBUG_SPINLOCK -+ /* this is a valid case when another task releases the spinlock */ -+ rq->lock.owner = next; -+#endif -+} -+ -+static inline void finish_lock_switch(struct rq *rq) -+{ -+ /* -+ * If we are tracking spinlock dependencies then we have to -+ * fix up the runqueue lock - which gets 'carried over' from -+ * prev into current: -+ */ -+ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); -+ __balance_callbacks(rq); -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+/* -+ * NOP if the arch has not defined these: -+ */ -+ -+#ifndef prepare_arch_switch -+# define 
prepare_arch_switch(next) do { } while (0) -+#endif -+ -+#ifndef finish_arch_post_lock_switch -+# define finish_arch_post_lock_switch() do { } while (0) -+#endif -+ -+static inline void kmap_local_sched_out(void) -+{ -+#ifdef CONFIG_KMAP_LOCAL -+ if (unlikely(current->kmap_ctrl.idx)) -+ __kmap_local_sched_out(); -+#endif -+} -+ -+static inline void kmap_local_sched_in(void) -+{ -+#ifdef CONFIG_KMAP_LOCAL -+ if (unlikely(current->kmap_ctrl.idx)) -+ __kmap_local_sched_in(); -+#endif -+} -+ -+/** -+ * prepare_task_switch - prepare to switch tasks -+ * @rq: the runqueue preparing to switch -+ * @next: the task we are going to switch to. -+ * -+ * This is called with the rq lock held and interrupts off. It must -+ * be paired with a subsequent finish_task_switch after the context -+ * switch. -+ * -+ * prepare_task_switch sets up locking and calls architecture specific -+ * hooks. -+ */ -+static inline void -+prepare_task_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ kcov_prepare_switch(prev); -+ sched_info_switch(rq, prev, next); -+ perf_event_task_sched_out(prev, next); -+ rseq_preempt(prev); -+ fire_sched_out_preempt_notifiers(prev, next); -+ kmap_local_sched_out(); -+ prepare_task(next); -+ prepare_arch_switch(next); -+} -+ -+/** -+ * finish_task_switch - clean up after a task-switch -+ * @rq: runqueue associated with task-switch -+ * @prev: the thread we just switched away from. -+ * -+ * finish_task_switch must be called after the context switch, paired -+ * with a prepare_task_switch call before the context switch. -+ * finish_task_switch will reconcile locking set up by prepare_task_switch, -+ * and do any other architecture-specific cleanup actions. -+ * -+ * Note that we may have delayed dropping an mm in context_switch(). If -+ * so, we finish that here outside of the runqueue lock. (Doing it -+ * with the lock held can cause deadlocks; see schedule() for -+ * details.) -+ * -+ * The context switch have flipped the stack from under us and restored the -+ * local variables which were saved when this task called schedule() in the -+ * past. prev == current is still correct but we need to recalculate this_rq -+ * because prev may have moved to another CPU. -+ */ -+static struct rq *finish_task_switch(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ struct rq *rq = this_rq(); -+ struct mm_struct *mm = rq->prev_mm; -+ unsigned int prev_state; -+ -+ /* -+ * The previous task will have left us with a preempt_count of 2 -+ * because it left us after: -+ * -+ * schedule() -+ * preempt_disable(); // 1 -+ * __schedule() -+ * raw_spin_lock_irq(&rq->lock) // 2 -+ * -+ * Also, see FORK_PREEMPT_COUNT. -+ */ -+ if (WARN_ONCE(preempt_count() != 2*PREEMPT_DISABLE_OFFSET, -+ "corrupted preempt_count: %s/%d/0x%x\n", -+ current->comm, current->pid, preempt_count())) -+ preempt_count_set(FORK_PREEMPT_COUNT); -+ -+ rq->prev_mm = NULL; -+ -+ /* -+ * A task struct has one reference for the use as "current". -+ * If a task dies, then it sets TASK_DEAD in tsk->state and calls -+ * schedule one last time. The schedule call will never return, and -+ * the scheduled task must drop that reference. -+ * -+ * We must observe prev->state before clearing prev->on_cpu (in -+ * finish_task), otherwise a concurrent wakeup can get prev -+ * running on another CPU and we could rave with its RUNNING -> DEAD -+ * transition, resulting in a double drop. 
-+ */ -+ prev_state = READ_ONCE(prev->__state); -+ vtime_task_switch(prev); -+ perf_event_task_sched_in(prev, current); -+ finish_task(prev); -+ tick_nohz_task_switch(); -+ finish_lock_switch(rq); -+ finish_arch_post_lock_switch(); -+ kcov_finish_switch(current); -+ /* -+ * kmap_local_sched_out() is invoked with rq::lock held and -+ * interrupts disabled. There is no requirement for that, but the -+ * sched out code does not have an interrupt enabled section. -+ * Restoring the maps on sched in does not require interrupts being -+ * disabled either. -+ */ -+ kmap_local_sched_in(); -+ -+ fire_sched_in_preempt_notifiers(current); -+ /* -+ * When switching through a kernel thread, the loop in -+ * membarrier_{private,global}_expedited() may have observed that -+ * kernel thread and not issued an IPI. It is therefore possible to -+ * schedule between user->kernel->user threads without passing though -+ * switch_mm(). Membarrier requires a barrier after storing to -+ * rq->curr, before returning to userspace, so provide them here: -+ * -+ * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly -+ * provided by mmdrop(), -+ * - a sync_core for SYNC_CORE. -+ */ -+ if (mm) { -+ membarrier_mm_sync_core_before_usermode(mm); -+ mmdrop_sched(mm); -+ } -+ if (unlikely(prev_state == TASK_DEAD)) { -+ /* Task is done with its stack. */ -+ put_task_stack(prev); -+ -+ put_task_struct_rcu_user(prev); -+ } -+ -+ return rq; -+} -+ -+/** -+ * schedule_tail - first thing a freshly forked thread must call. -+ * @prev: the thread we just switched away from. -+ */ -+asmlinkage __visible void schedule_tail(struct task_struct *prev) -+ __releases(rq->lock) -+{ -+ /* -+ * New tasks start with FORK_PREEMPT_COUNT, see there and -+ * finish_task_switch() for details. -+ * -+ * finish_task_switch() will drop rq->lock() and lower preempt_count -+ * and the preempt_enable() will end up enabling preemption (on -+ * PREEMPT_COUNT kernels). -+ */ -+ -+ finish_task_switch(prev); -+ preempt_enable(); -+ -+ if (current->set_child_tid) -+ put_user(task_pid_vnr(current), current->set_child_tid); -+ -+ calculate_sigpending(); -+} -+ -+/* -+ * context_switch - switch to the new MM and the new thread's register state. -+ */ -+static __always_inline struct rq * -+context_switch(struct rq *rq, struct task_struct *prev, -+ struct task_struct *next) -+{ -+ prepare_task_switch(rq, prev, next); -+ -+ /* -+ * For paravirt, this is coupled with an exit in switch_to to -+ * combine the page table reload and the switch backend into -+ * one hypercall. -+ */ -+ arch_start_context_switch(prev); -+ -+ /* -+ * kernel -> kernel lazy + transfer active -+ * user -> kernel lazy + mmgrab() active -+ * -+ * kernel -> user switch + mmdrop() active -+ * user -> user switch -+ */ -+ if (!next->mm) { // to kernel -+ enter_lazy_tlb(prev->active_mm, next); -+ -+ next->active_mm = prev->active_mm; -+ if (prev->mm) // from user -+ mmgrab(prev->active_mm); -+ else -+ prev->active_mm = NULL; -+ } else { // to user -+ membarrier_switch_mm(rq, prev->active_mm, next->mm); -+ /* -+ * sys_membarrier() requires an smp_mb() between setting -+ * rq->curr / membarrier_switch_mm() and returning to userspace. -+ * -+ * The below provides this either through switch_mm(), or in -+ * case 'prev->active_mm == next->mm' through -+ * finish_task_switch()'s mmdrop(). -+ */ -+ switch_mm_irqs_off(prev->active_mm, next->mm, next); -+ -+ if (!prev->mm) { // from kernel -+ /* will mmdrop() in finish_task_switch(). 
*/ -+ rq->prev_mm = prev->active_mm; -+ prev->active_mm = NULL; -+ } -+ } -+ -+ prepare_lock_switch(rq, next); -+ -+ /* Here we just switch the register state and the stack. */ -+ switch_to(prev, next, prev); -+ barrier(); -+ -+ return finish_task_switch(prev); -+} -+ -+/* -+ * nr_running, nr_uninterruptible and nr_context_switches: -+ * -+ * externally visible scheduler statistics: current number of runnable -+ * threads, total number of context switches performed since bootup. -+ */ -+unsigned int nr_running(void) -+{ -+ unsigned int i, sum = 0; -+ -+ for_each_online_cpu(i) -+ sum += cpu_rq(i)->nr_running; -+ -+ return sum; -+} -+ -+/* -+ * Check if only the current task is running on the CPU. -+ * -+ * Caution: this function does not check that the caller has disabled -+ * preemption, thus the result might have a time-of-check-to-time-of-use -+ * race. The caller is responsible to use it correctly, for example: -+ * -+ * - from a non-preemptible section (of course) -+ * -+ * - from a thread that is bound to a single CPU -+ * -+ * - in a loop with very short iterations (e.g. a polling loop) -+ */ -+bool single_task_running(void) -+{ -+ return raw_rq()->nr_running == 1; -+} -+EXPORT_SYMBOL(single_task_running); -+ -+unsigned long long nr_context_switches(void) -+{ -+ int i; -+ unsigned long long sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += cpu_rq(i)->nr_switches; -+ -+ return sum; -+} -+ -+/* -+ * Consumers of these two interfaces, like for example the cpuidle menu -+ * governor, are using nonsensical data. Preferring shallow idle state selection -+ * for a CPU that has IO-wait which might not even end up running the task when -+ * it does become runnable. -+ */ -+ -+unsigned int nr_iowait_cpu(int cpu) -+{ -+ return atomic_read(&cpu_rq(cpu)->nr_iowait); -+} -+ -+/* -+ * IO-wait accounting, and how it's mostly bollocks (on SMP). -+ * -+ * The idea behind IO-wait account is to account the idle time that we could -+ * have spend running if it were not for IO. That is, if we were to improve the -+ * storage performance, we'd have a proportional reduction in IO-wait time. -+ * -+ * This all works nicely on UP, where, when a task blocks on IO, we account -+ * idle time as IO-wait, because if the storage were faster, it could've been -+ * running and we'd not be idle. -+ * -+ * This has been extended to SMP, by doing the same for each CPU. This however -+ * is broken. -+ * -+ * Imagine for instance the case where two tasks block on one CPU, only the one -+ * CPU will have IO-wait accounted, while the other has regular idle. Even -+ * though, if the storage were faster, both could've ran at the same time, -+ * utilising both CPUs. -+ * -+ * This means, that when looking globally, the current IO-wait accounting on -+ * SMP is a lower bound, by reason of under accounting. -+ * -+ * Worse, since the numbers are provided per CPU, they are sometimes -+ * interpreted per CPU, and that is nonsensical. A blocked task isn't strictly -+ * associated with any one particular CPU, it can wake to another CPU than it -+ * blocked on. This means the per CPU IO-wait number is meaningless. -+ * -+ * Task CPU affinities can make all that even more 'interesting'. -+ */ -+ -+unsigned int nr_iowait(void) -+{ -+ unsigned int i, sum = 0; -+ -+ for_each_possible_cpu(i) -+ sum += nr_iowait_cpu(i); -+ -+ return sum; -+} -+ -+#ifdef CONFIG_SMP -+ -+/* -+ * sched_exec - execve() is a valuable balancing opportunity, because at -+ * this point the task has the smallest effective memory and cache -+ * footprint. 
-+ */ -+void sched_exec(void) -+{ -+} -+ -+#endif -+ -+DEFINE_PER_CPU(struct kernel_stat, kstat); -+DEFINE_PER_CPU(struct kernel_cpustat, kernel_cpustat); -+ -+EXPORT_PER_CPU_SYMBOL(kstat); -+EXPORT_PER_CPU_SYMBOL(kernel_cpustat); -+ -+static inline void update_curr(struct rq *rq, struct task_struct *p) -+{ -+ s64 ns = rq->clock_task - p->last_ran; -+ -+ p->sched_time += ns; -+ cgroup_account_cputime(p, ns); -+ account_group_exec_runtime(p, ns); -+ -+ p->time_slice -= ns; -+ p->last_ran = rq->clock_task; -+} -+ -+/* -+ * Return accounted runtime for the task. -+ * Return separately the current's pending runtime that have not been -+ * accounted yet. -+ */ -+unsigned long long task_sched_runtime(struct task_struct *p) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ u64 ns; -+ -+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) -+ /* -+ * 64-bit doesn't need locks to atomically read a 64-bit value. -+ * So we have a optimization chance when the task's delta_exec is 0. -+ * Reading ->on_cpu is racy, but this is ok. -+ * -+ * If we race with it leaving CPU, we'll take a lock. So we're correct. -+ * If we race with it entering CPU, unaccounted time is 0. This is -+ * indistinguishable from the read occurring a few cycles earlier. -+ * If we see ->on_cpu without ->on_rq, the task is leaving, and has -+ * been accounted, so we're correct here as well. -+ */ -+ if (!p->on_cpu || !task_on_rq_queued(p)) -+ return tsk_seruntime(p); -+#endif -+ -+ rq = task_access_lock_irqsave(p, &lock, &flags); -+ /* -+ * Must be ->curr _and_ ->on_rq. If dequeued, we would -+ * project cycles that may never be accounted to this -+ * thread, breaking clock_gettime(). -+ */ -+ if (p == rq->curr && task_on_rq_queued(p)) { -+ update_rq_clock(rq); -+ update_curr(rq, p); -+ } -+ ns = tsk_seruntime(p); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+ return ns; -+} -+ -+/* This manages tasks that have run out of timeslice during a scheduler_tick */ -+static inline void scheduler_task_tick(struct rq *rq) -+{ -+ struct task_struct *p = rq->curr; -+ -+ if (is_idle_task(p)) -+ return; -+ -+ update_curr(rq, p); -+ cpufreq_update_util(rq, 0); -+ -+ /* -+ * Tasks have less than RESCHED_NS of time slice left they will be -+ * rescheduled. 
-+ */ -+ if (p->time_slice >= RESCHED_NS) -+ return; -+ set_tsk_need_resched(p); -+ set_preempt_need_resched(); -+} -+ -+#ifdef CONFIG_SCHED_DEBUG -+static u64 cpu_resched_latency(struct rq *rq) -+{ -+ int latency_warn_ms = READ_ONCE(sysctl_resched_latency_warn_ms); -+ u64 resched_latency, now = rq_clock(rq); -+ static bool warned_once; -+ -+ if (sysctl_resched_latency_warn_once && warned_once) -+ return 0; -+ -+ if (!need_resched() || !latency_warn_ms) -+ return 0; -+ -+ if (system_state == SYSTEM_BOOTING) -+ return 0; -+ -+ if (!rq->last_seen_need_resched_ns) { -+ rq->last_seen_need_resched_ns = now; -+ rq->ticks_without_resched = 0; -+ return 0; -+ } -+ -+ rq->ticks_without_resched++; -+ resched_latency = now - rq->last_seen_need_resched_ns; -+ if (resched_latency <= latency_warn_ms * NSEC_PER_MSEC) -+ return 0; -+ -+ warned_once = true; -+ -+ return resched_latency; -+} -+ -+static int __init setup_resched_latency_warn_ms(char *str) -+{ -+ long val; -+ -+ if ((kstrtol(str, 0, &val))) { -+ pr_warn("Unable to set resched_latency_warn_ms\n"); -+ return 1; -+ } -+ -+ sysctl_resched_latency_warn_ms = val; -+ return 1; -+} -+__setup("resched_latency_warn_ms=", setup_resched_latency_warn_ms); -+#else -+static inline u64 cpu_resched_latency(struct rq *rq) { return 0; } -+#endif /* CONFIG_SCHED_DEBUG */ -+ -+/* -+ * This function gets called by the timer code, with HZ frequency. -+ * We call it with interrupts disabled. -+ */ -+void scheduler_tick(void) -+{ -+ int cpu __maybe_unused = smp_processor_id(); -+ struct rq *rq = cpu_rq(cpu); -+ u64 resched_latency; -+ -+ arch_scale_freq_tick(); -+ sched_clock_tick(); -+ -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ scheduler_task_tick(rq); -+ if (sched_feat(LATENCY_WARN)) -+ resched_latency = cpu_resched_latency(rq); -+ calc_global_load_tick(rq); -+ -+ rq->last_tick = rq->clock; -+ raw_spin_unlock(&rq->lock); -+ -+ if (sched_feat(LATENCY_WARN) && resched_latency) -+ resched_latency_warn(cpu, resched_latency); -+ -+ perf_event_task_tick(); -+} -+ -+#ifdef CONFIG_SCHED_SMT -+static inline int sg_balance_cpu_stop(void *data) -+{ -+ struct rq *rq = this_rq(); -+ struct task_struct *p = data; -+ cpumask_t tmp; -+ unsigned long flags; -+ -+ local_irq_save(flags); -+ -+ raw_spin_lock(&p->pi_lock); -+ raw_spin_lock(&rq->lock); -+ -+ rq->active_balance = 0; -+ /* _something_ may have changed the task, double check again */ -+ if (task_on_rq_queued(p) && task_rq(p) == rq && -+ cpumask_and(&tmp, p->cpus_ptr, &sched_sg_idle_mask) && -+ !is_migration_disabled(p)) { -+ int cpu = cpu_of(rq); -+ int dcpu = __best_mask_cpu(&tmp, per_cpu(sched_cpu_llc_mask, cpu)); -+ rq = move_queued_task(rq, p, dcpu); -+ } -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock(&p->pi_lock); -+ -+ local_irq_restore(flags); -+ -+ return 0; -+} -+ -+/* sg_balance_trigger - trigger slibing group balance for @cpu */ -+static inline int sg_balance_trigger(const int cpu) -+{ -+ struct rq *rq= cpu_rq(cpu); -+ unsigned long flags; -+ struct task_struct *curr; -+ int res; -+ -+ if (!raw_spin_trylock_irqsave(&rq->lock, flags)) -+ return 0; -+ curr = rq->curr; -+ res = (!is_idle_task(curr)) && (1 == rq->nr_running) &&\ -+ cpumask_intersects(curr->cpus_ptr, &sched_sg_idle_mask) &&\ -+ !is_migration_disabled(curr) && (!rq->active_balance); -+ -+ if (res) -+ rq->active_balance = 1; -+ -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ if (res) -+ stop_one_cpu_nowait(cpu, sg_balance_cpu_stop, curr, -+ &rq->active_balance_work); -+ return res; -+} -+ -+/* -+ * sg_balance - slibing 
group balance check for run queue @rq -+ */ -+static inline void sg_balance(struct rq *rq) -+{ -+ cpumask_t chk; -+ int cpu = cpu_of(rq); -+ -+ /* exit when cpu is offline */ -+ if (unlikely(!rq->online)) -+ return; -+ -+ /* -+ * Only cpu in slibing idle group will do the checking and then -+ * find potential cpus which can migrate the current running task -+ */ -+ if (cpumask_test_cpu(cpu, &sched_sg_idle_mask) && -+ cpumask_andnot(&chk, cpu_online_mask, sched_rq_watermark) && -+ cpumask_andnot(&chk, &chk, &sched_rq_pending_mask)) { -+ int i; -+ -+ for_each_cpu_wrap(i, &chk, cpu) { -+ if (cpumask_subset(cpu_smt_mask(i), &chk) && -+ sg_balance_trigger(i)) -+ return; -+ } -+ } -+} -+#endif /* CONFIG_SCHED_SMT */ -+ -+#ifdef CONFIG_NO_HZ_FULL -+ -+struct tick_work { -+ int cpu; -+ atomic_t state; -+ struct delayed_work work; -+}; -+/* Values for ->state, see diagram below. */ -+#define TICK_SCHED_REMOTE_OFFLINE 0 -+#define TICK_SCHED_REMOTE_OFFLINING 1 -+#define TICK_SCHED_REMOTE_RUNNING 2 -+ -+/* -+ * State diagram for ->state: -+ * -+ * -+ * TICK_SCHED_REMOTE_OFFLINE -+ * | ^ -+ * | | -+ * | | sched_tick_remote() -+ * | | -+ * | | -+ * +--TICK_SCHED_REMOTE_OFFLINING -+ * | ^ -+ * | | -+ * sched_tick_start() | | sched_tick_stop() -+ * | | -+ * V | -+ * TICK_SCHED_REMOTE_RUNNING -+ * -+ * -+ * Other transitions get WARN_ON_ONCE(), except that sched_tick_remote() -+ * and sched_tick_start() are happy to leave the state in RUNNING. -+ */ -+ -+static struct tick_work __percpu *tick_work_cpu; -+ -+static void sched_tick_remote(struct work_struct *work) -+{ -+ struct delayed_work *dwork = to_delayed_work(work); -+ struct tick_work *twork = container_of(dwork, struct tick_work, work); -+ int cpu = twork->cpu; -+ struct rq *rq = cpu_rq(cpu); -+ struct task_struct *curr; -+ unsigned long flags; -+ u64 delta; -+ int os; -+ -+ /* -+ * Handle the tick only if it appears the remote CPU is running in full -+ * dynticks mode. The check is racy by nature, but missing a tick or -+ * having one too much is no big deal because the scheduler tick updates -+ * statistics and checks timeslices in a time-independent way, regardless -+ * of when exactly it is running. -+ */ -+ if (!tick_nohz_tick_stopped_cpu(cpu)) -+ goto out_requeue; -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ curr = rq->curr; -+ if (cpu_is_offline(cpu)) -+ goto out_unlock; -+ -+ update_rq_clock(rq); -+ if (!is_idle_task(curr)) { -+ /* -+ * Make sure the next tick runs within a reasonable -+ * amount of time. -+ */ -+ delta = rq_clock_task(rq) - curr->last_ran; -+ WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); -+ } -+ scheduler_task_tick(rq); -+ -+ calc_load_nohz_remote(rq); -+out_unlock: -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+out_requeue: -+ /* -+ * Run the remote tick once per second (1Hz). This arbitrary -+ * frequency is large enough to avoid overload but short enough -+ * to keep scheduler internal stats reasonably up to date. But -+ * first update state to reflect hotplug activity if required. 
-+ */ -+ os = atomic_fetch_add_unless(&twork->state, -1, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_OFFLINE); -+ if (os == TICK_SCHED_REMOTE_RUNNING) -+ queue_delayed_work(system_unbound_wq, dwork, HZ); -+} -+ -+static void sched_tick_start(int cpu) -+{ -+ int os; -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_TYPE_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ os = atomic_xchg(&twork->state, TICK_SCHED_REMOTE_RUNNING); -+ WARN_ON_ONCE(os == TICK_SCHED_REMOTE_RUNNING); -+ if (os == TICK_SCHED_REMOTE_OFFLINE) { -+ twork->cpu = cpu; -+ INIT_DELAYED_WORK(&twork->work, sched_tick_remote); -+ queue_delayed_work(system_unbound_wq, &twork->work, HZ); -+ } -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+static void sched_tick_stop(int cpu) -+{ -+ struct tick_work *twork; -+ -+ if (housekeeping_cpu(cpu, HK_TYPE_TICK)) -+ return; -+ -+ WARN_ON_ONCE(!tick_work_cpu); -+ -+ twork = per_cpu_ptr(tick_work_cpu, cpu); -+ cancel_delayed_work_sync(&twork->work); -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+int __init sched_tick_offload_init(void) -+{ -+ tick_work_cpu = alloc_percpu(struct tick_work); -+ BUG_ON(!tick_work_cpu); -+ return 0; -+} -+ -+#else /* !CONFIG_NO_HZ_FULL */ -+static inline void sched_tick_start(int cpu) { } -+static inline void sched_tick_stop(int cpu) { } -+#endif -+ -+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \ -+ defined(CONFIG_PREEMPT_TRACER)) -+/* -+ * If the value passed in is equal to the current preempt count -+ * then we just disabled preemption. Start timing the latency. -+ */ -+static inline void preempt_latency_start(int val) -+{ -+ if (preempt_count() == val) { -+ unsigned long ip = get_lock_parent_ip(); -+#ifdef CONFIG_DEBUG_PREEMPT -+ current->preempt_disable_ip = ip; -+#endif -+ trace_preempt_off(CALLER_ADDR0, ip); -+ } -+} -+ -+void preempt_count_add(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0))) -+ return; -+#endif -+ __preempt_count_add(val); -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Spinlock count overflowing soon? -+ */ -+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= -+ PREEMPT_MASK - 10); -+#endif -+ preempt_latency_start(val); -+} -+EXPORT_SYMBOL(preempt_count_add); -+NOKPROBE_SYMBOL(preempt_count_add); -+ -+/* -+ * If the value passed in equals to the current preempt count -+ * then we just enabled preemption. Stop timing the latency. -+ */ -+static inline void preempt_latency_stop(int val) -+{ -+ if (preempt_count() == val) -+ trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); -+} -+ -+void preempt_count_sub(int val) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ /* -+ * Underflow? -+ */ -+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) -+ return; -+ /* -+ * Is the spinlock portion underflowing? 
-+ */ -+ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) && -+ !(preempt_count() & PREEMPT_MASK))) -+ return; -+#endif -+ -+ preempt_latency_stop(val); -+ __preempt_count_sub(val); -+} -+EXPORT_SYMBOL(preempt_count_sub); -+NOKPROBE_SYMBOL(preempt_count_sub); -+ -+#else -+static inline void preempt_latency_start(int val) { } -+static inline void preempt_latency_stop(int val) { } -+#endif -+ -+static inline unsigned long get_preempt_disable_ip(struct task_struct *p) -+{ -+#ifdef CONFIG_DEBUG_PREEMPT -+ return p->preempt_disable_ip; -+#else -+ return 0; -+#endif -+} -+ -+/* -+ * Print scheduling while atomic bug: -+ */ -+static noinline void __schedule_bug(struct task_struct *prev) -+{ -+ /* Save this before calling printk(), since that will clobber it */ -+ unsigned long preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ if (oops_in_progress) -+ return; -+ -+ printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", -+ prev->comm, prev->pid, preempt_count()); -+ -+ debug_show_held_locks(prev); -+ print_modules(); -+ if (irqs_disabled()) -+ print_irqtrace_events(prev); -+ if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) -+ && in_atomic_preempt_off()) { -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, preempt_disable_ip); -+ } -+ if (panic_on_warn) -+ panic("scheduling while atomic\n"); -+ -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+ -+/* -+ * Various schedule()-time debugging checks and statistics: -+ */ -+static inline void schedule_debug(struct task_struct *prev, bool preempt) -+{ -+#ifdef CONFIG_SCHED_STACK_END_CHECK -+ if (task_stack_end_corrupted(prev)) -+ panic("corrupted stack end detected inside scheduler\n"); -+ -+ if (task_scs_end_corrupted(prev)) -+ panic("corrupted shadow stack detected inside scheduler\n"); -+#endif -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ if (!preempt && READ_ONCE(prev->__state) && prev->non_block_count) { -+ printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n", -+ prev->comm, prev->pid, prev->non_block_count); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+ } -+#endif -+ -+ if (unlikely(in_atomic_preempt_off())) { -+ __schedule_bug(prev); -+ preempt_count_set(PREEMPT_DISABLED); -+ } -+ rcu_sleep_check(); -+ SCHED_WARN_ON(ct_state() == CONTEXT_USER); -+ -+ profile_hit(SCHED_PROFILING, __builtin_return_address(0)); -+ -+ schedstat_inc(this_rq()->sched_count); -+} -+ -+/* -+ * Compile time debug macro -+ * #define ALT_SCHED_DEBUG -+ */ -+ -+#ifdef ALT_SCHED_DEBUG -+void alt_sched_debug(void) -+{ -+ printk(KERN_INFO "sched: pending: 0x%04lx, idle: 0x%04lx, sg_idle: 0x%04lx\n", -+ sched_rq_pending_mask.bits[0], -+ sched_rq_watermark[0].bits[0], -+ sched_sg_idle_mask.bits[0]); -+} -+#else -+inline void alt_sched_debug(void) {} -+#endif -+ -+#ifdef CONFIG_SMP -+ -+#define SCHED_RQ_NR_MIGRATION (32U) -+/* -+ * Migrate pending tasks in @rq to @dest_cpu -+ * Will try to migrate mininal of half of @rq nr_running tasks and -+ * SCHED_RQ_NR_MIGRATION to @dest_cpu -+ */ -+static inline int -+migrate_pending_tasks(struct rq *rq, struct rq *dest_rq, const int dest_cpu) -+{ -+ struct task_struct *p, *skip = rq->curr; -+ int nr_migrated = 0; -+ int nr_tries = min(rq->nr_running / 2, SCHED_RQ_NR_MIGRATION); -+ -+ while (skip != rq->idle && nr_tries && -+ (p = sched_rq_next_task(skip, rq)) != rq->idle) { -+ skip = sched_rq_next_task(p, rq); -+ if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) { -+ __SCHED_DEQUEUE_TASK(p, rq, 0); -+ set_task_cpu(p, dest_cpu); -+ sched_task_sanity_check(p, dest_rq); -+ 
__SCHED_ENQUEUE_TASK(p, dest_rq, 0); -+ nr_migrated++; -+ } -+ nr_tries--; -+ } -+ -+ return nr_migrated; -+} -+ -+static inline int take_other_rq_tasks(struct rq *rq, int cpu) -+{ -+ struct cpumask *topo_mask, *end_mask; -+ -+ if (unlikely(!rq->online)) -+ return 0; -+ -+ if (cpumask_empty(&sched_rq_pending_mask)) -+ return 0; -+ -+ topo_mask = per_cpu(sched_cpu_topo_masks, cpu) + 1; -+ end_mask = per_cpu(sched_cpu_topo_end_mask, cpu); -+ do { -+ int i; -+ for_each_cpu_and(i, &sched_rq_pending_mask, topo_mask) { -+ int nr_migrated; -+ struct rq *src_rq; -+ -+ src_rq = cpu_rq(i); -+ if (!do_raw_spin_trylock(&src_rq->lock)) -+ continue; -+ spin_acquire(&src_rq->lock.dep_map, -+ SINGLE_DEPTH_NESTING, 1, _RET_IP_); -+ -+ if ((nr_migrated = migrate_pending_tasks(src_rq, rq, cpu))) { -+ src_rq->nr_running -= nr_migrated; -+ if (src_rq->nr_running < 2) -+ cpumask_clear_cpu(i, &sched_rq_pending_mask); -+ -+ rq->nr_running += nr_migrated; -+ if (rq->nr_running > 1) -+ cpumask_set_cpu(cpu, &sched_rq_pending_mask); -+ -+ cpufreq_update_util(rq, 0); -+ -+ spin_release(&src_rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&src_rq->lock); -+ -+ return 1; -+ } -+ -+ spin_release(&src_rq->lock.dep_map, _RET_IP_); -+ do_raw_spin_unlock(&src_rq->lock); -+ } -+ } while (++topo_mask < end_mask); -+ -+ return 0; -+} -+#endif -+ -+/* -+ * Timeslices below RESCHED_NS are considered as good as expired as there's no -+ * point rescheduling when there's so little time left. -+ */ -+static inline void check_curr(struct task_struct *p, struct rq *rq) -+{ -+ if (unlikely(rq->idle == p)) -+ return; -+ -+ update_curr(rq, p); -+ -+ if (p->time_slice < RESCHED_NS) -+ time_slice_expired(p, rq); -+} -+ -+static inline struct task_struct * -+choose_next_task(struct rq *rq, int cpu, struct task_struct *prev) -+{ -+ struct task_struct *next; -+ -+ if (unlikely(rq->skip)) { -+ next = rq_runnable_task(rq); -+ if (next == rq->idle) { -+#ifdef CONFIG_SMP -+ if (!take_other_rq_tasks(rq, cpu)) { -+#endif -+ rq->skip = NULL; -+ schedstat_inc(rq->sched_goidle); -+ return next; -+#ifdef CONFIG_SMP -+ } -+ next = rq_runnable_task(rq); -+#endif -+ } -+ rq->skip = NULL; -+#ifdef CONFIG_HIGH_RES_TIMERS -+ hrtick_start(rq, next->time_slice); -+#endif -+ return next; -+ } -+ -+ next = sched_rq_first_task(rq); -+ if (next == rq->idle) { -+#ifdef CONFIG_SMP -+ if (!take_other_rq_tasks(rq, cpu)) { -+#endif -+ schedstat_inc(rq->sched_goidle); -+ /*printk(KERN_INFO "sched: choose_next_task(%d) idle %px\n", cpu, next);*/ -+ return next; -+#ifdef CONFIG_SMP -+ } -+ next = sched_rq_first_task(rq); -+#endif -+ } -+#ifdef CONFIG_HIGH_RES_TIMERS -+ hrtick_start(rq, next->time_slice); -+#endif -+ /*printk(KERN_INFO "sched: choose_next_task(%d) next %px\n", cpu, -+ * next);*/ -+ return next; -+} -+ -+/* -+ * Constants for the sched_mode argument of __schedule(). -+ * -+ * The mode argument allows RT enabled kernels to differentiate a -+ * preemption from blocking on an 'sleeping' spin/rwlock. Note that -+ * SM_MASK_PREEMPT for !RT has all bits set, which allows the compiler to -+ * optimize the AND operation out and just check for zero. -+ */ -+#define SM_NONE 0x0 -+#define SM_PREEMPT 0x1 -+#define SM_RTLOCK_WAIT 0x2 -+ -+#ifndef CONFIG_PREEMPT_RT -+# define SM_MASK_PREEMPT (~0U) -+#else -+# define SM_MASK_PREEMPT SM_PREEMPT -+#endif -+ -+/* -+ * schedule() is the main scheduler function. -+ * -+ * The main means of driving the scheduler and thus entering this function are: -+ * -+ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc. -+ * -+ * 2. 
TIF_NEED_RESCHED flag is checked on interrupt and userspace return -+ * paths. For example, see arch/x86/entry_64.S. -+ * -+ * To drive preemption between tasks, the scheduler sets the flag in timer -+ * interrupt handler scheduler_tick(). -+ * -+ * 3. Wakeups don't really cause entry into schedule(). They add a -+ * task to the run-queue and that's it. -+ * -+ * Now, if the new task added to the run-queue preempts the current -+ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets -+ * called on the nearest possible occasion: -+ * -+ * - If the kernel is preemptible (CONFIG_PREEMPTION=y): -+ * -+ * - in syscall or exception context, at the next outmost -+ * preempt_enable(). (this might be as soon as the wake_up()'s -+ * spin_unlock()!) -+ * -+ * - in IRQ context, return from interrupt-handler to -+ * preemptible context -+ * -+ * - If the kernel is not preemptible (CONFIG_PREEMPTION is not set) -+ * then at the next: -+ * -+ * - cond_resched() call -+ * - explicit schedule() call -+ * - return from syscall or exception to user-space -+ * - return from interrupt-handler to user-space -+ * -+ * WARNING: must be called with preemption disabled! -+ */ -+static void __sched notrace __schedule(unsigned int sched_mode) -+{ -+ struct task_struct *prev, *next; -+ unsigned long *switch_count; -+ unsigned long prev_state; -+ struct rq *rq; -+ int cpu; -+ int deactivated = 0; -+ -+ cpu = smp_processor_id(); -+ rq = cpu_rq(cpu); -+ prev = rq->curr; -+ -+ schedule_debug(prev, !!sched_mode); -+ -+ /* by passing sched_feat(HRTICK) checking which Alt schedule FW doesn't support */ -+ hrtick_clear(rq); -+ -+ local_irq_disable(); -+ rcu_note_context_switch(!!sched_mode); -+ -+ /* -+ * Make sure that signal_pending_state()->signal_pending() below -+ * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) -+ * done by the caller to avoid the race with signal_wake_up(): -+ * -+ * __set_current_state(@state) signal_wake_up() -+ * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING) -+ * wake_up_state(p, state) -+ * LOCK rq->lock LOCK p->pi_state -+ * smp_mb__after_spinlock() smp_mb__after_spinlock() -+ * if (signal_pending_state()) if (p->state & @state) -+ * -+ * Also, the membarrier system call requires a full memory barrier -+ * after coming from user-space, before storing to rq->curr. -+ */ -+ raw_spin_lock(&rq->lock); -+ smp_mb__after_spinlock(); -+ -+ update_rq_clock(rq); -+ -+ switch_count = &prev->nivcsw; -+ /* -+ * We must load prev->state once (task_struct::state is volatile), such -+ * that we form a control dependency vs deactivate_task() below. -+ */ -+ prev_state = READ_ONCE(prev->__state); -+ if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) { -+ if (signal_pending_state(prev_state, prev)) { -+ WRITE_ONCE(prev->__state, TASK_RUNNING); -+ } else { -+ prev->sched_contributes_to_load = -+ (prev_state & TASK_UNINTERRUPTIBLE) && -+ !(prev_state & TASK_NOLOAD) && -+ !(prev->flags & PF_FROZEN); -+ -+ if (prev->sched_contributes_to_load) -+ rq->nr_uninterruptible++; -+ -+ /* -+ * __schedule() ttwu() -+ * prev_state = prev->state; if (p->on_rq && ...) -+ * if (prev_state) goto out; -+ * p->on_rq = 0; smp_acquire__after_ctrl_dep(); -+ * p->state = TASK_WAKING -+ * -+ * Where __schedule() and ttwu() have matching control dependencies. -+ * -+ * After this, schedule() must not care about p->state any more. 
-+ */ -+ sched_task_deactivate(prev, rq); -+ deactivate_task(prev, rq); -+ deactivated = 1; -+ -+ if (prev->in_iowait) { -+ atomic_inc(&rq->nr_iowait); -+ delayacct_blkio_start(); -+ } -+ } -+ switch_count = &prev->nvcsw; -+ } -+ -+ check_curr(prev, rq); -+ -+ next = choose_next_task(rq, cpu, prev); -+ clear_tsk_need_resched(prev); -+ clear_preempt_need_resched(); -+#ifdef CONFIG_SCHED_DEBUG -+ rq->last_seen_need_resched_ns = 0; -+#endif -+ -+ if (likely(prev != next)) { -+ if (deactivated) -+ update_sched_rq_watermark(rq); -+ next->last_ran = rq->clock_task; -+ rq->last_ts_switch = rq->clock; -+ -+ rq->nr_switches++; -+ /* -+ * RCU users of rcu_dereference(rq->curr) may not see -+ * changes to task_struct made by pick_next_task(). -+ */ -+ RCU_INIT_POINTER(rq->curr, next); -+ /* -+ * The membarrier system call requires each architecture -+ * to have a full memory barrier after updating -+ * rq->curr, before returning to user-space. -+ * -+ * Here are the schemes providing that barrier on the -+ * various architectures: -+ * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. -+ * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. -+ * - finish_lock_switch() for weakly-ordered -+ * architectures where spin_unlock is a full barrier, -+ * - switch_to() for arm64 (weakly-ordered, spin_unlock -+ * is a RELEASE barrier), -+ */ -+ ++*switch_count; -+ -+ psi_sched_switch(prev, next, !task_on_rq_queued(prev)); -+ -+ trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state); -+ -+ /* Also unlocks the rq: */ -+ rq = context_switch(rq, prev, next); -+ } else { -+ __balance_callbacks(rq); -+ raw_spin_unlock_irq(&rq->lock); -+ } -+ -+#ifdef CONFIG_SCHED_SMT -+ sg_balance(rq); -+#endif -+} -+ -+void __noreturn do_task_dead(void) -+{ -+ /* Causes final put_task_struct in finish_task_switch(): */ -+ set_special_state(TASK_DEAD); -+ -+ /* Tell freezer to ignore us: */ -+ current->flags |= PF_NOFREEZE; -+ -+ __schedule(SM_NONE); -+ BUG(); -+ -+ /* Avoid "noreturn function does return" - but don't continue if BUG() is a NOP: */ -+ for (;;) -+ cpu_relax(); -+} -+ -+static inline void sched_submit_work(struct task_struct *tsk) -+{ -+ unsigned int task_flags; -+ -+ if (task_is_running(tsk)) -+ return; -+ -+ task_flags = tsk->flags; -+ /* -+ * If a worker goes to sleep, notify and ask workqueue whether it -+ * wants to wake up a task to maintain concurrency. -+ */ -+ if (task_flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ if (task_flags & PF_WQ_WORKER) -+ wq_worker_sleeping(tsk); -+ else -+ io_wq_worker_sleeping(tsk); -+ } -+ -+ if (tsk_is_pi_blocked(tsk)) -+ return; -+ -+ /* -+ * If we are going to sleep and we have plugged IO queued, -+ * make sure to submit it to avoid deadlocks. -+ */ -+ blk_flush_plug(tsk->plug, true); -+} -+ -+static void sched_update_worker(struct task_struct *tsk) -+{ -+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) { -+ if (tsk->flags & PF_WQ_WORKER) -+ wq_worker_running(tsk); -+ else -+ io_wq_worker_running(tsk); -+ } -+} -+ -+asmlinkage __visible void __sched schedule(void) -+{ -+ struct task_struct *tsk = current; -+ -+ sched_submit_work(tsk); -+ do { -+ preempt_disable(); -+ __schedule(SM_NONE); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ sched_update_worker(tsk); -+} -+EXPORT_SYMBOL(schedule); -+ -+/* -+ * synchronize_rcu_tasks() makes sure that no task is stuck in preempted -+ * state (have scheduled out non-voluntarily) by making sure that all -+ * tasks have either left the run queue or have gone into user space. 
-+ * As idle tasks do not do either, they must not ever be preempted -+ * (schedule out non-voluntarily). -+ * -+ * schedule_idle() is similar to schedule_preempt_disable() except that it -+ * never enables preemption because it does not call sched_submit_work(). -+ */ -+void __sched schedule_idle(void) -+{ -+ /* -+ * As this skips calling sched_submit_work(), which the idle task does -+ * regardless because that function is a nop when the task is in a -+ * TASK_RUNNING state, make sure this isn't used someplace that the -+ * current task can be in any other state. Note, idle is always in the -+ * TASK_RUNNING state. -+ */ -+ WARN_ON_ONCE(current->__state); -+ do { -+ __schedule(SM_NONE); -+ } while (need_resched()); -+} -+ -+#if defined(CONFIG_CONTEXT_TRACKING) && !defined(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) -+asmlinkage __visible void __sched schedule_user(void) -+{ -+ /* -+ * If we come here after a random call to set_need_resched(), -+ * or we have been woken up remotely but the IPI has not yet arrived, -+ * we haven't yet exited the RCU idle mode. Do it here manually until -+ * we find a better solution. -+ * -+ * NB: There are buggy callers of this function. Ideally we -+ * should warn if prev_state != CONTEXT_USER, but that will trigger -+ * too frequently to make sense yet. -+ */ -+ enum ctx_state prev_state = exception_enter(); -+ schedule(); -+ exception_exit(prev_state); -+} -+#endif -+ -+/** -+ * schedule_preempt_disabled - called with preemption disabled -+ * -+ * Returns with preemption disabled. Note: preempt_count must be 1 -+ */ -+void __sched schedule_preempt_disabled(void) -+{ -+ sched_preempt_enable_no_resched(); -+ schedule(); -+ preempt_disable(); -+} -+ -+#ifdef CONFIG_PREEMPT_RT -+void __sched notrace schedule_rtlock(void) -+{ -+ do { -+ preempt_disable(); -+ __schedule(SM_RTLOCK_WAIT); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+} -+NOKPROBE_SYMBOL(schedule_rtlock); -+#endif -+ -+static void __sched notrace preempt_schedule_common(void) -+{ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ __schedule(SM_PREEMPT); -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ -+ /* -+ * Check again in case we missed a preemption opportunity -+ * between schedule and now. -+ */ -+ } while (need_resched()); -+} -+ -+#ifdef CONFIG_PREEMPTION -+/* -+ * This is the entry point to schedule() from in-kernel preemption -+ * off of preempt_enable. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule(void) -+{ -+ /* -+ * If there is a non-zero preempt_count or interrupts are disabled, -+ * we do not want to preempt the current task. Just return.. 
-+ */ -+ if (likely(!preemptible())) -+ return; -+ -+ preempt_schedule_common(); -+} -+NOKPROBE_SYMBOL(preempt_schedule); -+EXPORT_SYMBOL(preempt_schedule); -+ -+#ifdef CONFIG_PREEMPT_DYNAMIC -+#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) -+#ifndef preempt_schedule_dynamic_enabled -+#define preempt_schedule_dynamic_enabled preempt_schedule -+#define preempt_schedule_dynamic_disabled NULL -+#endif -+DEFINE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled); -+EXPORT_STATIC_CALL_TRAMP(preempt_schedule); -+#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) -+static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule); -+void __sched notrace dynamic_preempt_schedule(void) -+{ -+ if (!static_branch_unlikely(&sk_dynamic_preempt_schedule)) -+ return; -+ preempt_schedule(); -+} -+NOKPROBE_SYMBOL(dynamic_preempt_schedule); -+EXPORT_SYMBOL(dynamic_preempt_schedule); -+#endif -+#endif -+ -+/** -+ * preempt_schedule_notrace - preempt_schedule called by tracing -+ * -+ * The tracing infrastructure uses preempt_enable_notrace to prevent -+ * recursion and tracing preempt enabling caused by the tracing -+ * infrastructure itself. But as tracing can happen in areas coming -+ * from userspace or just about to enter userspace, a preempt enable -+ * can occur before user_exit() is called. This will cause the scheduler -+ * to be called when the system is still in usermode. -+ * -+ * To prevent this, the preempt_enable_notrace will use this function -+ * instead of preempt_schedule() to exit user context if needed before -+ * calling the scheduler. -+ */ -+asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) -+{ -+ enum ctx_state prev_ctx; -+ -+ if (likely(!preemptible())) -+ return; -+ -+ do { -+ /* -+ * Because the function tracer can trace preempt_count_sub() -+ * and it also uses preempt_enable/disable_notrace(), if -+ * NEED_RESCHED is set, the preempt_enable_notrace() called -+ * by the function tracer will call this function again and -+ * cause infinite recursion. -+ * -+ * Preemption must be disabled here before the function -+ * tracer can trace. Break up preempt_disable() into two -+ * calls. One to disable preemption without fear of being -+ * traced. The other to still record the preemption latency, -+ * which can also be traced by the function tracer. -+ */ -+ preempt_disable_notrace(); -+ preempt_latency_start(1); -+ /* -+ * Needs preempt disabled in case user_exit() is traced -+ * and the tracer calls preempt_enable_notrace() causing -+ * an infinite recursion. 
-+ */ -+ prev_ctx = exception_enter(); -+ __schedule(SM_PREEMPT); -+ exception_exit(prev_ctx); -+ -+ preempt_latency_stop(1); -+ preempt_enable_no_resched_notrace(); -+ } while (need_resched()); -+} -+EXPORT_SYMBOL_GPL(preempt_schedule_notrace); -+ -+#ifdef CONFIG_PREEMPT_DYNAMIC -+#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) -+#ifndef preempt_schedule_notrace_dynamic_enabled -+#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace -+#define preempt_schedule_notrace_dynamic_disabled NULL -+#endif -+DEFINE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled); -+EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace); -+#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) -+static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule_notrace); -+void __sched notrace dynamic_preempt_schedule_notrace(void) -+{ -+ if (!static_branch_unlikely(&sk_dynamic_preempt_schedule_notrace)) -+ return; -+ preempt_schedule_notrace(); -+} -+NOKPROBE_SYMBOL(dynamic_preempt_schedule_notrace); -+EXPORT_SYMBOL(dynamic_preempt_schedule_notrace); -+#endif -+#endif -+ -+#endif /* CONFIG_PREEMPTION */ -+ -+/* -+ * This is the entry point to schedule() from kernel preemption -+ * off of irq context. -+ * Note, that this is called and return with irqs disabled. This will -+ * protect us against recursive calling from irq. -+ */ -+asmlinkage __visible void __sched preempt_schedule_irq(void) -+{ -+ enum ctx_state prev_state; -+ -+ /* Catch callers which need to be fixed */ -+ BUG_ON(preempt_count() || !irqs_disabled()); -+ -+ prev_state = exception_enter(); -+ -+ do { -+ preempt_disable(); -+ local_irq_enable(); -+ __schedule(SM_PREEMPT); -+ local_irq_disable(); -+ sched_preempt_enable_no_resched(); -+ } while (need_resched()); -+ -+ exception_exit(prev_state); -+} -+ -+int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, -+ void *key) -+{ -+ WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC); -+ return try_to_wake_up(curr->private, mode, wake_flags); -+} -+EXPORT_SYMBOL(default_wake_function); -+ -+static inline void check_task_changed(struct task_struct *p, struct rq *rq) -+{ -+ int idx; -+ -+ /* Trigger resched if task sched_prio has been modified. */ -+ if (task_on_rq_queued(p) && (idx = task_sched_prio_idx(p, rq)) != p->sq_idx) { -+ requeue_task(p, rq, idx); -+ check_preempt_curr(rq); -+ } -+} -+ -+static void __setscheduler_prio(struct task_struct *p, int prio) -+{ -+ p->prio = prio; -+} -+ -+#ifdef CONFIG_RT_MUTEXES -+ -+static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) -+{ -+ if (pi_task) -+ prio = min(prio, pi_task->prio); -+ -+ return prio; -+} -+ -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ struct task_struct *pi_task = rt_mutex_get_top_task(p); -+ -+ return __rt_effective_prio(pi_task, prio); -+} -+ -+/* -+ * rt_mutex_setprio - set the current priority of a task -+ * @p: task to boost -+ * @pi_task: donor task -+ * -+ * This function changes the 'effective' priority of a task. It does -+ * not touch ->normal_prio like __setscheduler(). -+ * -+ * Used by the rt_mutex code to implement priority inheritance -+ * logic. Call site only calls if the priority of the task changed. 
-+ */ -+void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) -+{ -+ int prio; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ /* XXX used to be waiter->prio, not waiter->task->prio */ -+ prio = __rt_effective_prio(pi_task, p->normal_prio); -+ -+ /* -+ * If nothing changed; bail early. -+ */ -+ if (p->pi_top_task == pi_task && prio == p->prio) -+ return; -+ -+ rq = __task_access_lock(p, &lock); -+ /* -+ * Set under pi_lock && rq->lock, such that the value can be used under -+ * either lock. -+ * -+ * Note that there is loads of tricky to make this pointer cache work -+ * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to -+ * ensure a task is de-boosted (pi_task is set to NULL) before the -+ * task is allowed to run again (and can exit). This ensures the pointer -+ * points to a blocked task -- which guarantees the task is present. -+ */ -+ p->pi_top_task = pi_task; -+ -+ /* -+ * For FIFO/RR we only need to set prio, if that matches we're done. -+ */ -+ if (prio == p->prio) -+ goto out_unlock; -+ -+ /* -+ * Idle task boosting is a nono in general. There is one -+ * exception, when PREEMPT_RT and NOHZ is active: -+ * -+ * The idle task calls get_next_timer_interrupt() and holds -+ * the timer wheel base->lock on the CPU and another CPU wants -+ * to access the timer (probably to cancel it). We can safely -+ * ignore the boosting request, as the idle CPU runs this code -+ * with interrupts disabled and will complete the lock -+ * protected section without being interrupted. So there is no -+ * real need to boost. -+ */ -+ if (unlikely(p == rq->idle)) { -+ WARN_ON(p != rq->curr); -+ WARN_ON(p->pi_blocked_on); -+ goto out_unlock; -+ } -+ -+ trace_sched_pi_setprio(p, pi_task); -+ -+ __setscheduler_prio(p, prio); -+ -+ check_task_changed(p, rq); -+out_unlock: -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ -+ __balance_callbacks(rq); -+ __task_access_unlock(p, lock); -+ -+ preempt_enable(); -+} -+#else -+static inline int rt_effective_prio(struct task_struct *p, int prio) -+{ -+ return prio; -+} -+#endif -+ -+void set_user_nice(struct task_struct *p, long nice) -+{ -+ unsigned long flags; -+ struct rq *rq; -+ raw_spinlock_t *lock; -+ -+ if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) -+ return; -+ /* -+ * We have to be careful, if called from sys_setpriority(), -+ * the task might be in the middle of scheduling on another CPU. -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ rq = __task_access_lock(p, &lock); -+ -+ p->static_prio = NICE_TO_PRIO(nice); -+ /* -+ * The RT priorities are set via sched_setscheduler(), but we still -+ * allow the 'normal' nice value to be set - but as expected -+ * it won't have any effect on scheduling until the task is -+ * not SCHED_NORMAL/SCHED_BATCH: -+ */ -+ if (task_has_rt_policy(p)) -+ goto out_unlock; -+ -+ p->prio = effective_prio(p); -+ -+ check_task_changed(p, rq); -+out_unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+} -+EXPORT_SYMBOL(set_user_nice); -+ -+/* -+ * can_nice - check if a task can reduce its nice value -+ * @p: task -+ * @nice: nice value -+ */ -+int can_nice(const struct task_struct *p, const int nice) -+{ -+ /* Convert nice value [19,-20] to rlimit style value [1,40] */ -+ int nice_rlim = nice_to_rlimit(nice); -+ -+ return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || -+ capable(CAP_SYS_NICE)); -+} -+ -+#ifdef __ARCH_WANT_SYS_NICE -+ -+/* -+ * sys_nice - change the priority of the current process. 
-+ * @increment: priority increment -+ * -+ * sys_setpriority is a more generic, but much slower function that -+ * does similar things. -+ */ -+SYSCALL_DEFINE1(nice, int, increment) -+{ -+ long nice, retval; -+ -+ /* -+ * Setpriority might change our priority at the same moment. -+ * We don't have to worry. Conceptually one call occurs first -+ * and we have a single winner. -+ */ -+ -+ increment = clamp(increment, -NICE_WIDTH, NICE_WIDTH); -+ nice = task_nice(current) + increment; -+ -+ nice = clamp_val(nice, MIN_NICE, MAX_NICE); -+ if (increment < 0 && !can_nice(current, nice)) -+ return -EPERM; -+ -+ retval = security_task_setnice(current, nice); -+ if (retval) -+ return retval; -+ -+ set_user_nice(current, nice); -+ return 0; -+} -+ -+#endif -+ -+/** -+ * task_prio - return the priority value of a given task. -+ * @p: the task in question. -+ * -+ * Return: The priority value as seen by users in /proc. -+ * -+ * sched policy return value kernel prio user prio/nice -+ * -+ * (BMQ)normal, batch, idle[0 ... 53] [100 ... 139] 0/[-20 ... 19]/[-7 ... 7] -+ * (PDS)normal, batch, idle[0 ... 39] 100 0/[-20 ... 19] -+ * fifo, rr [-1 ... -100] [99 ... 0] [0 ... 99] -+ */ -+int task_prio(const struct task_struct *p) -+{ -+ return (p->prio < MAX_RT_PRIO) ? p->prio - MAX_RT_PRIO : -+ task_sched_prio_normal(p, task_rq(p)); -+} -+ -+/** -+ * idle_cpu - is a given CPU idle currently? -+ * @cpu: the processor in question. -+ * -+ * Return: 1 if the CPU is currently idle. 0 otherwise. -+ */ -+int idle_cpu(int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ if (rq->curr != rq->idle) -+ return 0; -+ -+ if (rq->nr_running) -+ return 0; -+ -+#ifdef CONFIG_SMP -+ if (rq->ttwu_pending) -+ return 0; -+#endif -+ -+ return 1; -+} -+ -+/** -+ * idle_task - return the idle task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * Return: The idle task for the cpu @cpu. -+ */ -+struct task_struct *idle_task(int cpu) -+{ -+ return cpu_rq(cpu)->idle; -+} -+ -+/** -+ * find_process_by_pid - find a process with a matching PID value. -+ * @pid: the pid in question. -+ * -+ * The task of @pid, if found. %NULL otherwise. -+ */ -+static inline struct task_struct *find_process_by_pid(pid_t pid) -+{ -+ return pid ? find_task_by_vpid(pid) : current; -+} -+ -+/* -+ * sched_setparam() passes in -1 for its policy, to let the functions -+ * it calls know not to change it. -+ */ -+#define SETPARAM_POLICY -1 -+ -+static void __setscheduler_params(struct task_struct *p, -+ const struct sched_attr *attr) -+{ -+ int policy = attr->sched_policy; -+ -+ if (policy == SETPARAM_POLICY) -+ policy = p->policy; -+ -+ p->policy = policy; -+ -+ /* -+ * allow normal nice value to be set, but will not have any -+ * effect on scheduling until the task not SCHED_NORMAL/ -+ * SCHED_BATCH -+ */ -+ p->static_prio = NICE_TO_PRIO(attr->sched_nice); -+ -+ /* -+ * __sched_setscheduler() ensures attr->sched_priority == 0 when -+ * !rt_policy. Always setting this ensures that things like -+ * getparam()/getattr() don't report silly values for !rt tasks. 
-+ */ -+ p->rt_priority = attr->sched_priority; -+ p->normal_prio = normal_prio(p); -+} -+ -+/* -+ * check the target process has a UID that matches the current process's -+ */ -+static bool check_same_owner(struct task_struct *p) -+{ -+ const struct cred *cred = current_cred(), *pcred; -+ bool match; -+ -+ rcu_read_lock(); -+ pcred = __task_cred(p); -+ match = (uid_eq(cred->euid, pcred->euid) || -+ uid_eq(cred->euid, pcred->uid)); -+ rcu_read_unlock(); -+ return match; -+} -+ -+static int __sched_setscheduler(struct task_struct *p, -+ const struct sched_attr *attr, -+ bool user, bool pi) -+{ -+ const struct sched_attr dl_squash_attr = { -+ .size = sizeof(struct sched_attr), -+ .sched_policy = SCHED_FIFO, -+ .sched_nice = 0, -+ .sched_priority = 99, -+ }; -+ int oldpolicy = -1, policy = attr->sched_policy; -+ int retval, newprio; -+ struct callback_head *head; -+ unsigned long flags; -+ struct rq *rq; -+ int reset_on_fork; -+ raw_spinlock_t *lock; -+ -+ /* The pi code expects interrupts enabled */ -+ BUG_ON(pi && in_interrupt()); -+ -+ /* -+ * Alt schedule FW supports SCHED_DEADLINE by squash it as prio 0 SCHED_FIFO -+ */ -+ if (unlikely(SCHED_DEADLINE == policy)) { -+ attr = &dl_squash_attr; -+ policy = attr->sched_policy; -+ } -+recheck: -+ /* Double check policy once rq lock held */ -+ if (policy < 0) { -+ reset_on_fork = p->sched_reset_on_fork; -+ policy = oldpolicy = p->policy; -+ } else { -+ reset_on_fork = !!(attr->sched_flags & SCHED_RESET_ON_FORK); -+ -+ if (policy > SCHED_IDLE) -+ return -EINVAL; -+ } -+ -+ if (attr->sched_flags & ~(SCHED_FLAG_ALL)) -+ return -EINVAL; -+ -+ /* -+ * Valid priorities for SCHED_FIFO and SCHED_RR are -+ * 1..MAX_RT_PRIO-1, valid priority for SCHED_NORMAL and -+ * SCHED_BATCH and SCHED_IDLE is 0. -+ */ -+ if (attr->sched_priority < 0 || -+ (p->mm && attr->sched_priority > MAX_RT_PRIO - 1) || -+ (!p->mm && attr->sched_priority > MAX_RT_PRIO - 1)) -+ return -EINVAL; -+ if ((SCHED_RR == policy || SCHED_FIFO == policy) != -+ (attr->sched_priority != 0)) -+ return -EINVAL; -+ -+ /* -+ * Allow unprivileged RT tasks to decrease priority: -+ */ -+ if (user && !capable(CAP_SYS_NICE)) { -+ if (SCHED_FIFO == policy || SCHED_RR == policy) { -+ unsigned long rlim_rtprio = -+ task_rlimit(p, RLIMIT_RTPRIO); -+ -+ /* Can't set/change the rt policy */ -+ if (policy != p->policy && !rlim_rtprio) -+ return -EPERM; -+ -+ /* Can't increase priority */ -+ if (attr->sched_priority > p->rt_priority && -+ attr->sched_priority > rlim_rtprio) -+ return -EPERM; -+ } -+ -+ /* Can't change other user's priorities */ -+ if (!check_same_owner(p)) -+ return -EPERM; -+ -+ /* Normal users shall not reset the sched_reset_on_fork flag */ -+ if (p->sched_reset_on_fork && !reset_on_fork) -+ return -EPERM; -+ } -+ -+ if (user) { -+ retval = security_task_setscheduler(p); -+ if (retval) -+ return retval; -+ } -+ -+ if (pi) -+ cpuset_read_lock(); -+ -+ /* -+ * Make sure no PI-waiters arrive (or leave) while we are -+ * changing the priority of the task: -+ */ -+ raw_spin_lock_irqsave(&p->pi_lock, flags); -+ -+ /* -+ * To be able to change p->policy safely, task_access_lock() -+ * must be called. -+ * IF use task_access_lock() here: -+ * For the task p which is not running, reading rq->stop is -+ * racy but acceptable as ->stop doesn't change much. -+ * An enhancemnet can be made to read rq->stop saftly. 
-+ */ -+ rq = __task_access_lock(p, &lock); -+ -+ /* -+ * Changing the policy of the stop threads its a very bad idea -+ */ -+ if (p == rq->stop) { -+ retval = -EINVAL; -+ goto unlock; -+ } -+ -+ /* -+ * If not changing anything there's no need to proceed further: -+ */ -+ if (unlikely(policy == p->policy)) { -+ if (rt_policy(policy) && attr->sched_priority != p->rt_priority) -+ goto change; -+ if (!rt_policy(policy) && -+ NICE_TO_PRIO(attr->sched_nice) != p->static_prio) -+ goto change; -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ retval = 0; -+ goto unlock; -+ } -+change: -+ -+ /* Re-check policy now with rq lock held */ -+ if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { -+ policy = oldpolicy = -1; -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ goto recheck; -+ } -+ -+ p->sched_reset_on_fork = reset_on_fork; -+ -+ newprio = __normal_prio(policy, attr->sched_priority, NICE_TO_PRIO(attr->sched_nice)); -+ if (pi) { -+ /* -+ * Take priority boosted tasks into account. If the new -+ * effective priority is unchanged, we just store the new -+ * normal parameters and do not touch the scheduler class and -+ * the runqueue. This will be done when the task deboost -+ * itself. -+ */ -+ newprio = rt_effective_prio(p, newprio); -+ } -+ -+ if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) { -+ __setscheduler_params(p, attr); -+ __setscheduler_prio(p, newprio); -+ } -+ -+ check_task_changed(p, rq); -+ -+ /* Avoid rq from going away on us: */ -+ preempt_disable(); -+ head = splice_balance_callbacks(rq); -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ -+ if (pi) { -+ cpuset_read_unlock(); -+ rt_mutex_adjust_pi(p); -+ } -+ -+ /* Run balance callbacks after we've adjusted the PI chain: */ -+ balance_callbacks(rq, head); -+ preempt_enable(); -+ -+ return 0; -+ -+unlock: -+ __task_access_unlock(p, lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, flags); -+ if (pi) -+ cpuset_read_unlock(); -+ return retval; -+} -+ -+static int _sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param, bool check) -+{ -+ struct sched_attr attr = { -+ .sched_policy = policy, -+ .sched_priority = param->sched_priority, -+ .sched_nice = PRIO_TO_NICE(p->static_prio), -+ }; -+ -+ /* Fixup the legacy SCHED_RESET_ON_FORK hack. */ -+ if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) { -+ attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; -+ policy &= ~SCHED_RESET_ON_FORK; -+ attr.sched_policy = policy; -+ } -+ -+ return __sched_setscheduler(p, &attr, check, true); -+} -+ -+/** -+ * sched_setscheduler - change the scheduling policy and/or RT priority of a thread. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Use sched_set_fifo(), read its comment. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * -+ * NOTE that the task may be already dead. 
-+ */ -+int sched_setscheduler(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, true); -+} -+ -+int sched_setattr(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, true, true); -+} -+ -+int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr) -+{ -+ return __sched_setscheduler(p, attr, false, true); -+} -+EXPORT_SYMBOL_GPL(sched_setattr_nocheck); -+ -+/** -+ * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. -+ * @p: the task in question. -+ * @policy: new policy. -+ * @param: structure containing the new RT priority. -+ * -+ * Just like sched_setscheduler, only don't bother checking if the -+ * current context has permission. For example, this is needed in -+ * stop_machine(): we create temporary high priority worker threads, -+ * but our caller might not have that capability. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+int sched_setscheduler_nocheck(struct task_struct *p, int policy, -+ const struct sched_param *param) -+{ -+ return _sched_setscheduler(p, policy, param, false); -+} -+ -+/* -+ * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally -+ * incapable of resource management, which is the one thing an OS really should -+ * be doing. -+ * -+ * This is of course the reason it is limited to privileged users only. -+ * -+ * Worse still; it is fundamentally impossible to compose static priority -+ * workloads. You cannot take two correctly working static prio workloads -+ * and smash them together and still expect them to work. -+ * -+ * For this reason 'all' FIFO tasks the kernel creates are basically at: -+ * -+ * MAX_RT_PRIO / 2 -+ * -+ * The administrator _MUST_ configure the system, the kernel simply doesn't -+ * know enough information to make a sensible choice. -+ */ -+void sched_set_fifo(struct task_struct *p) -+{ -+ struct sched_param sp = { .sched_priority = MAX_RT_PRIO / 2 }; -+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_fifo); -+ -+/* -+ * For when you don't much care about FIFO, but want to be above SCHED_NORMAL. -+ */ -+void sched_set_fifo_low(struct task_struct *p) -+{ -+ struct sched_param sp = { .sched_priority = 1 }; -+ WARN_ON_ONCE(sched_setscheduler_nocheck(p, SCHED_FIFO, &sp) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_fifo_low); -+ -+void sched_set_normal(struct task_struct *p, int nice) -+{ -+ struct sched_attr attr = { -+ .sched_policy = SCHED_NORMAL, -+ .sched_nice = nice, -+ }; -+ WARN_ON_ONCE(sched_setattr_nocheck(p, &attr) != 0); -+} -+EXPORT_SYMBOL_GPL(sched_set_normal); -+ -+static int -+do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) -+{ -+ struct sched_param lparam; -+ struct task_struct *p; -+ int retval; -+ -+ if (!param || pid < 0) -+ return -EINVAL; -+ if (copy_from_user(&lparam, param, sizeof(struct sched_param))) -+ return -EFAULT; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setscheduler(p, policy, &lparam); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/* -+ * Mimics kernel/events/core.c perf_copy_attr(). 
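As a rough illustration of the intended in-kernel usage, a driver's kthread would call sched_set_fifo() on itself instead of picking a priority; a minimal module sketch under that assumption (names such as demo_thread_fn are hypothetical):

	// SPDX-License-Identifier: GPL-2.0
	#include <linux/module.h>
	#include <linux/kthread.h>
	#include <linux/sched.h>
	#include <linux/delay.h>
	#include <linux/err.h>

	static struct task_struct *demo_task;

	static int demo_thread_fn(void *data)
	{
		/* Runs at MAX_RT_PRIO / 2, as documented above; no magic numbers here. */
		sched_set_fifo(current);

		while (!kthread_should_stop())
			msleep(100);
		return 0;
	}

	static int __init demo_init(void)
	{
		demo_task = kthread_run(demo_thread_fn, NULL, "demo-fifo");
		return PTR_ERR_OR_ZERO(demo_task);
	}

	static void __exit demo_exit(void)
	{
		kthread_stop(demo_task);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");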
-+ */ -+static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *attr) -+{ -+ u32 size; -+ int ret; -+ -+ /* Zero the full structure, so that a short copy will be nice: */ -+ memset(attr, 0, sizeof(*attr)); -+ -+ ret = get_user(size, &uattr->size); -+ if (ret) -+ return ret; -+ -+ /* ABI compatibility quirk: */ -+ if (!size) -+ size = SCHED_ATTR_SIZE_VER0; -+ -+ if (size < SCHED_ATTR_SIZE_VER0 || size > PAGE_SIZE) -+ goto err_size; -+ -+ ret = copy_struct_from_user(attr, sizeof(*attr), uattr, size); -+ if (ret) { -+ if (ret == -E2BIG) -+ goto err_size; -+ return ret; -+ } -+ -+ /* -+ * XXX: Do we want to be lenient like existing syscalls; or do we want -+ * to be strict and return an error on out-of-bounds values? -+ */ -+ attr->sched_nice = clamp(attr->sched_nice, -20, 19); -+ -+ /* sched/core.c uses zero here but we already know ret is zero */ -+ return 0; -+ -+err_size: -+ put_user(sizeof(*attr), &uattr->size); -+ return -E2BIG; -+} -+ -+/** -+ * sys_sched_setscheduler - set/change the scheduler policy and RT priority -+ * @pid: the pid in question. -+ * @policy: new policy. -+ * -+ * Return: 0 on success. An error code otherwise. -+ * @param: structure containing the new RT priority. -+ */ -+SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param) -+{ -+ if (policy < 0) -+ return -EINVAL; -+ -+ return do_sched_setscheduler(pid, policy, param); -+} -+ -+/** -+ * sys_sched_setparam - set/change the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the new RT priority. -+ * -+ * Return: 0 on success. An error code otherwise. -+ */ -+SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ return do_sched_setscheduler(pid, SETPARAM_POLICY, param); -+} -+ -+/** -+ * sys_sched_setattr - same as above, but with extended sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ */ -+SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, flags) -+{ -+ struct sched_attr attr; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || flags) -+ return -EINVAL; -+ -+ retval = sched_copy_attr(uattr, &attr); -+ if (retval) -+ return retval; -+ -+ if ((int)attr.sched_policy < 0) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (likely(p)) -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (likely(p)) { -+ retval = sched_setattr(p, &attr); -+ put_task_struct(p); -+ } -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the policy (scheduling class) of a thread -+ * @pid: the pid in question. -+ * -+ * Return: On success, the policy of the thread. Otherwise, a negative error -+ * code. -+ */ -+SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) -+{ -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (pid < 0) -+ goto out_nounlock; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (p) { -+ retval = security_task_getscheduler(p); -+ if (!retval) -+ retval = p->policy; -+ } -+ rcu_read_unlock(); -+ -+out_nounlock: -+ return retval; -+} -+ -+/** -+ * sys_sched_getscheduler - get the RT priority of a thread -+ * @pid: the pid in question. -+ * @param: structure containing the RT priority. -+ * -+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error -+ * code. 
-+ */ -+SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) -+{ -+ struct sched_param lp = { .sched_priority = 0 }; -+ struct task_struct *p; -+ int retval = -EINVAL; -+ -+ if (!param || pid < 0) -+ goto out_nounlock; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ if (task_has_rt_policy(p)) -+ lp.sched_priority = p->rt_priority; -+ rcu_read_unlock(); -+ -+ /* -+ * This one might sleep, we cannot do it with a spinlock held ... -+ */ -+ retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0; -+ -+out_nounlock: -+ return retval; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/* -+ * Copy the kernel size attribute structure (which might be larger -+ * than what user-space knows about) to user-space. -+ * -+ * Note that all cases are valid: user-space buffer can be larger or -+ * smaller than the kernel-space buffer. The usual case is that both -+ * have the same size. -+ */ -+static int -+sched_attr_copy_to_user(struct sched_attr __user *uattr, -+ struct sched_attr *kattr, -+ unsigned int usize) -+{ -+ unsigned int ksize = sizeof(*kattr); -+ -+ if (!access_ok(uattr, usize)) -+ return -EFAULT; -+ -+ /* -+ * sched_getattr() ABI forwards and backwards compatibility: -+ * -+ * If usize == ksize then we just copy everything to user-space and all is good. -+ * -+ * If usize < ksize then we only copy as much as user-space has space for, -+ * this keeps ABI compatibility as well. We skip the rest. -+ * -+ * If usize > ksize then user-space is using a newer version of the ABI, -+ * which part the kernel doesn't know about. Just ignore it - tooling can -+ * detect the kernel's knowledge of attributes from the attr->size value -+ * which is set to ksize in this case. -+ */ -+ kattr->size = min(usize, ksize); -+ -+ if (copy_to_user(uattr, kattr, kattr->size)) -+ return -EFAULT; -+ -+ return 0; -+} -+ -+/** -+ * sys_sched_getattr - similar to sched_getparam, but with sched_attr -+ * @pid: the pid in question. -+ * @uattr: structure containing the extended parameters. -+ * @usize: sizeof(attr) for fwd/bwd comp. -+ * @flags: for future extension. 
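glibc has historically not shipped wrappers for sched_setattr()/sched_getattr(), so userspace normally goes through syscall(2); a minimal sketch that lays out the attribute structure locally to match SCHED_ATTR_SIZE_VER0 (48 bytes):

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <errno.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	/* Field layout of the VER0 part of the UAPI struct sched_attr. */
	struct my_sched_attr {
		uint32_t size;
		uint32_t sched_policy;
		uint64_t sched_flags;
		int32_t  sched_nice;
		uint32_t sched_priority;
		uint64_t sched_runtime;
		uint64_t sched_deadline;
		uint64_t sched_period;
	};

	int main(void)
	{
		struct my_sched_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.sched_policy = SCHED_FIFO;
		attr.sched_priority = 10;

		/* sched_setattr(pid, uattr, flags); flags must be 0 */
		if (syscall(SYS_sched_setattr, 0, &attr, 0) == -1) {
			fprintf(stderr, "sched_setattr: %s\n", strerror(errno));
			return 1;
		}

		/* sched_getattr(pid, uattr, usize, flags) */
		memset(&attr, 0, sizeof(attr));
		if (syscall(SYS_sched_getattr, 0, &attr, sizeof(attr), 0) == -1) {
			fprintf(stderr, "sched_getattr: %s\n", strerror(errno));
			return 1;
		}
		printf("policy=%u priority=%u\n", attr.sched_policy, attr.sched_priority);
		return 0;
	}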
-+ */ -+SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, -+ unsigned int, usize, unsigned int, flags) -+{ -+ struct sched_attr kattr = { }; -+ struct task_struct *p; -+ int retval; -+ -+ if (!uattr || pid < 0 || usize > PAGE_SIZE || -+ usize < SCHED_ATTR_SIZE_VER0 || flags) -+ return -EINVAL; -+ -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ retval = -ESRCH; -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ kattr.sched_policy = p->policy; -+ if (p->sched_reset_on_fork) -+ kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; -+ if (task_has_rt_policy(p)) -+ kattr.sched_priority = p->rt_priority; -+ else -+ kattr.sched_nice = task_nice(p); -+ kattr.sched_flags &= SCHED_FLAG_ALL; -+ -+#ifdef CONFIG_UCLAMP_TASK -+ kattr.sched_util_min = p->uclamp_req[UCLAMP_MIN].value; -+ kattr.sched_util_max = p->uclamp_req[UCLAMP_MAX].value; -+#endif -+ -+ rcu_read_unlock(); -+ -+ return sched_attr_copy_to_user(uattr, &kattr, usize); -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+static int -+__sched_setaffinity(struct task_struct *p, const struct cpumask *mask) -+{ -+ int retval; -+ cpumask_var_t cpus_allowed, new_mask; -+ -+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { -+ retval = -ENOMEM; -+ goto out_free_cpus_allowed; -+ } -+ -+ cpuset_cpus_allowed(p, cpus_allowed); -+ cpumask_and(new_mask, mask, cpus_allowed); -+again: -+ retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK | SCA_USER); -+ if (retval) -+ goto out_free_new_mask; -+ -+ cpuset_cpus_allowed(p, cpus_allowed); -+ if (!cpumask_subset(new_mask, cpus_allowed)) { -+ /* -+ * We must have raced with a concurrent cpuset -+ * update. Just reset the cpus_allowed to the -+ * cpuset's cpus_allowed -+ */ -+ cpumask_copy(new_mask, cpus_allowed); -+ goto again; -+ } -+ -+out_free_new_mask: -+ free_cpumask_var(new_mask); -+out_free_cpus_allowed: -+ free_cpumask_var(cpus_allowed); -+ return retval; -+} -+ -+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) -+{ -+ struct task_struct *p; -+ int retval; -+ -+ rcu_read_lock(); -+ -+ p = find_process_by_pid(pid); -+ if (!p) { -+ rcu_read_unlock(); -+ return -ESRCH; -+ } -+ -+ /* Prevent p going away */ -+ get_task_struct(p); -+ rcu_read_unlock(); -+ -+ if (p->flags & PF_NO_SETAFFINITY) { -+ retval = -EINVAL; -+ goto out_put_task; -+ } -+ -+ if (!check_same_owner(p)) { -+ rcu_read_lock(); -+ if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { -+ rcu_read_unlock(); -+ retval = -EPERM; -+ goto out_put_task; -+ } -+ rcu_read_unlock(); -+ } -+ -+ retval = security_task_setscheduler(p); -+ if (retval) -+ goto out_put_task; -+ -+ retval = __sched_setaffinity(p, in_mask); -+out_put_task: -+ put_task_struct(p); -+ return retval; -+} -+ -+static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, -+ struct cpumask *new_mask) -+{ -+ if (len < cpumask_size()) -+ cpumask_clear(new_mask); -+ else if (len > cpumask_size()) -+ len = cpumask_size(); -+ -+ return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; -+} -+ -+/** -+ * sys_sched_setaffinity - set the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to the new CPU mask -+ * -+ * Return: 0 on success. An error code otherwise. 
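A minimal userspace sketch of the matching libc interface, using the cpu_set_t helpers from <sched.h> (pinning to CPU 0 is an arbitrary example):

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>
	#include <string.h>
	#include <errno.h>

	int main(void)
	{
		cpu_set_t set;

		CPU_ZERO(&set);
		CPU_SET(0, &set);	/* pin the calling thread to CPU 0 */

		if (sched_setaffinity(0, sizeof(set), &set) == -1) {
			fprintf(stderr, "sched_setaffinity: %s\n", strerror(errno));
			return 1;
		}

		CPU_ZERO(&set);
		if (sched_getaffinity(0, sizeof(set), &set) == 0)
			printf("allowed mask now has %d CPU(s)\n", CPU_COUNT(&set));
		return 0;
	}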
-+ */ -+SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ cpumask_var_t new_mask; -+ int retval; -+ -+ if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); -+ if (retval == 0) -+ retval = sched_setaffinity(pid, new_mask); -+ free_cpumask_var(new_mask); -+ return retval; -+} -+ -+long sched_getaffinity(pid_t pid, cpumask_t *mask) -+{ -+ struct task_struct *p; -+ raw_spinlock_t *lock; -+ unsigned long flags; -+ int retval; -+ -+ rcu_read_lock(); -+ -+ retval = -ESRCH; -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ -+ task_access_lock_irqsave(p, &lock, &flags); -+ cpumask_and(mask, &p->cpus_mask, cpu_active_mask); -+ task_access_unlock_irqrestore(p, lock, &flags); -+ -+out_unlock: -+ rcu_read_unlock(); -+ -+ return retval; -+} -+ -+/** -+ * sys_sched_getaffinity - get the CPU affinity of a process -+ * @pid: pid of the process -+ * @len: length in bytes of the bitmask pointed to by user_mask_ptr -+ * @user_mask_ptr: user-space pointer to hold the current CPU mask -+ * -+ * Return: size of CPU mask copied to user_mask_ptr on success. An -+ * error code otherwise. -+ */ -+SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, -+ unsigned long __user *, user_mask_ptr) -+{ -+ int ret; -+ cpumask_var_t mask; -+ -+ if ((len * BITS_PER_BYTE) < nr_cpu_ids) -+ return -EINVAL; -+ if (len & (sizeof(unsigned long)-1)) -+ return -EINVAL; -+ -+ if (!alloc_cpumask_var(&mask, GFP_KERNEL)) -+ return -ENOMEM; -+ -+ ret = sched_getaffinity(pid, mask); -+ if (ret == 0) { -+ unsigned int retlen = min_t(size_t, len, cpumask_size()); -+ -+ if (copy_to_user(user_mask_ptr, mask, retlen)) -+ ret = -EFAULT; -+ else -+ ret = retlen; -+ } -+ free_cpumask_var(mask); -+ -+ return ret; -+} -+ -+static void do_sched_yield(void) -+{ -+ struct rq *rq; -+ struct rq_flags rf; -+ -+ if (!sched_yield_type) -+ return; -+ -+ rq = this_rq_lock_irq(&rf); -+ -+ schedstat_inc(rq->yld_count); -+ -+ if (1 == sched_yield_type) { -+ if (!rt_task(current)) -+ do_sched_yield_type_1(current, rq); -+ } else if (2 == sched_yield_type) { -+ if (rq->nr_running > 1) -+ rq->skip = current; -+ } -+ -+ preempt_disable(); -+ raw_spin_unlock_irq(&rq->lock); -+ sched_preempt_enable_no_resched(); -+ -+ schedule(); -+} -+ -+/** -+ * sys_sched_yield - yield the current processor to other threads. -+ * -+ * This function yields the current CPU to other tasks. If there are no -+ * other threads running on this CPU then this function will return. -+ * -+ * Return: 0. -+ */ -+SYSCALL_DEFINE0(sched_yield) -+{ -+ do_sched_yield(); -+ return 0; -+} -+ -+#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) -+int __sched __cond_resched(void) -+{ -+ if (should_resched(0)) { -+ preempt_schedule_common(); -+ return 1; -+ } -+ /* -+ * In preemptible kernels, ->rcu_read_lock_nesting tells the tick -+ * whether the current CPU is in an RCU read-side critical section, -+ * so the tick can report quiescent states even for CPUs looping -+ * in kernel context. In contrast, in non-preemptible kernels, -+ * RCU readers leave no in-memory hints, which means that CPU-bound -+ * processes executing in kernel context might never report an -+ * RCU quiescent state. Therefore, the following code causes -+ * cond_resched() to report a quiescent state, but only when RCU -+ * is in urgent need of one. 
-+ */ -+#ifndef CONFIG_PREEMPT_RCU -+ rcu_all_qs(); -+#endif -+ return 0; -+} -+EXPORT_SYMBOL(__cond_resched); -+#endif -+ -+#ifdef CONFIG_PREEMPT_DYNAMIC -+#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) -+#define cond_resched_dynamic_enabled __cond_resched -+#define cond_resched_dynamic_disabled ((void *)&__static_call_return0) -+DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched); -+EXPORT_STATIC_CALL_TRAMP(cond_resched); -+ -+#define might_resched_dynamic_enabled __cond_resched -+#define might_resched_dynamic_disabled ((void *)&__static_call_return0) -+DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched); -+EXPORT_STATIC_CALL_TRAMP(might_resched); -+#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) -+static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched); -+int __sched dynamic_cond_resched(void) -+{ -+ if (!static_branch_unlikely(&sk_dynamic_cond_resched)) -+ return 0; -+ return __cond_resched(); -+} -+EXPORT_SYMBOL(dynamic_cond_resched); -+ -+static DEFINE_STATIC_KEY_FALSE(sk_dynamic_might_resched); -+int __sched dynamic_might_resched(void) -+{ -+ if (!static_branch_unlikely(&sk_dynamic_might_resched)) -+ return 0; -+ return __cond_resched(); -+} -+EXPORT_SYMBOL(dynamic_might_resched); -+#endif -+#endif -+ -+/* -+ * __cond_resched_lock() - if a reschedule is pending, drop the given lock, -+ * call schedule, and on return reacquire the lock. -+ * -+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level -+ * operations here to prevent schedule() from being called twice (once via -+ * spin_unlock(), once by hand). -+ */ -+int __cond_resched_lock(spinlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held(lock); -+ -+ if (spin_needbreak(lock) || resched) { -+ spin_unlock(lock); -+ if (!_cond_resched()) -+ cpu_relax(); -+ ret = 1; -+ spin_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_lock); -+ -+int __cond_resched_rwlock_read(rwlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held_read(lock); -+ -+ if (rwlock_needbreak(lock) || resched) { -+ read_unlock(lock); -+ if (!_cond_resched()) -+ cpu_relax(); -+ ret = 1; -+ read_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_rwlock_read); -+ -+int __cond_resched_rwlock_write(rwlock_t *lock) -+{ -+ int resched = should_resched(PREEMPT_LOCK_OFFSET); -+ int ret = 0; -+ -+ lockdep_assert_held_write(lock); -+ -+ if (rwlock_needbreak(lock) || resched) { -+ write_unlock(lock); -+ if (!_cond_resched()) -+ cpu_relax(); -+ ret = 1; -+ write_lock(lock); -+ } -+ return ret; -+} -+EXPORT_SYMBOL(__cond_resched_rwlock_write); -+ -+#ifdef CONFIG_PREEMPT_DYNAMIC -+ -+#ifdef CONFIG_GENERIC_ENTRY -+#include -+#endif -+ -+/* -+ * SC:cond_resched -+ * SC:might_resched -+ * SC:preempt_schedule -+ * SC:preempt_schedule_notrace -+ * SC:irqentry_exit_cond_resched -+ * -+ * -+ * NONE: -+ * cond_resched <- __cond_resched -+ * might_resched <- RET0 -+ * preempt_schedule <- NOP -+ * preempt_schedule_notrace <- NOP -+ * irqentry_exit_cond_resched <- NOP -+ * -+ * VOLUNTARY: -+ * cond_resched <- __cond_resched -+ * might_resched <- __cond_resched -+ * preempt_schedule <- NOP -+ * preempt_schedule_notrace <- NOP -+ * irqentry_exit_cond_resched <- NOP -+ * -+ * FULL: -+ * cond_resched <- RET0 -+ * might_resched <- RET0 -+ * preempt_schedule <- preempt_schedule -+ * preempt_schedule_notrace <- preempt_schedule_notrace -+ * irqentry_exit_cond_resched <- irqentry_exit_cond_resched -+ */ -+ -+enum { -+ 
preempt_dynamic_undefined = -1, -+ preempt_dynamic_none, -+ preempt_dynamic_voluntary, -+ preempt_dynamic_full, -+}; -+ -+int preempt_dynamic_mode = preempt_dynamic_undefined; -+ -+int sched_dynamic_mode(const char *str) -+{ -+ if (!strcmp(str, "none")) -+ return preempt_dynamic_none; -+ -+ if (!strcmp(str, "voluntary")) -+ return preempt_dynamic_voluntary; -+ -+ if (!strcmp(str, "full")) -+ return preempt_dynamic_full; -+ -+ return -EINVAL; -+} -+ -+#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) -+#define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled) -+#define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled) -+#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) -+#define preempt_dynamic_enable(f) static_key_enable(&sk_dynamic_##f.key) -+#define preempt_dynamic_disable(f) static_key_disable(&sk_dynamic_##f.key) -+#else -+#error "Unsupported PREEMPT_DYNAMIC mechanism" -+#endif -+ -+void sched_dynamic_update(int mode) -+{ -+ /* -+ * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in -+ * the ZERO state, which is invalid. -+ */ -+ preempt_dynamic_enable(cond_resched); -+ preempt_dynamic_enable(might_resched); -+ preempt_dynamic_enable(preempt_schedule); -+ preempt_dynamic_enable(preempt_schedule_notrace); -+ preempt_dynamic_enable(irqentry_exit_cond_resched); -+ -+ switch (mode) { -+ case preempt_dynamic_none: -+ preempt_dynamic_enable(cond_resched); -+ preempt_dynamic_disable(might_resched); -+ preempt_dynamic_disable(preempt_schedule); -+ preempt_dynamic_disable(preempt_schedule_notrace); -+ preempt_dynamic_disable(irqentry_exit_cond_resched); -+ pr_info("Dynamic Preempt: none\n"); -+ break; -+ -+ case preempt_dynamic_voluntary: -+ preempt_dynamic_enable(cond_resched); -+ preempt_dynamic_enable(might_resched); -+ preempt_dynamic_disable(preempt_schedule); -+ preempt_dynamic_disable(preempt_schedule_notrace); -+ preempt_dynamic_disable(irqentry_exit_cond_resched); -+ pr_info("Dynamic Preempt: voluntary\n"); -+ break; -+ -+ case preempt_dynamic_full: -+ preempt_dynamic_disable(cond_resched); -+ preempt_dynamic_disable(might_resched); -+ preempt_dynamic_enable(preempt_schedule); -+ preempt_dynamic_enable(preempt_schedule_notrace); -+ preempt_dynamic_enable(irqentry_exit_cond_resched); -+ pr_info("Dynamic Preempt: full\n"); -+ break; -+ } -+ -+ preempt_dynamic_mode = mode; -+} -+ -+static int __init setup_preempt_mode(char *str) -+{ -+ int mode = sched_dynamic_mode(str); -+ if (mode < 0) { -+ pr_warn("Dynamic Preempt: unsupported mode: %s\n", str); -+ return 0; -+ } -+ -+ sched_dynamic_update(mode); -+ return 1; -+} -+__setup("preempt=", setup_preempt_mode); -+ -+static void __init preempt_dynamic_init(void) -+{ -+ if (preempt_dynamic_mode == preempt_dynamic_undefined) { -+ if (IS_ENABLED(CONFIG_PREEMPT_NONE)) { -+ sched_dynamic_update(preempt_dynamic_none); -+ } else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) { -+ sched_dynamic_update(preempt_dynamic_voluntary); -+ } else { -+ /* Default static call setting, nothing to do */ -+ WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)); -+ preempt_dynamic_mode = preempt_dynamic_full; -+ pr_info("Dynamic Preempt: full\n"); -+ } -+ } -+} -+ -+#define PREEMPT_MODEL_ACCESSOR(mode) \ -+ bool preempt_model_##mode(void) \ -+ { \ -+ WARN_ON_ONCE(preempt_dynamic_mode == preempt_dynamic_undefined); \ -+ return preempt_dynamic_mode == preempt_dynamic_##mode; \ -+ } \ -+ EXPORT_SYMBOL_GPL(preempt_model_##mode) -+ -+PREEMPT_MODEL_ACCESSOR(none); -+PREEMPT_MODEL_ACCESSOR(voluntary); -+PREEMPT_MODEL_ACCESSOR(full); -+ 
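For reference, the PREEMPT_MODEL_ACCESSOR(none) instance above expands to roughly the following; the other two accessors differ only in the mode they compare against:

	bool preempt_model_none(void)
	{
		WARN_ON_ONCE(preempt_dynamic_mode == preempt_dynamic_undefined);
		return preempt_dynamic_mode == preempt_dynamic_none;
	}
	EXPORT_SYMBOL_GPL(preempt_model_none);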
-+#else /* !CONFIG_PREEMPT_DYNAMIC */ -+ -+static inline void preempt_dynamic_init(void) { } -+ -+#endif /* #ifdef CONFIG_PREEMPT_DYNAMIC */ -+ -+/** -+ * yield - yield the current processor to other threads. -+ * -+ * Do not ever use this function, there's a 99% chance you're doing it wrong. -+ * -+ * The scheduler is at all times free to pick the calling task as the most -+ * eligible task to run, if removing the yield() call from your code breaks -+ * it, it's already broken. -+ * -+ * Typical broken usage is: -+ * -+ * while (!event) -+ * yield(); -+ * -+ * where one assumes that yield() will let 'the other' process run that will -+ * make event true. If the current task is a SCHED_FIFO task that will never -+ * happen. Never use yield() as a progress guarantee!! -+ * -+ * If you want to use yield() to wait for something, use wait_event(). -+ * If you want to use yield() to be 'nice' for others, use cond_resched(). -+ * If you still want to use yield(), do not! -+ */ -+void __sched yield(void) -+{ -+ set_current_state(TASK_RUNNING); -+ do_sched_yield(); -+} -+EXPORT_SYMBOL(yield); -+ -+/** -+ * yield_to - yield the current processor to another thread in -+ * your thread group, or accelerate that thread toward the -+ * processor it's on. -+ * @p: target task -+ * @preempt: whether task preemption is allowed or not -+ * -+ * It's the caller's job to ensure that the target task struct -+ * can't go away on us before we can do any checks. -+ * -+ * In Alt schedule FW, yield_to is not supported. -+ * -+ * Return: -+ * true (>0) if we indeed boosted the target task. -+ * false (0) if we failed to boost the target. -+ * -ESRCH if there's no task to yield to. -+ */ -+int __sched yield_to(struct task_struct *p, bool preempt) -+{ -+ return 0; -+} -+EXPORT_SYMBOL_GPL(yield_to); -+ -+int io_schedule_prepare(void) -+{ -+ int old_iowait = current->in_iowait; -+ -+ current->in_iowait = 1; -+ blk_flush_plug(current->plug, true); -+ return old_iowait; -+} -+ -+void io_schedule_finish(int token) -+{ -+ current->in_iowait = token; -+} -+ -+/* -+ * This task is about to go to sleep on IO. Increment rq->nr_iowait so -+ * that process accounting knows that this is a task in IO wait state. -+ * -+ * But don't do that if it is a deliberate, throttling IO wait (this task -+ * has set its backing_dev_info: the queue against which it should throttle) -+ */ -+ -+long __sched io_schedule_timeout(long timeout) -+{ -+ int token; -+ long ret; -+ -+ token = io_schedule_prepare(); -+ ret = schedule_timeout(timeout); -+ io_schedule_finish(token); -+ -+ return ret; -+} -+EXPORT_SYMBOL(io_schedule_timeout); -+ -+void __sched io_schedule(void) -+{ -+ int token; -+ -+ token = io_schedule_prepare(); -+ schedule(); -+ io_schedule_finish(token); -+} -+EXPORT_SYMBOL(io_schedule); -+ -+/** -+ * sys_sched_get_priority_max - return maximum RT priority. -+ * @policy: scheduling class. -+ * -+ * Return: On success, this syscall returns the maximum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_max, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = MAX_RT_PRIO - 1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+/** -+ * sys_sched_get_priority_min - return minimum RT priority. -+ * @policy: scheduling class. 
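Both limits are normally queried from userspace through the POSIX wrappers; a small sketch using the policy constants from <sched.h> (on this scheduler SCHED_FIFO reports 1..99 and SCHED_OTHER 0..0):

	#include <sched.h>
	#include <stdio.h>

	int main(void)
	{
		printf("SCHED_FIFO  priority range: %d..%d\n",
		       sched_get_priority_min(SCHED_FIFO),
		       sched_get_priority_max(SCHED_FIFO));
		printf("SCHED_OTHER priority range: %d..%d\n",
		       sched_get_priority_min(SCHED_OTHER),
		       sched_get_priority_max(SCHED_OTHER));
		return 0;
	}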
-+ * -+ * Return: On success, this syscall returns the minimum -+ * rt_priority that can be used by a given scheduling class. -+ * On failure, a negative error code is returned. -+ */ -+SYSCALL_DEFINE1(sched_get_priority_min, int, policy) -+{ -+ int ret = -EINVAL; -+ -+ switch (policy) { -+ case SCHED_FIFO: -+ case SCHED_RR: -+ ret = 1; -+ break; -+ case SCHED_NORMAL: -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ ret = 0; -+ break; -+ } -+ return ret; -+} -+ -+static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) -+{ -+ struct task_struct *p; -+ int retval; -+ -+ alt_sched_debug(); -+ -+ if (pid < 0) -+ return -EINVAL; -+ -+ retval = -ESRCH; -+ rcu_read_lock(); -+ p = find_process_by_pid(pid); -+ if (!p) -+ goto out_unlock; -+ -+ retval = security_task_getscheduler(p); -+ if (retval) -+ goto out_unlock; -+ rcu_read_unlock(); -+ -+ *t = ns_to_timespec64(sched_timeslice_ns); -+ return 0; -+ -+out_unlock: -+ rcu_read_unlock(); -+ return retval; -+} -+ -+/** -+ * sys_sched_rr_get_interval - return the default timeslice of a process. -+ * @pid: pid of the process. -+ * @interval: userspace pointer to the timeslice value. -+ * -+ * -+ * Return: On success, 0 and the timeslice is in @interval. Otherwise, -+ * an error code. -+ */ -+SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, -+ struct __kernel_timespec __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_timespec64(&t, interval); -+ -+ return retval; -+} -+ -+#ifdef CONFIG_COMPAT_32BIT_TIME -+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid, -+ struct old_timespec32 __user *, interval) -+{ -+ struct timespec64 t; -+ int retval = sched_rr_get_interval(pid, &t); -+ -+ if (retval == 0) -+ retval = put_old_timespec32(&t, interval); -+ return retval; -+} -+#endif -+ -+void sched_show_task(struct task_struct *p) -+{ -+ unsigned long free = 0; -+ int ppid; -+ -+ if (!try_get_task_stack(p)) -+ return; -+ -+ pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p)); -+ -+ if (task_is_running(p)) -+ pr_cont(" running task "); -+#ifdef CONFIG_DEBUG_STACK_USAGE -+ free = stack_not_used(p); -+#endif -+ ppid = 0; -+ rcu_read_lock(); -+ if (pid_alive(p)) -+ ppid = task_pid_nr(rcu_dereference(p->real_parent)); -+ rcu_read_unlock(); -+ pr_cont(" stack:%5lu pid:%5d ppid:%6d flags:0x%08lx\n", -+ free, task_pid_nr(p), ppid, -+ read_task_thread_flags(p)); -+ -+ print_worker_info(KERN_INFO, p); -+ print_stop_info(KERN_INFO, p); -+ show_stack(p, NULL, KERN_INFO); -+ put_task_stack(p); -+} -+EXPORT_SYMBOL_GPL(sched_show_task); -+ -+static inline bool -+state_filter_match(unsigned long state_filter, struct task_struct *p) -+{ -+ unsigned int state = READ_ONCE(p->__state); -+ -+ /* no filter, everything matches */ -+ if (!state_filter) -+ return true; -+ -+ /* filter, but doesn't match */ -+ if (!(state & state_filter)) -+ return false; -+ -+ /* -+ * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows -+ * TASK_KILLABLE). -+ */ -+ if (state_filter == TASK_UNINTERRUPTIBLE && state == TASK_IDLE) -+ return false; -+ -+ return true; -+} -+ -+ -+void show_state_filter(unsigned int state_filter) -+{ -+ struct task_struct *g, *p; -+ -+ rcu_read_lock(); -+ for_each_process_thread(g, p) { -+ /* -+ * reset the NMI-timeout, listing all files on a slow -+ * console might take a lot of time: -+ * Also, reset softlockup watchdogs on all CPUs, because -+ * another CPU might be blocked waiting for us to process -+ * an IPI. 
-+ */ -+ touch_nmi_watchdog(); -+ touch_all_softlockup_watchdogs(); -+ if (state_filter_match(state_filter, p)) -+ sched_show_task(p); -+ } -+ -+#ifdef CONFIG_SCHED_DEBUG -+ /* TODO: Alt schedule FW should support this -+ if (!state_filter) -+ sysrq_sched_debug_show(); -+ */ -+#endif -+ rcu_read_unlock(); -+ /* -+ * Only show locks if all tasks are dumped: -+ */ -+ if (!state_filter) -+ debug_show_all_locks(); -+} -+ -+void dump_cpu_task(int cpu) -+{ -+ pr_info("Task dump for CPU %d:\n", cpu); -+ sched_show_task(cpu_curr(cpu)); -+} -+ -+/** -+ * init_idle - set up an idle thread for a given CPU -+ * @idle: task in question -+ * @cpu: CPU the idle task belongs to -+ * -+ * NOTE: this function does not set the idle thread's NEED_RESCHED -+ * flag, to make booting more robust. -+ */ -+void __init init_idle(struct task_struct *idle, int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ __sched_fork(0, idle); -+ -+ raw_spin_lock_irqsave(&idle->pi_lock, flags); -+ raw_spin_lock(&rq->lock); -+ update_rq_clock(rq); -+ -+ idle->last_ran = rq->clock_task; -+ idle->__state = TASK_RUNNING; -+ /* -+ * PF_KTHREAD should already be set at this point; regardless, make it -+ * look like a proper per-CPU kthread. -+ */ -+ idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY; -+ kthread_set_per_cpu(idle, cpu); -+ -+ sched_queue_init_idle(&rq->queue, idle); -+ -+#ifdef CONFIG_SMP -+ /* -+ * It's possible that init_idle() gets called multiple times on a task, -+ * in that case do_set_cpus_allowed() will not do the right thing. -+ * -+ * And since this is boot we can forgo the serialisation. -+ */ -+ set_cpus_allowed_common(idle, cpumask_of(cpu)); -+#endif -+ -+ /* Silence PROVE_RCU */ -+ rcu_read_lock(); -+ __set_task_cpu(idle, cpu); -+ rcu_read_unlock(); -+ -+ rq->idle = idle; -+ rcu_assign_pointer(rq->curr, idle); -+ idle->on_cpu = 1; -+ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&idle->pi_lock, flags); -+ -+ /* Set the preempt count _outside_ the spinlocks! */ -+ init_idle_preempt_count(idle, cpu); -+ -+ ftrace_graph_init_idle_task(idle, cpu); -+ vtime_init_idle(idle, cpu); -+#ifdef CONFIG_SMP -+ sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); -+#endif -+} -+ -+#ifdef CONFIG_SMP -+ -+int cpuset_cpumask_can_shrink(const struct cpumask __maybe_unused *cur, -+ const struct cpumask __maybe_unused *trial) -+{ -+ return 1; -+} -+ -+int task_can_attach(struct task_struct *p, -+ const struct cpumask *cs_cpus_allowed) -+{ -+ int ret = 0; -+ -+ /* -+ * Kthreads which disallow setaffinity shouldn't be moved -+ * to a new cpuset; we don't want to change their CPU -+ * affinity and isolating such threads by their set of -+ * allowed nodes is unnecessary. Thus, cpusets are not -+ * applicable for such threads. This prevents checking for -+ * success of set_cpus_allowed_ptr() on all attached tasks -+ * before cpus_mask may be changed. -+ */ -+ if (p->flags & PF_NO_SETAFFINITY) -+ ret = -EINVAL; -+ -+ return ret; -+} -+ -+bool sched_smp_initialized __read_mostly; -+ -+#ifdef CONFIG_HOTPLUG_CPU -+/* -+ * Ensures that the idle task is using init_mm right before its CPU goes -+ * offline. 
-+ */ -+void idle_task_exit(void) -+{ -+ struct mm_struct *mm = current->active_mm; -+ -+ BUG_ON(current != this_rq()->idle); -+ -+ if (mm != &init_mm) { -+ switch_mm(mm, &init_mm, current); -+ finish_arch_post_lock_switch(); -+ } -+ -+ /* finish_cpu(), as ran on the BP, will clean up the active_mm state */ -+} -+ -+static int __balance_push_cpu_stop(void *arg) -+{ -+ struct task_struct *p = arg; -+ struct rq *rq = this_rq(); -+ struct rq_flags rf; -+ int cpu; -+ -+ raw_spin_lock_irq(&p->pi_lock); -+ rq_lock(rq, &rf); -+ -+ update_rq_clock(rq); -+ -+ if (task_rq(p) == rq && task_on_rq_queued(p)) { -+ cpu = select_fallback_rq(rq->cpu, p); -+ rq = __migrate_task(rq, p, cpu); -+ } -+ -+ rq_unlock(rq, &rf); -+ raw_spin_unlock_irq(&p->pi_lock); -+ -+ put_task_struct(p); -+ -+ return 0; -+} -+ -+static DEFINE_PER_CPU(struct cpu_stop_work, push_work); -+ -+/* -+ * This is enabled below SCHED_AP_ACTIVE; when !cpu_active(), but only -+ * effective when the hotplug motion is down. -+ */ -+static void balance_push(struct rq *rq) -+{ -+ struct task_struct *push_task = rq->curr; -+ -+ lockdep_assert_held(&rq->lock); -+ -+ /* -+ * Ensure the thing is persistent until balance_push_set(.on = false); -+ */ -+ rq->balance_callback = &balance_push_callback; -+ -+ /* -+ * Only active while going offline and when invoked on the outgoing -+ * CPU. -+ */ -+ if (!cpu_dying(rq->cpu) || rq != this_rq()) -+ return; -+ -+ /* -+ * Both the cpu-hotplug and stop task are in this case and are -+ * required to complete the hotplug process. -+ */ -+ if (kthread_is_per_cpu(push_task) || -+ is_migration_disabled(push_task)) { -+ -+ /* -+ * If this is the idle task on the outgoing CPU try to wake -+ * up the hotplug control thread which might wait for the -+ * last task to vanish. The rcuwait_active() check is -+ * accurate here because the waiter is pinned on this CPU -+ * and can't obviously be running in parallel. -+ * -+ * On RT kernels this also has to check whether there are -+ * pinned and scheduled out tasks on the runqueue. They -+ * need to leave the migrate disabled section first. -+ */ -+ if (!rq->nr_running && !rq_has_pinned_tasks(rq) && -+ rcuwait_active(&rq->hotplug_wait)) { -+ raw_spin_unlock(&rq->lock); -+ rcuwait_wake_up(&rq->hotplug_wait); -+ raw_spin_lock(&rq->lock); -+ } -+ return; -+ } -+ -+ get_task_struct(push_task); -+ /* -+ * Temporarily drop rq->lock such that we can wake-up the stop task. -+ * Both preemption and IRQs are still disabled. -+ */ -+ raw_spin_unlock(&rq->lock); -+ stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task, -+ this_cpu_ptr(&push_work)); -+ /* -+ * At this point need_resched() is true and we'll take the loop in -+ * schedule(). The next pick is obviously going to be the stop task -+ * which kthread_is_per_cpu() and will push this task away. -+ */ -+ raw_spin_lock(&rq->lock); -+} -+ -+static void balance_push_set(int cpu, bool on) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ struct rq_flags rf; -+ -+ rq_lock_irqsave(rq, &rf); -+ if (on) { -+ WARN_ON_ONCE(rq->balance_callback); -+ rq->balance_callback = &balance_push_callback; -+ } else if (rq->balance_callback == &balance_push_callback) { -+ rq->balance_callback = NULL; -+ } -+ rq_unlock_irqrestore(rq, &rf); -+} -+ -+/* -+ * Invoked from a CPUs hotplug control thread after the CPU has been marked -+ * inactive. All tasks which are not per CPU kernel threads are either -+ * pushed off this CPU now via balance_push() or placed on a different CPU -+ * during wakeup. Wait until the CPU is quiescent. 
-+ */ -+static void balance_hotplug_wait(void) -+{ -+ struct rq *rq = this_rq(); -+ -+ rcuwait_wait_event(&rq->hotplug_wait, -+ rq->nr_running == 1 && !rq_has_pinned_tasks(rq), -+ TASK_UNINTERRUPTIBLE); -+} -+ -+#else -+ -+static void balance_push(struct rq *rq) -+{ -+} -+ -+static void balance_push_set(int cpu, bool on) -+{ -+} -+ -+static inline void balance_hotplug_wait(void) -+{ -+} -+#endif /* CONFIG_HOTPLUG_CPU */ -+ -+static void set_rq_offline(struct rq *rq) -+{ -+ if (rq->online) -+ rq->online = false; -+} -+ -+static void set_rq_online(struct rq *rq) -+{ -+ if (!rq->online) -+ rq->online = true; -+} -+ -+/* -+ * used to mark begin/end of suspend/resume: -+ */ -+static int num_cpus_frozen; -+ -+/* -+ * Update cpusets according to cpu_active mask. If cpusets are -+ * disabled, cpuset_update_active_cpus() becomes a simple wrapper -+ * around partition_sched_domains(). -+ * -+ * If we come here as part of a suspend/resume, don't touch cpusets because we -+ * want to restore it back to its original state upon resume anyway. -+ */ -+static void cpuset_cpu_active(void) -+{ -+ if (cpuhp_tasks_frozen) { -+ /* -+ * num_cpus_frozen tracks how many CPUs are involved in suspend -+ * resume sequence. As long as this is not the last online -+ * operation in the resume sequence, just build a single sched -+ * domain, ignoring cpusets. -+ */ -+ partition_sched_domains(1, NULL, NULL); -+ if (--num_cpus_frozen) -+ return; -+ /* -+ * This is the last CPU online operation. So fall through and -+ * restore the original sched domains by considering the -+ * cpuset configurations. -+ */ -+ cpuset_force_rebuild(); -+ } -+ -+ cpuset_update_active_cpus(); -+} -+ -+static int cpuset_cpu_inactive(unsigned int cpu) -+{ -+ if (!cpuhp_tasks_frozen) { -+ cpuset_update_active_cpus(); -+ } else { -+ num_cpus_frozen++; -+ partition_sched_domains(1, NULL, NULL); -+ } -+ return 0; -+} -+ -+int sched_cpu_activate(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ /* -+ * Clear the balance_push callback and prepare to schedule -+ * regular tasks. -+ */ -+ balance_push_set(cpu, false); -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going up, increment the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) -+ static_branch_inc_cpuslocked(&sched_smt_present); -+#endif -+ set_cpu_active(cpu, true); -+ -+ if (sched_smp_initialized) -+ cpuset_cpu_active(); -+ -+ /* -+ * Put the rq online, if not already. This happens: -+ * -+ * 1) In the early boot process, because we build the real domains -+ * after all cpus have been brought up. -+ * -+ * 2) At runtime, if cpuset_cpu_active() fails to rebuild the -+ * domains. -+ */ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ set_rq_online(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ return 0; -+} -+ -+int sched_cpu_deactivate(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ int ret; -+ -+ set_cpu_active(cpu, false); -+ -+ /* -+ * From this point forward, this CPU will refuse to run any task that -+ * is not: migrate_disable() or KTHREAD_IS_PER_CPU, and will actively -+ * push those tasks away until this gets cleared, see -+ * sched_cpu_dying(). -+ */ -+ balance_push_set(cpu, true); -+ -+ /* -+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -+ * users of this state to go away such that all new such users will -+ * observe it. -+ * -+ * Specifically, we rely on ttwu to no longer target this CPU, see -+ * ttwu_queue_cond() and is_cpu_allowed(). 
-+ * -+ * Do sync before park smpboot threads to take care the rcu boost case. -+ */ -+ synchronize_rcu(); -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ update_rq_clock(rq); -+ set_rq_offline(rq); -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+#ifdef CONFIG_SCHED_SMT -+ /* -+ * When going down, decrement the number of cores with SMT present. -+ */ -+ if (cpumask_weight(cpu_smt_mask(cpu)) == 2) { -+ static_branch_dec_cpuslocked(&sched_smt_present); -+ if (!static_branch_likely(&sched_smt_present)) -+ cpumask_clear(&sched_sg_idle_mask); -+ } -+#endif -+ -+ if (!sched_smp_initialized) -+ return 0; -+ -+ ret = cpuset_cpu_inactive(cpu); -+ if (ret) { -+ balance_push_set(cpu, false); -+ set_cpu_active(cpu, true); -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static void sched_rq_cpu_starting(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ -+ rq->calc_load_update = calc_load_update; -+} -+ -+int sched_cpu_starting(unsigned int cpu) -+{ -+ sched_rq_cpu_starting(cpu); -+ sched_tick_start(cpu); -+ return 0; -+} -+ -+#ifdef CONFIG_HOTPLUG_CPU -+ -+/* -+ * Invoked immediately before the stopper thread is invoked to bring the -+ * CPU down completely. At this point all per CPU kthreads except the -+ * hotplug thread (current) and the stopper thread (inactive) have been -+ * either parked or have been unbound from the outgoing CPU. Ensure that -+ * any of those which might be on the way out are gone. -+ * -+ * If after this point a bound task is being woken on this CPU then the -+ * responsible hotplug callback has failed to do it's job. -+ * sched_cpu_dying() will catch it with the appropriate fireworks. -+ */ -+int sched_cpu_wait_empty(unsigned int cpu) -+{ -+ balance_hotplug_wait(); -+ return 0; -+} -+ -+/* -+ * Since this CPU is going 'away' for a while, fold any nr_active delta we -+ * might have. Called from the CPU stopper task after ensuring that the -+ * stopper is the last running task on the CPU, so nr_active count is -+ * stable. We need to take the teardown thread which is calling this into -+ * account, so we hand in adjust = 1 to the load calculation. -+ * -+ * Also see the comment "Global load-average calculations". 
-+ */ -+static void calc_load_migrate(struct rq *rq) -+{ -+ long delta = calc_load_fold_active(rq, 1); -+ -+ if (delta) -+ atomic_long_add(delta, &calc_load_tasks); -+} -+ -+static void dump_rq_tasks(struct rq *rq, const char *loglvl) -+{ -+ struct task_struct *g, *p; -+ int cpu = cpu_of(rq); -+ -+ lockdep_assert_held(&rq->lock); -+ -+ printk("%sCPU%d enqueued tasks (%u total):\n", loglvl, cpu, rq->nr_running); -+ for_each_process_thread(g, p) { -+ if (task_cpu(p) != cpu) -+ continue; -+ -+ if (!task_on_rq_queued(p)) -+ continue; -+ -+ printk("%s\tpid: %d, name: %s\n", loglvl, p->pid, p->comm); -+ } -+} -+ -+int sched_cpu_dying(unsigned int cpu) -+{ -+ struct rq *rq = cpu_rq(cpu); -+ unsigned long flags; -+ -+ /* Handle pending wakeups and then migrate everything off */ -+ sched_tick_stop(cpu); -+ -+ raw_spin_lock_irqsave(&rq->lock, flags); -+ if (rq->nr_running != 1 || rq_has_pinned_tasks(rq)) { -+ WARN(true, "Dying CPU not properly vacated!"); -+ dump_rq_tasks(rq, KERN_WARNING); -+ } -+ raw_spin_unlock_irqrestore(&rq->lock, flags); -+ -+ calc_load_migrate(rq); -+ hrtick_clear(rq); -+ return 0; -+} -+#endif -+ -+#ifdef CONFIG_SMP -+static void sched_init_topology_cpumask_early(void) -+{ -+ int cpu; -+ cpumask_t *tmp; -+ -+ for_each_possible_cpu(cpu) { -+ /* init topo masks */ -+ tmp = per_cpu(sched_cpu_topo_masks, cpu); -+ -+ cpumask_copy(tmp, cpumask_of(cpu)); -+ tmp++; -+ cpumask_copy(tmp, cpu_possible_mask); -+ per_cpu(sched_cpu_llc_mask, cpu) = tmp; -+ per_cpu(sched_cpu_topo_end_mask, cpu) = ++tmp; -+ /*per_cpu(sd_llc_id, cpu) = cpu;*/ -+ } -+} -+ -+#define TOPOLOGY_CPUMASK(name, mask, last)\ -+ if (cpumask_and(topo, topo, mask)) { \ -+ cpumask_copy(topo, mask); \ -+ printk(KERN_INFO "sched: cpu#%02d topo: 0x%08lx - "#name, \ -+ cpu, (topo++)->bits[0]); \ -+ } \ -+ if (!last) \ -+ cpumask_complement(topo, mask) -+ -+static void sched_init_topology_cpumask(void) -+{ -+ int cpu; -+ cpumask_t *topo; -+ -+ for_each_online_cpu(cpu) { -+ /* take chance to reset time slice for idle tasks */ -+ cpu_rq(cpu)->idle->time_slice = sched_timeslice_ns; -+ -+ topo = per_cpu(sched_cpu_topo_masks, cpu) + 1; -+ -+ cpumask_complement(topo, cpumask_of(cpu)); -+#ifdef CONFIG_SCHED_SMT -+ TOPOLOGY_CPUMASK(smt, topology_sibling_cpumask(cpu), false); -+#endif -+ per_cpu(sd_llc_id, cpu) = cpumask_first(cpu_coregroup_mask(cpu)); -+ per_cpu(sched_cpu_llc_mask, cpu) = topo; -+ TOPOLOGY_CPUMASK(coregroup, cpu_coregroup_mask(cpu), false); -+ -+ TOPOLOGY_CPUMASK(core, topology_core_cpumask(cpu), false); -+ -+ TOPOLOGY_CPUMASK(others, cpu_online_mask, true); -+ -+ per_cpu(sched_cpu_topo_end_mask, cpu) = topo; -+ printk(KERN_INFO "sched: cpu#%02d llc_id = %d, llc_mask idx = %d\n", -+ cpu, per_cpu(sd_llc_id, cpu), -+ (int) (per_cpu(sched_cpu_llc_mask, cpu) - -+ per_cpu(sched_cpu_topo_masks, cpu))); -+ } -+} -+#endif -+ -+void __init sched_init_smp(void) -+{ -+ /* Move init over to a non-isolated CPU */ -+ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_DOMAIN)) < 0) -+ BUG(); -+ current->flags &= ~PF_NO_SETAFFINITY; -+ -+ sched_init_topology_cpumask(); -+ -+ sched_smp_initialized = true; -+} -+#else -+void __init sched_init_smp(void) -+{ -+ cpu_rq(0)->idle->time_slice = sched_timeslice_ns; -+} -+#endif /* CONFIG_SMP */ -+ -+int in_sched_functions(unsigned long addr) -+{ -+ return in_lock_functions(addr) || -+ (addr >= (unsigned long)__sched_text_start -+ && addr < (unsigned long)__sched_text_end); -+} -+ -+#ifdef CONFIG_CGROUP_SCHED -+/* task group related information */ -+struct task_group { -+ struct 
cgroup_subsys_state css; -+ -+ struct rcu_head rcu; -+ struct list_head list; -+ -+ struct task_group *parent; -+ struct list_head siblings; -+ struct list_head children; -+#ifdef CONFIG_FAIR_GROUP_SCHED -+ unsigned long shares; -+#endif -+}; -+ -+/* -+ * Default task group. -+ * Every task in system belongs to this group at bootup. -+ */ -+struct task_group root_task_group; -+LIST_HEAD(task_groups); -+ -+/* Cacheline aligned slab cache for task_group */ -+static struct kmem_cache *task_group_cache __read_mostly; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+void __init sched_init(void) -+{ -+ int i; -+ struct rq *rq; -+ -+ printk(KERN_INFO ALT_SCHED_VERSION_MSG); -+ -+ wait_bit_init(); -+ -+#ifdef CONFIG_SMP -+ for (i = 0; i < SCHED_QUEUE_BITS; i++) -+ cpumask_copy(sched_rq_watermark + i, cpu_present_mask); -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+ task_group_cache = KMEM_CACHE(task_group, 0); -+ -+ list_add(&root_task_group.list, &task_groups); -+ INIT_LIST_HEAD(&root_task_group.children); -+ INIT_LIST_HEAD(&root_task_group.siblings); -+#endif /* CONFIG_CGROUP_SCHED */ -+ for_each_possible_cpu(i) { -+ rq = cpu_rq(i); -+ -+ sched_queue_init(&rq->queue); -+ rq->watermark = IDLE_TASK_SCHED_PRIO; -+ rq->skip = NULL; -+ -+ raw_spin_lock_init(&rq->lock); -+ rq->nr_running = rq->nr_uninterruptible = 0; -+ rq->calc_load_active = 0; -+ rq->calc_load_update = jiffies + LOAD_FREQ; -+#ifdef CONFIG_SMP -+ rq->online = false; -+ rq->cpu = i; -+ -+#ifdef CONFIG_SCHED_SMT -+ rq->active_balance = 0; -+#endif -+ -+#ifdef CONFIG_NO_HZ_COMMON -+ INIT_CSD(&rq->nohz_csd, nohz_csd_func, rq); -+#endif -+ rq->balance_callback = &balance_push_callback; -+#ifdef CONFIG_HOTPLUG_CPU -+ rcuwait_init(&rq->hotplug_wait); -+#endif -+#endif /* CONFIG_SMP */ -+ rq->nr_switches = 0; -+ -+ hrtick_rq_init(rq); -+ atomic_set(&rq->nr_iowait, 0); -+ } -+#ifdef CONFIG_SMP -+ /* Set rq->online for cpu 0 */ -+ cpu_rq(0)->online = true; -+#endif -+ /* -+ * The boot idle thread does lazy MMU switching as well: -+ */ -+ mmgrab(&init_mm); -+ enter_lazy_tlb(&init_mm, current); -+ -+ /* -+ * The idle task doesn't need the kthread struct to function, but it -+ * is dressed up as a per-CPU kthread and thus needs to play the part -+ * if we want to avoid special-casing it in code that deals with per-CPU -+ * kthreads. -+ */ -+ WARN_ON(!set_kthread_struct(current)); -+ -+ /* -+ * Make us the idle thread. Technically, schedule() should not be -+ * called from this thread, however somewhere below it might be, -+ * but because we are the idle thread, we just pick up running again -+ * when this runqueue becomes "idle". -+ */ -+ init_idle(current, smp_processor_id()); -+ -+ calc_load_update = jiffies + LOAD_FREQ; -+ -+#ifdef CONFIG_SMP -+ idle_thread_set_boot_cpu(); -+ balance_push_set(smp_processor_id(), false); -+ -+ sched_init_topology_cpumask_early(); -+#endif /* SMP */ -+ -+ psi_init(); -+ -+ preempt_dynamic_init(); -+} -+ -+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP -+ -+void __might_sleep(const char *file, int line) -+{ -+ unsigned int state = get_current_state(); -+ /* -+ * Blocking primitives will set (and therefore destroy) current->state, -+ * since we will exit with TASK_RUNNING make sure we enter with it, -+ * otherwise we will destroy state. 
-+ */ -+ WARN_ONCE(state != TASK_RUNNING && current->task_state_change, -+ "do not call blocking ops when !TASK_RUNNING; " -+ "state=%x set at [<%p>] %pS\n", state, -+ (void *)current->task_state_change, -+ (void *)current->task_state_change); -+ -+ __might_resched(file, line, 0); -+} -+EXPORT_SYMBOL(__might_sleep); -+ -+static void print_preempt_disable_ip(int preempt_offset, unsigned long ip) -+{ -+ if (!IS_ENABLED(CONFIG_DEBUG_PREEMPT)) -+ return; -+ -+ if (preempt_count() == preempt_offset) -+ return; -+ -+ pr_err("Preemption disabled at:"); -+ print_ip_sym(KERN_ERR, ip); -+} -+ -+static inline bool resched_offsets_ok(unsigned int offsets) -+{ -+ unsigned int nested = preempt_count(); -+ -+ nested += rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT; -+ -+ return nested == offsets; -+} -+ -+void __might_resched(const char *file, int line, unsigned int offsets) -+{ -+ /* Ratelimiting timestamp: */ -+ static unsigned long prev_jiffy; -+ -+ unsigned long preempt_disable_ip; -+ -+ /* WARN_ON_ONCE() by default, no rate limit required: */ -+ rcu_sleep_check(); -+ -+ if ((resched_offsets_ok(offsets) && !irqs_disabled() && -+ !is_idle_task(current) && !current->non_block_count) || -+ system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING || -+ oops_in_progress) -+ return; -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ /* Save this before calling printk(), since that will clobber it: */ -+ preempt_disable_ip = get_preempt_disable_ip(current); -+ -+ pr_err("BUG: sleeping function called from invalid context at %s:%d\n", -+ file, line); -+ pr_err("in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), current->non_block_count, -+ current->pid, current->comm); -+ pr_err("preempt_count: %x, expected: %x\n", preempt_count(), -+ offsets & MIGHT_RESCHED_PREEMPT_MASK); -+ -+ if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { -+ pr_err("RCU nest depth: %d, expected: %u\n", -+ rcu_preempt_depth(), offsets >> MIGHT_RESCHED_RCU_SHIFT); -+ } -+ -+ if (task_stack_end_corrupted(current)) -+ pr_emerg("Thread overran stack, or stack corrupted\n"); -+ -+ debug_show_held_locks(current); -+ if (irqs_disabled()) -+ print_irqtrace_events(current); -+ -+ print_preempt_disable_ip(offsets & MIGHT_RESCHED_PREEMPT_MASK, -+ preempt_disable_ip); -+ -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL(__might_resched); -+ -+void __cant_sleep(const char *file, int line, int preempt_offset) -+{ -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > preempt_offset) -+ return; -+ -+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line); -+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL_GPL(__cant_sleep); -+ -+#ifdef CONFIG_SMP -+void __cant_migrate(const char *file, int line) -+{ -+ static unsigned long prev_jiffy; -+ -+ if (irqs_disabled()) -+ return; -+ -+ if (is_migration_disabled(current)) -+ return; -+ -+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT)) -+ return; -+ -+ if (preempt_count() > 0) -+ return; -+ -+ if (current->migration_flags & MDF_FORCE_ENABLED) -+ return; -+ -+ if 
(time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -+ return; -+ prev_jiffy = jiffies; -+ -+ pr_err("BUG: assuming non migratable context at %s:%d\n", file, line); -+ pr_err("in_atomic(): %d, irqs_disabled(): %d, migration_disabled() %u pid: %d, name: %s\n", -+ in_atomic(), irqs_disabled(), is_migration_disabled(current), -+ current->pid, current->comm); -+ -+ debug_show_held_locks(current); -+ dump_stack(); -+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK); -+} -+EXPORT_SYMBOL_GPL(__cant_migrate); -+#endif -+#endif -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+void normalize_rt_tasks(void) -+{ -+ struct task_struct *g, *p; -+ struct sched_attr attr = { -+ .sched_policy = SCHED_NORMAL, -+ }; -+ -+ read_lock(&tasklist_lock); -+ for_each_process_thread(g, p) { -+ /* -+ * Only normalize user tasks: -+ */ -+ if (p->flags & PF_KTHREAD) -+ continue; -+ -+ schedstat_set(p->stats.wait_start, 0); -+ schedstat_set(p->stats.sleep_start, 0); -+ schedstat_set(p->stats.block_start, 0); -+ -+ if (!rt_task(p)) { -+ /* -+ * Renice negative nice level userspace -+ * tasks back to 0: -+ */ -+ if (task_nice(p) < 0) -+ set_user_nice(p, 0); -+ continue; -+ } -+ -+ __sched_setscheduler(p, &attr, false, false); -+ } -+ read_unlock(&tasklist_lock); -+} -+#endif /* CONFIG_MAGIC_SYSRQ */ -+ -+#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) -+/* -+ * These functions are only useful for the IA64 MCA handling, or kdb. -+ * -+ * They can only be called when the whole system has been -+ * stopped - every CPU needs to be quiescent, and no scheduling -+ * activity can take place. Using them for anything else would -+ * be a serious bug, and as a result, they aren't even visible -+ * under any other configuration. -+ */ -+ -+/** -+ * curr_task - return the current task for a given CPU. -+ * @cpu: the processor in question. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ * -+ * Return: The current task for @cpu. -+ */ -+struct task_struct *curr_task(int cpu) -+{ -+ return cpu_curr(cpu); -+} -+ -+#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ -+ -+#ifdef CONFIG_IA64 -+/** -+ * ia64_set_curr_task - set the current task for a given CPU. -+ * @cpu: the processor in question. -+ * @p: the task pointer to set. -+ * -+ * Description: This function must only be used when non-maskable interrupts -+ * are serviced on a separate stack. It allows the architecture to switch the -+ * notion of the current task on a CPU in a non-blocking manner. This function -+ * must be called with all CPU's synchronised, and interrupts disabled, the -+ * and caller must save the original value of the current task (see -+ * curr_task() above) and restore that value before reenabling interrupts and -+ * re-starting the system. -+ * -+ * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! -+ */ -+void ia64_set_curr_task(int cpu, struct task_struct *p) -+{ -+ cpu_curr(cpu) = p; -+} -+ -+#endif -+ -+#ifdef CONFIG_CGROUP_SCHED -+static void sched_free_group(struct task_group *tg) -+{ -+ kmem_cache_free(task_group_cache, tg); -+} -+ -+static void sched_free_group_rcu(struct rcu_head *rhp) -+{ -+ sched_free_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+static void sched_unregister_group(struct task_group *tg) -+{ -+ /* -+ * We have to wait for yet another RCU grace period to expire, as -+ * print_cfs_stats() might run concurrently. 
-+ */ -+ call_rcu(&tg->rcu, sched_free_group_rcu); -+} -+ -+/* allocate runqueue etc for a new task group */ -+struct task_group *sched_create_group(struct task_group *parent) -+{ -+ struct task_group *tg; -+ -+ tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO); -+ if (!tg) -+ return ERR_PTR(-ENOMEM); -+ -+ return tg; -+} -+ -+void sched_online_group(struct task_group *tg, struct task_group *parent) -+{ -+} -+ -+/* rcu callback to free various structures associated with a task group */ -+static void sched_unregister_group_rcu(struct rcu_head *rhp) -+{ -+ /* Now it should be safe to free those cfs_rqs: */ -+ sched_unregister_group(container_of(rhp, struct task_group, rcu)); -+} -+ -+void sched_destroy_group(struct task_group *tg) -+{ -+ /* Wait for possible concurrent references to cfs_rqs complete: */ -+ call_rcu(&tg->rcu, sched_unregister_group_rcu); -+} -+ -+void sched_release_group(struct task_group *tg) -+{ -+} -+ -+static inline struct task_group *css_tg(struct cgroup_subsys_state *css) -+{ -+ return css ? container_of(css, struct task_group, css) : NULL; -+} -+ -+static struct cgroup_subsys_state * -+cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) -+{ -+ struct task_group *parent = css_tg(parent_css); -+ struct task_group *tg; -+ -+ if (!parent) { -+ /* This is early initialization for the top cgroup */ -+ return &root_task_group.css; -+ } -+ -+ tg = sched_create_group(parent); -+ if (IS_ERR(tg)) -+ return ERR_PTR(-ENOMEM); -+ return &tg->css; -+} -+ -+/* Expose task group only after completing cgroup initialization */ -+static int cpu_cgroup_css_online(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ struct task_group *parent = css_tg(css->parent); -+ -+ if (parent) -+ sched_online_group(tg, parent); -+ return 0; -+} -+ -+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ sched_release_group(tg); -+} -+ -+static void cpu_cgroup_css_free(struct cgroup_subsys_state *css) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ /* -+ * Relies on the RCU grace period between css_released() and this. -+ */ -+ sched_unregister_group(tg); -+} -+ -+static void cpu_cgroup_fork(struct task_struct *task) -+{ -+} -+ -+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) -+{ -+ return 0; -+} -+ -+static void cpu_cgroup_attach(struct cgroup_taskset *tset) -+{ -+} -+ -+#ifdef CONFIG_FAIR_GROUP_SCHED -+static DEFINE_MUTEX(shares_mutex); -+ -+int sched_group_set_shares(struct task_group *tg, unsigned long shares) -+{ -+ /* -+ * We can't change the weight of the root cgroup. 
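On the legacy hierarchy this weight is exposed as the cpu.shares file; a minimal userspace sketch, assuming a cgroup-v1 cpu controller mounted at /sys/fs/cgroup/cpu and an already-created group named "demo" (both are assumptions of this example):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/fs/cgroup/cpu/demo/cpu.shares", "w");

		if (!f) {
			perror("cpu.shares");
			return 1;
		}
		fprintf(f, "512\n");	/* half the default weight of 1024 */
		fclose(f);
		return 0;
	}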
-+ */ -+ if (&root_task_group == tg) -+ return -EINVAL; -+ -+ shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES)); -+ -+ mutex_lock(&shares_mutex); -+ if (tg->shares == shares) -+ goto done; -+ -+ tg->shares = shares; -+done: -+ mutex_unlock(&shares_mutex); -+ return 0; -+} -+ -+static int cpu_shares_write_u64(struct cgroup_subsys_state *css, -+ struct cftype *cftype, u64 shareval) -+{ -+ if (shareval > scale_load_down(ULONG_MAX)) -+ shareval = MAX_SHARES; -+ return sched_group_set_shares(css_tg(css), scale_load(shareval)); -+} -+ -+static u64 cpu_shares_read_u64(struct cgroup_subsys_state *css, -+ struct cftype *cft) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ return (u64) scale_load_down(tg->shares); -+} -+#endif -+ -+static struct cftype cpu_legacy_files[] = { -+#ifdef CONFIG_FAIR_GROUP_SCHED -+ { -+ .name = "shares", -+ .read_u64 = cpu_shares_read_u64, -+ .write_u64 = cpu_shares_write_u64, -+ }, -+#endif -+ { } /* Terminate */ -+}; -+ -+ -+static struct cftype cpu_files[] = { -+ { } /* terminate */ -+}; -+ -+static int cpu_extra_stat_show(struct seq_file *sf, -+ struct cgroup_subsys_state *css) -+{ -+ return 0; -+} -+ -+struct cgroup_subsys cpu_cgrp_subsys = { -+ .css_alloc = cpu_cgroup_css_alloc, -+ .css_online = cpu_cgroup_css_online, -+ .css_released = cpu_cgroup_css_released, -+ .css_free = cpu_cgroup_css_free, -+ .css_extra_stat_show = cpu_extra_stat_show, -+ .fork = cpu_cgroup_fork, -+ .can_attach = cpu_cgroup_can_attach, -+ .attach = cpu_cgroup_attach, -+ .legacy_cftypes = cpu_files, -+ .legacy_cftypes = cpu_legacy_files, -+ .dfl_cftypes = cpu_files, -+ .early_init = true, -+ .threaded = true, -+}; -+#endif /* CONFIG_CGROUP_SCHED */ -+ -+#undef CREATE_TRACE_POINTS -diff --git a/kernel/sched/alt_debug.c b/kernel/sched/alt_debug.c -new file mode 100644 -index 000000000000..1212a031700e ---- /dev/null -+++ b/kernel/sched/alt_debug.c -@@ -0,0 +1,31 @@ -+/* -+ * kernel/sched/alt_debug.c -+ * -+ * Print the alt scheduler debugging details -+ * -+ * Author: Alfred Chen -+ * Date : 2020 -+ */ -+#include "sched.h" -+ -+/* -+ * This allows printing both to /proc/sched_debug and -+ * to the console -+ */ -+#define SEQ_printf(m, x...) 
\ -+ do { \ -+ if (m) \ -+ seq_printf(m, x); \ -+ else \ -+ pr_cont(x); \ -+ } while (0) -+ -+void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, -+ struct seq_file *m) -+{ -+ SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), -+ get_nr_threads(p)); -+} -+ -+void proc_sched_set_task(struct task_struct *p) -+{} -diff --git a/kernel/sched/alt_sched.h b/kernel/sched/alt_sched.h -new file mode 100644 -index 000000000000..a181bf9ce57d ---- /dev/null -+++ b/kernel/sched/alt_sched.h -@@ -0,0 +1,645 @@ -+#ifndef ALT_SCHED_H -+#define ALT_SCHED_H -+ -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include "../workqueue_internal.h" -+ -+#include "cpupri.h" -+ -+#ifdef CONFIG_SCHED_BMQ -+/* bits: -+ * RT(0-99), (Low prio adj range, nice width, high prio adj range) / 2, cpu idle task */ -+#define SCHED_BITS (MAX_RT_PRIO + NICE_WIDTH / 2 + MAX_PRIORITY_ADJ + 1) -+#endif -+ -+#ifdef CONFIG_SCHED_PDS -+/* bits: RT(0-99), reserved(100-127), NORMAL_PRIO_NUM, cpu idle task */ -+#define SCHED_BITS (MIN_NORMAL_PRIO + NORMAL_PRIO_NUM + 1) -+#endif /* CONFIG_SCHED_PDS */ -+ -+#define IDLE_TASK_SCHED_PRIO (SCHED_BITS - 1) -+ -+#ifdef CONFIG_SCHED_DEBUG -+# define SCHED_WARN_ON(x) WARN_ONCE(x, #x) -+extern void resched_latency_warn(int cpu, u64 latency); -+#else -+# define SCHED_WARN_ON(x) ({ (void)(x), 0; }) -+static inline void resched_latency_warn(int cpu, u64 latency) {} -+#endif -+ -+/* -+ * Increase resolution of nice-level calculations for 64-bit architectures. -+ * The extra resolution improves shares distribution and load balancing of -+ * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup -+ * hierarchies, especially on larger systems. This is not a user-visible change -+ * and does not change the user-interface for setting shares/weights. -+ * -+ * We increase resolution only if we have enough bits to allow this increased -+ * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit -+ * are pretty high and the returns do not justify the increased costs. -+ * -+ * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to -+ * increase coverage and consistency always enable it on 64-bit platforms. -+ */ -+#ifdef CONFIG_64BIT -+# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT) -+# define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT) -+# define scale_load_down(w) \ -+({ \ -+ unsigned long __w = (w); \ -+ if (__w) \ -+ __w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \ -+ __w; \ -+}) -+#else -+# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT) -+# define scale_load(w) (w) -+# define scale_load_down(w) (w) -+#endif -+ -+#ifdef CONFIG_FAIR_GROUP_SCHED -+#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD -+ -+/* -+ * A weight of 0 or 1 can cause arithmetics problems. -+ * A weight of a cfs_rq is the sum of weights of which entities -+ * are queued on this cfs_rq, so a weight of a entity should not be -+ * too large, so as the shares value of a task group. -+ * (The default weight is 1024 - so there's no practical -+ * limitation from this.) 
-+ */ -+#define MIN_SHARES (1UL << 1) -+#define MAX_SHARES (1UL << 18) -+#endif -+ -+/* task_struct::on_rq states: */ -+#define TASK_ON_RQ_QUEUED 1 -+#define TASK_ON_RQ_MIGRATING 2 -+ -+static inline int task_on_rq_queued(struct task_struct *p) -+{ -+ return p->on_rq == TASK_ON_RQ_QUEUED; -+} -+ -+static inline int task_on_rq_migrating(struct task_struct *p) -+{ -+ return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING; -+} -+ -+/* -+ * wake flags -+ */ -+#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ -+#define WF_FORK 0x02 /* child wakeup after fork */ -+#define WF_MIGRATED 0x04 /* internal use, task got migrated */ -+#define WF_ON_CPU 0x08 /* Wakee is on_rq */ -+ -+#define SCHED_QUEUE_BITS (SCHED_BITS - 1) -+ -+struct sched_queue { -+ DECLARE_BITMAP(bitmap, SCHED_QUEUE_BITS); -+ struct list_head heads[SCHED_BITS]; -+}; -+ -+/* -+ * This is the main, per-CPU runqueue data structure. -+ * This data should only be modified by the local cpu. -+ */ -+struct rq { -+ /* runqueue lock: */ -+ raw_spinlock_t lock; -+ -+ struct task_struct __rcu *curr; -+ struct task_struct *idle, *stop, *skip; -+ struct mm_struct *prev_mm; -+ -+ struct sched_queue queue; -+#ifdef CONFIG_SCHED_PDS -+ u64 time_edge; -+#endif -+ unsigned long watermark; -+ -+ /* switch count */ -+ u64 nr_switches; -+ -+ atomic_t nr_iowait; -+ -+#ifdef CONFIG_SCHED_DEBUG -+ u64 last_seen_need_resched_ns; -+ int ticks_without_resched; -+#endif -+ -+#ifdef CONFIG_MEMBARRIER -+ int membarrier_state; -+#endif -+ -+#ifdef CONFIG_SMP -+ int cpu; /* cpu of this runqueue */ -+ bool online; -+ -+ unsigned int ttwu_pending; -+ unsigned char nohz_idle_balance; -+ unsigned char idle_balance; -+ -+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ -+ struct sched_avg avg_irq; -+#endif -+ -+#ifdef CONFIG_SCHED_SMT -+ int active_balance; -+ struct cpu_stop_work active_balance_work; -+#endif -+ struct callback_head *balance_callback; -+#ifdef CONFIG_HOTPLUG_CPU -+ struct rcuwait hotplug_wait; -+#endif -+ unsigned int nr_pinned; -+ -+#endif /* CONFIG_SMP */ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+ u64 prev_irq_time; -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+#ifdef CONFIG_PARAVIRT -+ u64 prev_steal_time; -+#endif /* CONFIG_PARAVIRT */ -+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING -+ u64 prev_steal_time_rq; -+#endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */ -+ -+ /* For genenal cpu load util */ -+ s32 load_history; -+ u64 load_block; -+ u64 load_stamp; -+ -+ /* calc_load related fields */ -+ unsigned long calc_load_update; -+ long calc_load_active; -+ -+ u64 clock, last_tick; -+ u64 last_ts_switch; -+ u64 clock_task; -+ -+ unsigned int nr_running; -+ unsigned long nr_uninterruptible; -+ -+#ifdef CONFIG_SCHED_HRTICK -+#ifdef CONFIG_SMP -+ call_single_data_t hrtick_csd; -+#endif -+ struct hrtimer hrtick_timer; -+ ktime_t hrtick_time; -+#endif -+ -+#ifdef CONFIG_SCHEDSTATS -+ -+ /* latency stats */ -+ struct sched_info rq_sched_info; -+ unsigned long long rq_cpu_time; -+ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? 
*/ -+ -+ /* sys_sched_yield() stats */ -+ unsigned int yld_count; -+ -+ /* schedule() stats */ -+ unsigned int sched_switch; -+ unsigned int sched_count; -+ unsigned int sched_goidle; -+ -+ /* try_to_wake_up() stats */ -+ unsigned int ttwu_count; -+ unsigned int ttwu_local; -+#endif /* CONFIG_SCHEDSTATS */ -+ -+#ifdef CONFIG_CPU_IDLE -+ /* Must be inspected within a rcu lock section */ -+ struct cpuidle_state *idle_state; -+#endif -+ -+#ifdef CONFIG_NO_HZ_COMMON -+#ifdef CONFIG_SMP -+ call_single_data_t nohz_csd; -+#endif -+ atomic_t nohz_flags; -+#endif /* CONFIG_NO_HZ_COMMON */ -+}; -+ -+extern unsigned long rq_load_util(struct rq *rq, unsigned long max); -+ -+extern unsigned long calc_load_update; -+extern atomic_long_t calc_load_tasks; -+ -+extern void calc_global_load_tick(struct rq *this_rq); -+extern long calc_load_fold_active(struct rq *this_rq, long adjust); -+ -+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) -+#define this_rq() this_cpu_ptr(&runqueues) -+#define task_rq(p) cpu_rq(task_cpu(p)) -+#define cpu_curr(cpu) (cpu_rq(cpu)->curr) -+#define raw_rq() raw_cpu_ptr(&runqueues) -+ -+#ifdef CONFIG_SMP -+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) -+void register_sched_domain_sysctl(void); -+void unregister_sched_domain_sysctl(void); -+#else -+static inline void register_sched_domain_sysctl(void) -+{ -+} -+static inline void unregister_sched_domain_sysctl(void) -+{ -+} -+#endif -+ -+extern bool sched_smp_initialized; -+ -+enum { -+ ITSELF_LEVEL_SPACE_HOLDER, -+#ifdef CONFIG_SCHED_SMT -+ SMT_LEVEL_SPACE_HOLDER, -+#endif -+ COREGROUP_LEVEL_SPACE_HOLDER, -+ CORE_LEVEL_SPACE_HOLDER, -+ OTHER_LEVEL_SPACE_HOLDER, -+ NR_CPU_AFFINITY_LEVELS -+}; -+ -+DECLARE_PER_CPU(cpumask_t [NR_CPU_AFFINITY_LEVELS], sched_cpu_topo_masks); -+DECLARE_PER_CPU(cpumask_t *, sched_cpu_llc_mask); -+ -+static inline int -+__best_mask_cpu(const cpumask_t *cpumask, const cpumask_t *mask) -+{ -+ int cpu; -+ -+ while ((cpu = cpumask_any_and(cpumask, mask)) >= nr_cpu_ids) -+ mask++; -+ -+ return cpu; -+} -+ -+static inline int best_mask_cpu(int cpu, const cpumask_t *mask) -+{ -+ return __best_mask_cpu(mask, per_cpu(sched_cpu_topo_masks, cpu)); -+} -+ -+extern void flush_smp_call_function_queue(void); -+ -+#else /* !CONFIG_SMP */ -+static inline void flush_smp_call_function_queue(void) { } -+#endif -+ -+#ifndef arch_scale_freq_tick -+static __always_inline -+void arch_scale_freq_tick(void) -+{ -+} -+#endif -+ -+#ifndef arch_scale_freq_capacity -+static __always_inline -+unsigned long arch_scale_freq_capacity(int cpu) -+{ -+ return SCHED_CAPACITY_SCALE; -+} -+#endif -+ -+static inline u64 __rq_clock_broken(struct rq *rq) -+{ -+ return READ_ONCE(rq->clock); -+} -+ -+static inline u64 rq_clock(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock; -+} -+ -+static inline u64 rq_clock_task(struct rq *rq) -+{ -+ /* -+ * Relax lockdep_assert_held() checking as in VRQ, call to -+ * sched_info_xxxx() may not held rq->lock -+ * lockdep_assert_held(&rq->lock); -+ */ -+ return rq->clock_task; -+} -+ -+/* -+ * {de,en}queue flags: -+ * -+ * DEQUEUE_SLEEP - task is no longer runnable -+ * ENQUEUE_WAKEUP - task just became runnable -+ * -+ */ -+ -+#define DEQUEUE_SLEEP 0x01 -+ -+#define ENQUEUE_WAKEUP 0x01 -+ -+ -+/* -+ * Below are scheduler API which using in other kernel code -+ * It use the dummy rq_flags -+ * ToDo : BMQ 
need to support these APIs for compatibility with mainline -+ * scheduler code. -+ */ -+struct rq_flags { -+ unsigned long flags; -+}; -+ -+struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(rq->lock); -+ -+struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) -+ __acquires(p->pi_lock) -+ __acquires(rq->lock); -+ -+static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(&rq->lock); -+} -+ -+static inline void -+task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) -+ __releases(rq->lock) -+ __releases(p->pi_lock) -+{ -+ raw_spin_unlock(&rq->lock); -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); -+} -+ -+static inline void -+rq_lock(struct rq *rq, struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ raw_spin_lock(&rq->lock); -+} -+ -+static inline void -+rq_unlock_irq(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock_irq(&rq->lock); -+} -+ -+static inline void -+rq_unlock(struct rq *rq, struct rq_flags *rf) -+ __releases(rq->lock) -+{ -+ raw_spin_unlock(&rq->lock); -+} -+ -+static inline struct rq * -+this_rq_lock_irq(struct rq_flags *rf) -+ __acquires(rq->lock) -+{ -+ struct rq *rq; -+ -+ local_irq_disable(); -+ rq = this_rq(); -+ raw_spin_lock(&rq->lock); -+ -+ return rq; -+} -+ -+static inline raw_spinlock_t *__rq_lockp(struct rq *rq) -+{ -+ return &rq->lock; -+} -+ -+static inline raw_spinlock_t *rq_lockp(struct rq *rq) -+{ -+ return __rq_lockp(rq); -+} -+ -+static inline void lockdep_assert_rq_held(struct rq *rq) -+{ -+ lockdep_assert_held(__rq_lockp(rq)); -+} -+ -+extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass); -+extern void raw_spin_rq_unlock(struct rq *rq); -+ -+static inline void raw_spin_rq_lock(struct rq *rq) -+{ -+ raw_spin_rq_lock_nested(rq, 0); -+} -+ -+static inline void raw_spin_rq_lock_irq(struct rq *rq) -+{ -+ local_irq_disable(); -+ raw_spin_rq_lock(rq); -+} -+ -+static inline void raw_spin_rq_unlock_irq(struct rq *rq) -+{ -+ raw_spin_rq_unlock(rq); -+ local_irq_enable(); -+} -+ -+static inline int task_current(struct rq *rq, struct task_struct *p) -+{ -+ return rq->curr == p; -+} -+ -+static inline bool task_running(struct task_struct *p) -+{ -+ return p->on_cpu; -+} -+ -+extern int task_running_nice(struct task_struct *p); -+ -+extern struct static_key_false sched_schedstats; -+ -+#ifdef CONFIG_CPU_IDLE -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+ rq->idle_state = idle_state; -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ WARN_ON(!rcu_read_lock_held()); -+ return rq->idle_state; -+} -+#else -+static inline void idle_set_state(struct rq *rq, -+ struct cpuidle_state *idle_state) -+{ -+} -+ -+static inline struct cpuidle_state *idle_get_state(struct rq *rq) -+{ -+ return NULL; -+} -+#endif -+ -+static inline int cpu_of(const struct rq *rq) -+{ -+#ifdef CONFIG_SMP -+ return rq->cpu; -+#else -+ return 0; -+#endif -+} -+ -+#include "stats.h" -+ -+#ifdef CONFIG_NO_HZ_COMMON -+#define NOHZ_BALANCE_KICK_BIT 0 -+#define NOHZ_STATS_KICK_BIT 1 -+ -+#define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) -+#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) -+ -+#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK) -+ -+#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags) -+ -+/* TODO: needed? 
-+extern void nohz_balance_exit_idle(struct rq *rq); -+#else -+static inline void nohz_balance_exit_idle(struct rq *rq) { } -+*/ -+#endif -+ -+#ifdef CONFIG_IRQ_TIME_ACCOUNTING -+struct irqtime { -+ u64 total; -+ u64 tick_delta; -+ u64 irq_start_time; -+ struct u64_stats_sync sync; -+}; -+ -+DECLARE_PER_CPU(struct irqtime, cpu_irqtime); -+ -+/* -+ * Returns the irqtime minus the softirq time computed by ksoftirqd. -+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime -+ * and never move forward. -+ */ -+static inline u64 irq_time_read(int cpu) -+{ -+ struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu); -+ unsigned int seq; -+ u64 total; -+ -+ do { -+ seq = __u64_stats_fetch_begin(&irqtime->sync); -+ total = irqtime->total; -+ } while (__u64_stats_fetch_retry(&irqtime->sync, seq)); -+ -+ return total; -+} -+#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ -+ -+#ifdef CONFIG_CPU_FREQ -+DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); -+#endif /* CONFIG_CPU_FREQ */ -+ -+#ifdef CONFIG_NO_HZ_FULL -+extern int __init sched_tick_offload_init(void); -+#else -+static inline int sched_tick_offload_init(void) { return 0; } -+#endif -+ -+#ifdef arch_scale_freq_capacity -+#ifndef arch_scale_freq_invariant -+#define arch_scale_freq_invariant() (true) -+#endif -+#else /* arch_scale_freq_capacity */ -+#define arch_scale_freq_invariant() (false) -+#endif -+ -+extern void schedule_idle(void); -+ -+#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT) -+ -+/* -+ * !! For sched_setattr_nocheck() (kernel) only !! -+ * -+ * This is actually gross. :( -+ * -+ * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE -+ * tasks, but still be able to sleep. We need this on platforms that cannot -+ * atomically change clock frequency. Remove once fast switching will be -+ * available on such platforms. -+ * -+ * SUGOV stands for SchedUtil GOVernor. -+ */ -+#define SCHED_FLAG_SUGOV 0x10000000 -+ -+#ifdef CONFIG_MEMBARRIER -+/* -+ * The scheduler provides memory barriers required by membarrier between: -+ * - prior user-space memory accesses and store to rq->membarrier_state, -+ * - store to rq->membarrier_state and following user-space memory accesses. -+ * In the same way it provides those guarantees around store to rq->curr. 
-+ */ -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+ int membarrier_state; -+ -+ if (prev_mm == next_mm) -+ return; -+ -+ membarrier_state = atomic_read(&next_mm->membarrier_state); -+ if (READ_ONCE(rq->membarrier_state) == membarrier_state) -+ return; -+ -+ WRITE_ONCE(rq->membarrier_state, membarrier_state); -+} -+#else -+static inline void membarrier_switch_mm(struct rq *rq, -+ struct mm_struct *prev_mm, -+ struct mm_struct *next_mm) -+{ -+} -+#endif -+ -+#ifdef CONFIG_NUMA -+extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu); -+#else -+static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return nr_cpu_ids; -+} -+#endif -+ -+extern void swake_up_all_locked(struct swait_queue_head *q); -+extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+#ifdef CONFIG_PREEMPT_DYNAMIC -+extern int preempt_dynamic_mode; -+extern int sched_dynamic_mode(const char *str); -+extern void sched_dynamic_update(int mode); -+#endif -+ -+static inline void nohz_run_idle_balance(int cpu) { } -+ -+static inline -+unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, -+ struct task_struct *p) -+{ -+ return util; -+} -+ -+static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; } -+ -+#endif /* ALT_SCHED_H */ -diff --git a/kernel/sched/bmq.h b/kernel/sched/bmq.h -new file mode 100644 -index 000000000000..66b77291b9d0 ---- /dev/null -+++ b/kernel/sched/bmq.h -@@ -0,0 +1,110 @@ -+#define ALT_SCHED_VERSION_MSG "sched/bmq: BMQ CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" -+ -+/* -+ * BMQ only routines -+ */ -+#define rq_switch_time(rq) ((rq)->clock - (rq)->last_ts_switch) -+#define boost_threshold(p) (sched_timeslice_ns >>\ -+ (15 - MAX_PRIORITY_ADJ - (p)->boost_prio)) -+ -+static inline void boost_task(struct task_struct *p) -+{ -+ int limit; -+ -+ switch (p->policy) { -+ case SCHED_NORMAL: -+ limit = -MAX_PRIORITY_ADJ; -+ break; -+ case SCHED_BATCH: -+ case SCHED_IDLE: -+ limit = 0; -+ break; -+ default: -+ return; -+ } -+ -+ if (p->boost_prio > limit) -+ p->boost_prio--; -+} -+ -+static inline void deboost_task(struct task_struct *p) -+{ -+ if (p->boost_prio < MAX_PRIORITY_ADJ) -+ p->boost_prio++; -+} -+ -+/* -+ * Common interfaces -+ */ -+static inline void sched_timeslice_imp(const int timeslice_ms) {} -+ -+static inline int -+task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) -+{ -+ return p->prio + p->boost_prio - MAX_RT_PRIO; -+} -+ -+static inline int task_sched_prio(const struct task_struct *p) -+{ -+ return (p->prio < MAX_RT_PRIO)? 
p->prio : MAX_RT_PRIO / 2 + (p->prio + p->boost_prio) / 2; -+} -+ -+static inline int -+task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) -+{ -+ return task_sched_prio(p); -+} -+ -+static inline int sched_prio2idx(int prio, struct rq *rq) -+{ -+ return prio; -+} -+ -+static inline int sched_idx2prio(int idx, struct rq *rq) -+{ -+ return idx; -+} -+ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ p->time_slice = sched_timeslice_ns; -+ -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) { -+ if (SCHED_RR != p->policy) -+ deboost_task(p); -+ requeue_task(p, rq, task_sched_prio_idx(p, rq)); -+ } -+} -+ -+static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) {} -+ -+inline int task_running_nice(struct task_struct *p) -+{ -+ return (p->prio + p->boost_prio > DEFAULT_PRIO + MAX_PRIORITY_ADJ); -+} -+ -+static void sched_task_fork(struct task_struct *p, struct rq *rq) -+{ -+ p->boost_prio = MAX_PRIORITY_ADJ; -+} -+ -+static inline void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) -+{ -+ p->boost_prio = MAX_PRIORITY_ADJ; -+} -+ -+#ifdef CONFIG_SMP -+static inline void sched_task_ttwu(struct task_struct *p) -+{ -+ if(this_rq()->clock_task - p->last_ran > sched_timeslice_ns) -+ boost_task(p); -+} -+#endif -+ -+static inline void sched_task_deactivate(struct task_struct *p, struct rq *rq) -+{ -+ if (rq_switch_time(rq) < boost_threshold(p)) -+ boost_task(p); -+} -+ -+static inline void update_rq_time_edge(struct rq *rq) {} -diff --git a/kernel/sched/build_policy.c b/kernel/sched/build_policy.c -index d9dc9ab3773f..71a25540d65e 100644 ---- a/kernel/sched/build_policy.c -+++ b/kernel/sched/build_policy.c -@@ -42,13 +42,19 @@ - - #include "idle.c" - -+#ifndef CONFIG_SCHED_ALT - #include "rt.c" -+#endif - - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_ALT - # include "cpudeadline.c" -+#endif - # include "pelt.c" - #endif - - #include "cputime.c" --#include "deadline.c" - -+#ifndef CONFIG_SCHED_ALT -+#include "deadline.c" -+#endif -diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c -index 99bdd96f454f..23f80a86d2d7 100644 ---- a/kernel/sched/build_utility.c -+++ b/kernel/sched/build_utility.c -@@ -85,7 +85,9 @@ - - #ifdef CONFIG_SMP - # include "cpupri.c" -+#ifndef CONFIG_SCHED_ALT - # include "stop_task.c" -+#endif - # include "topology.c" - #endif - -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 3dbf351d12d5..b2590f961139 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -160,9 +160,14 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) - unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); - - sg_cpu->max = max; -+#ifndef CONFIG_SCHED_ALT - sg_cpu->bw_dl = cpu_bw_dl(rq); - sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), max, - FREQUENCY_UTIL, NULL); -+#else -+ sg_cpu->bw_dl = 0; -+ sg_cpu->util = rq_load_util(rq, max); -+#endif /* CONFIG_SCHED_ALT */ - } - - /** -@@ -306,8 +311,10 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } - */ - static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) - { -+#ifndef CONFIG_SCHED_ALT - if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) - sg_cpu->sg_policy->limits_changed = true; -+#endif - } - - static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, -@@ -607,6 +614,7 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy) - } - - ret = 
sched_setattr_nocheck(thread, &attr); -+ - if (ret) { - kthread_stop(thread); - pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); -@@ -839,7 +847,9 @@ cpufreq_governor_init(schedutil_gov); - #ifdef CONFIG_ENERGY_MODEL - static void rebuild_sd_workfn(struct work_struct *work) - { -+#ifndef CONFIG_SCHED_ALT - rebuild_sched_domains_energy(); -+#endif /* CONFIG_SCHED_ALT */ - } - static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn); - -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index 78a233d43757..b3bbc87d4352 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -122,7 +122,7 @@ void account_user_time(struct task_struct *p, u64 cputime) - p->utime += cputime; - account_group_user_time(p, cputime); - -- index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; -+ index = task_running_nice(p) ? CPUTIME_NICE : CPUTIME_USER; - - /* Add user time to cpustat. */ - task_group_account_field(p, index, cputime); -@@ -146,7 +146,7 @@ void account_guest_time(struct task_struct *p, u64 cputime) - p->gtime += cputime; - - /* Add guest time to cpustat. */ -- if (task_nice(p) > 0) { -+ if (task_running_nice(p)) { - task_group_account_field(p, CPUTIME_NICE, cputime); - cpustat[CPUTIME_GUEST_NICE] += cputime; - } else { -@@ -269,7 +269,7 @@ static inline u64 account_other_time(u64 max) - #ifdef CONFIG_64BIT - static inline u64 read_sum_exec_runtime(struct task_struct *t) - { -- return t->se.sum_exec_runtime; -+ return tsk_seruntime(t); - } - #else - static u64 read_sum_exec_runtime(struct task_struct *t) -@@ -279,7 +279,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) - struct rq *rq; - - rq = task_rq_lock(t, &rf); -- ns = t->se.sum_exec_runtime; -+ ns = tsk_seruntime(t); - task_rq_unlock(rq, t, &rf); - - return ns; -@@ -611,7 +611,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - { - struct task_cputime cputime = { -- .sum_exec_runtime = p->se.sum_exec_runtime, -+ .sum_exec_runtime = tsk_seruntime(p), - }; - - if (task_cputime(p, &cputime.utime, &cputime.stime)) -diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index bb3d63bdf4ae..4e1680785704 100644 ---- a/kernel/sched/debug.c -+++ b/kernel/sched/debug.c -@@ -7,6 +7,7 @@ - * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar - */ - -+#ifndef CONFIG_SCHED_ALT - /* - * This allows printing both to /proc/sched_debug and - * to the console -@@ -215,6 +216,7 @@ static const struct file_operations sched_scaling_fops = { - }; - - #endif /* SMP */ -+#endif /* !CONFIG_SCHED_ALT */ - - #ifdef CONFIG_PREEMPT_DYNAMIC - -@@ -278,6 +280,7 @@ static const struct file_operations sched_dynamic_fops = { - - #endif /* CONFIG_PREEMPT_DYNAMIC */ - -+#ifndef CONFIG_SCHED_ALT - __read_mostly bool sched_debug_verbose; - - static const struct seq_operations sched_debug_sops; -@@ -293,6 +296,7 @@ static const struct file_operations sched_debug_fops = { - .llseek = seq_lseek, - .release = seq_release, - }; -+#endif /* !CONFIG_SCHED_ALT */ - - static struct dentry *debugfs_sched; - -@@ -302,12 +306,15 @@ static __init int sched_init_debug(void) - - debugfs_sched = debugfs_create_dir("sched", NULL); - -+#ifndef CONFIG_SCHED_ALT - debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops); - debugfs_create_bool("verbose", 0644, debugfs_sched, &sched_debug_verbose); -+#endif /* !CONFIG_SCHED_ALT */ - #ifdef CONFIG_PREEMPT_DYNAMIC - debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); 
- #endif - -+#ifndef CONFIG_SCHED_ALT - debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency); - debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity); - debugfs_create_u32("idle_min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_idle_min_granularity); -@@ -336,11 +343,13 @@ static __init int sched_init_debug(void) - #endif - - debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops); -+#endif /* !CONFIG_SCHED_ALT */ - - return 0; - } - late_initcall(sched_init_debug); - -+#ifndef CONFIG_SCHED_ALT - #ifdef CONFIG_SMP - - static cpumask_var_t sd_sysctl_cpus; -@@ -1067,6 +1076,7 @@ void proc_sched_set_task(struct task_struct *p) - memset(&p->stats, 0, sizeof(p->stats)); - #endif - } -+#endif /* !CONFIG_SCHED_ALT */ - - void resched_latency_warn(int cpu, u64 latency) - { -diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c -index 328cccbee444..aef991facc79 100644 ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -400,6 +400,7 @@ void cpu_startup_entry(enum cpuhp_state state) - do_idle(); - } - -+#ifndef CONFIG_SCHED_ALT - /* - * idle-task scheduling class. - */ -@@ -521,3 +522,4 @@ DEFINE_SCHED_CLASS(idle) = { - .switched_to = switched_to_idle, - .update_curr = update_curr_idle, - }; -+#endif -diff --git a/kernel/sched/pds.h b/kernel/sched/pds.h -new file mode 100644 -index 000000000000..56a649d02e49 ---- /dev/null -+++ b/kernel/sched/pds.h -@@ -0,0 +1,127 @@ -+#define ALT_SCHED_VERSION_MSG "sched/pds: PDS CPU Scheduler "ALT_SCHED_VERSION" by Alfred Chen.\n" -+ -+static int sched_timeslice_shift = 22; -+ -+#define NORMAL_PRIO_MOD(x) ((x) & (NORMAL_PRIO_NUM - 1)) -+ -+/* -+ * Common interfaces -+ */ -+static inline void sched_timeslice_imp(const int timeslice_ms) -+{ -+ if (2 == timeslice_ms) -+ sched_timeslice_shift = 21; -+} -+ -+static inline int -+task_sched_prio_normal(const struct task_struct *p, const struct rq *rq) -+{ -+ s64 delta = p->deadline - rq->time_edge + NORMAL_PRIO_NUM - NICE_WIDTH; -+ -+ if (WARN_ONCE(delta > NORMAL_PRIO_NUM - 1, -+ "pds: task_sched_prio_normal() delta %lld\n", delta)) -+ return NORMAL_PRIO_NUM - 1; -+ -+ return (delta < 0) ? 0 : delta; -+} -+ -+static inline int task_sched_prio(const struct task_struct *p) -+{ -+ return (p->prio < MAX_RT_PRIO) ? p->prio : -+ MIN_NORMAL_PRIO + task_sched_prio_normal(p, task_rq(p)); -+} -+ -+static inline int -+task_sched_prio_idx(const struct task_struct *p, const struct rq *rq) -+{ -+ return (p->prio < MAX_RT_PRIO) ? p->prio : MIN_NORMAL_PRIO + -+ NORMAL_PRIO_MOD(task_sched_prio_normal(p, rq) + rq->time_edge); -+} -+ -+static inline int sched_prio2idx(int prio, struct rq *rq) -+{ -+ return (IDLE_TASK_SCHED_PRIO == prio || prio < MAX_RT_PRIO) ? prio : -+ MIN_NORMAL_PRIO + NORMAL_PRIO_MOD((prio - MIN_NORMAL_PRIO) + -+ rq->time_edge); -+} -+ -+static inline int sched_idx2prio(int idx, struct rq *rq) -+{ -+ return (idx < MAX_RT_PRIO) ? 
idx : MIN_NORMAL_PRIO + -+ NORMAL_PRIO_MOD((idx - MIN_NORMAL_PRIO) + NORMAL_PRIO_NUM - -+ NORMAL_PRIO_MOD(rq->time_edge)); -+} -+ -+static inline void sched_renew_deadline(struct task_struct *p, const struct rq *rq) -+{ -+ if (p->prio >= MAX_RT_PRIO) -+ p->deadline = (rq->clock >> sched_timeslice_shift) + -+ p->static_prio - (MAX_PRIO - NICE_WIDTH); -+} -+ -+int task_running_nice(struct task_struct *p) -+{ -+ return (p->prio > DEFAULT_PRIO); -+} -+ -+static inline void update_rq_time_edge(struct rq *rq) -+{ -+ struct list_head head; -+ u64 old = rq->time_edge; -+ u64 now = rq->clock >> sched_timeslice_shift; -+ u64 prio, delta; -+ -+ if (now == old) -+ return; -+ -+ delta = min_t(u64, NORMAL_PRIO_NUM, now - old); -+ INIT_LIST_HEAD(&head); -+ -+ for_each_set_bit(prio, &rq->queue.bitmap[2], delta) -+ list_splice_tail_init(rq->queue.heads + MIN_NORMAL_PRIO + -+ NORMAL_PRIO_MOD(prio + old), &head); -+ -+ rq->queue.bitmap[2] = (NORMAL_PRIO_NUM == delta) ? 0UL : -+ rq->queue.bitmap[2] >> delta; -+ rq->time_edge = now; -+ if (!list_empty(&head)) { -+ u64 idx = MIN_NORMAL_PRIO + NORMAL_PRIO_MOD(now); -+ struct task_struct *p; -+ -+ list_for_each_entry(p, &head, sq_node) -+ p->sq_idx = idx; -+ -+ list_splice(&head, rq->queue.heads + idx); -+ rq->queue.bitmap[2] |= 1UL; -+ } -+} -+ -+static inline void time_slice_expired(struct task_struct *p, struct rq *rq) -+{ -+ p->time_slice = sched_timeslice_ns; -+ sched_renew_deadline(p, rq); -+ if (SCHED_FIFO != p->policy && task_on_rq_queued(p)) -+ requeue_task(p, rq, task_sched_prio_idx(p, rq)); -+} -+ -+static inline void sched_task_sanity_check(struct task_struct *p, struct rq *rq) -+{ -+ u64 max_dl = rq->time_edge + NICE_WIDTH - 1; -+ if (unlikely(p->deadline > max_dl)) -+ p->deadline = max_dl; -+} -+ -+static void sched_task_fork(struct task_struct *p, struct rq *rq) -+{ -+ sched_renew_deadline(p, rq); -+} -+ -+static inline void do_sched_yield_type_1(struct task_struct *p, struct rq *rq) -+{ -+ time_slice_expired(p, rq); -+} -+ -+#ifdef CONFIG_SMP -+static inline void sched_task_ttwu(struct task_struct *p) {} -+#endif -+static inline void sched_task_deactivate(struct task_struct *p, struct rq *rq) {} -diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c -index 0f310768260c..bd38bf738fe9 100644 ---- a/kernel/sched/pelt.c -+++ b/kernel/sched/pelt.c -@@ -266,6 +266,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load) - WRITE_ONCE(sa->util_avg, sa->util_sum / divider); - } - -+#ifndef CONFIG_SCHED_ALT - /* - * sched_entity: - * -@@ -383,8 +384,9 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - - return 0; - } -+#endif - --#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) - /* - * thermal: - * -diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h -index 4ff2ed4f8fa1..226eeed61318 100644 ---- a/kernel/sched/pelt.h -+++ b/kernel/sched/pelt.h -@@ -1,13 +1,15 @@ - #ifdef CONFIG_SMP - #include "sched-pelt.h" - -+#ifndef CONFIG_SCHED_ALT - int __update_load_avg_blocked_se(u64 now, struct sched_entity *se); - int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se); - int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq); - int update_rt_rq_load_avg(u64 now, struct rq *rq, int running); - int update_dl_rq_load_avg(u64 now, struct rq *rq, int running); -+#endif - --#ifdef CONFIG_SCHED_THERMAL_PRESSURE -+#if defined(CONFIG_SCHED_THERMAL_PRESSURE) && !defined(CONFIG_SCHED_ALT) - int update_thermal_load_avg(u64 now, struct rq *rq, u64 
capacity); - - static inline u64 thermal_load_avg(struct rq *rq) -@@ -44,6 +46,7 @@ static inline u32 get_pelt_divider(struct sched_avg *avg) - return PELT_MIN_DIVIDER + avg->period_contrib; - } - -+#ifndef CONFIG_SCHED_ALT - static inline void cfs_se_util_change(struct sched_avg *avg) - { - unsigned int enqueued; -@@ -155,9 +158,11 @@ static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq) - return rq_clock_pelt(rq_of(cfs_rq)); - } - #endif -+#endif /* CONFIG_SCHED_ALT */ - - #else - -+#ifndef CONFIG_SCHED_ALT - static inline int - update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) - { -@@ -175,6 +180,7 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running) - { - return 0; - } -+#endif - - static inline int - update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity) -diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 47b89a0fc6e5..de2641a32c22 100644 ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -5,6 +5,10 @@ - #ifndef _KERNEL_SCHED_SCHED_H - #define _KERNEL_SCHED_SCHED_H - -+#ifdef CONFIG_SCHED_ALT -+#include "alt_sched.h" -+#else -+ - #include - #include - #include -@@ -3116,4 +3120,9 @@ extern int sched_dynamic_mode(const char *str); - extern void sched_dynamic_update(int mode); - #endif - -+static inline int task_running_nice(struct task_struct *p) -+{ -+ return (task_nice(p) > 0); -+} -+#endif /* !CONFIG_SCHED_ALT */ - #endif /* _KERNEL_SCHED_SCHED_H */ -diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c -index 857f837f52cb..5486c63e4790 100644 ---- a/kernel/sched/stats.c -+++ b/kernel/sched/stats.c -@@ -125,8 +125,10 @@ static int show_schedstat(struct seq_file *seq, void *v) - } else { - struct rq *rq; - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_ALT - struct sched_domain *sd; - int dcount = 0; -+#endif - #endif - cpu = (unsigned long)(v - 2); - rq = cpu_rq(cpu); -@@ -143,6 +145,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - seq_printf(seq, "\n"); - - #ifdef CONFIG_SMP -+#ifndef CONFIG_SCHED_ALT - /* domain-specific stats */ - rcu_read_lock(); - for_each_domain(cpu, sd) { -@@ -171,6 +174,7 @@ static int show_schedstat(struct seq_file *seq, void *v) - sd->ttwu_move_balance); - } - rcu_read_unlock(); -+#endif - #endif - } - return 0; -diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h -index baa839c1ba96..15238be0581b 100644 ---- a/kernel/sched/stats.h -+++ b/kernel/sched/stats.h -@@ -89,6 +89,7 @@ static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delt - - #endif /* CONFIG_SCHEDSTATS */ - -+#ifndef CONFIG_SCHED_ALT - #ifdef CONFIG_FAIR_GROUP_SCHED - struct sched_entity_stats { - struct sched_entity se; -@@ -105,6 +106,7 @@ __schedstats_from_se(struct sched_entity *se) - #endif - return &task_of(se)->stats; - } -+#endif /* CONFIG_SCHED_ALT */ - - #ifdef CONFIG_PSI - /* -diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index 05b6c2ad90b9..480ef393b3c9 100644 ---- a/kernel/sched/topology.c -+++ b/kernel/sched/topology.c -@@ -3,6 +3,7 @@ - * Scheduler topology setup/handling methods - */ - -+#ifndef CONFIG_SCHED_ALT - DEFINE_MUTEX(sched_domains_mutex); - - /* Protected by sched_domains_mutex: */ -@@ -1413,8 +1414,10 @@ static void asym_cpu_capacity_scan(void) - */ - - static int default_relax_domain_level = -1; -+#endif /* CONFIG_SCHED_ALT */ - int sched_domain_level_max; - -+#ifndef CONFIG_SCHED_ALT - static int __init setup_relax_domain_level(char *str) - { - if (kstrtoint(str, 0, &default_relax_domain_level)) -@@ -1647,6 +1650,7 @@ sd_init(struct sched_domain_topology_level 
*tl, - - return sd; - } -+#endif /* CONFIG_SCHED_ALT */ - - /* - * Topology list, bottom-up. -@@ -1683,6 +1687,7 @@ void set_sched_topology(struct sched_domain_topology_level *tl) - sched_domain_topology_saved = NULL; - } - -+#ifndef CONFIG_SCHED_ALT - #ifdef CONFIG_NUMA - - static const struct cpumask *sd_numa_mask(int cpu) -@@ -2638,3 +2643,15 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], - partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); - mutex_unlock(&sched_domains_mutex); - } -+#else /* CONFIG_SCHED_ALT */ -+void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], -+ struct sched_domain_attr *dattr_new) -+{} -+ -+#ifdef CONFIG_NUMA -+int sched_numa_find_closest(const struct cpumask *cpus, int cpu) -+{ -+ return best_mask_cpu(cpu, cpus); -+} -+#endif /* CONFIG_NUMA */ -+#endif -diff --git a/kernel/sysctl.c b/kernel/sysctl.c -index 35d034219513..23719c728677 100644 ---- a/kernel/sysctl.c -+++ b/kernel/sysctl.c -@@ -86,6 +86,10 @@ - - /* Constants used for minimum and maximum */ - -+#ifdef CONFIG_SCHED_ALT -+extern int sched_yield_type; -+#endif -+ - #ifdef CONFIG_PERF_EVENTS - static const int six_hundred_forty_kb = 640 * 1024; - #endif -@@ -1590,6 +1594,7 @@ int proc_do_static_key(struct ctl_table *table, int write, - } - - static struct ctl_table kern_table[] = { -+#ifndef CONFIG_SCHED_ALT - #ifdef CONFIG_NUMA_BALANCING - { - .procname = "numa_balancing", -@@ -1601,6 +1606,7 @@ static struct ctl_table kern_table[] = { - .extra2 = SYSCTL_FOUR, - }, - #endif /* CONFIG_NUMA_BALANCING */ -+#endif /* !CONFIG_SCHED_ALT */ - { - .procname = "panic", - .data = &panic_timeout, -@@ -1902,6 +1908,17 @@ static struct ctl_table kern_table[] = { - .proc_handler = proc_dointvec, - }, - #endif -+#ifdef CONFIG_SCHED_ALT -+ { -+ .procname = "yield_type", -+ .data = &sched_yield_type, -+ .maxlen = sizeof (int), -+ .mode = 0644, -+ .proc_handler = &proc_dointvec_minmax, -+ .extra1 = SYSCTL_ZERO, -+ .extra2 = SYSCTL_TWO, -+ }, -+#endif - #if defined(CONFIG_S390) && defined(CONFIG_SMP) - { - .procname = "spin_retry", -diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c -index 0ea8702eb516..a27a0f3a654d 100644 ---- a/kernel/time/hrtimer.c -+++ b/kernel/time/hrtimer.c -@@ -2088,8 +2088,10 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, - int ret = 0; - u64 slack; - -+#ifndef CONFIG_SCHED_ALT - slack = current->timer_slack_ns; - if (dl_task(current) || rt_task(current)) -+#endif - slack = 0; - - hrtimer_init_sleeper_on_stack(&t, clockid, mode); -diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c -index cb925e8ef9a8..67d823510f5c 100644 ---- a/kernel/time/posix-cpu-timers.c -+++ b/kernel/time/posix-cpu-timers.c -@@ -223,7 +223,7 @@ static void task_sample_cputime(struct task_struct *p, u64 *samples) - u64 stime, utime; - - task_cputime(p, &utime, &stime); -- store_samples(samples, stime, utime, p->se.sum_exec_runtime); -+ store_samples(samples, stime, utime, tsk_seruntime(p)); - } - - static void proc_sample_cputime_atomic(struct task_cputime_atomic *at, -@@ -866,6 +866,7 @@ static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples, - } - } - -+#ifndef CONFIG_SCHED_ALT - static inline void check_dl_overrun(struct task_struct *tsk) - { - if (tsk->dl.dl_overrun) { -@@ -873,6 +874,7 @@ static inline void check_dl_overrun(struct task_struct *tsk) - send_signal_locked(SIGXCPU, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); - } - } -+#endif - - static bool check_rlimit(u64 time, u64 limit, int 
signo, bool rt, bool hard) - { -@@ -900,8 +902,10 @@ static void check_thread_timers(struct task_struct *tsk, - u64 samples[CPUCLOCK_MAX]; - unsigned long soft; - -+#ifndef CONFIG_SCHED_ALT - if (dl_task(tsk)) - check_dl_overrun(tsk); -+#endif - - if (expiry_cache_is_inactive(pct)) - return; -@@ -915,7 +919,7 @@ static void check_thread_timers(struct task_struct *tsk, - soft = task_rlimit(tsk, RLIMIT_RTTIME); - if (soft != RLIM_INFINITY) { - /* Task RT timeout is accounted in jiffies. RTTIME is usec */ -- unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ); -+ unsigned long rttime = tsk_rttimeout(tsk) * (USEC_PER_SEC / HZ); - unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME); - - /* At the hard limit, send SIGKILL. No further action. */ -@@ -1151,8 +1155,10 @@ static inline bool fastpath_timer_check(struct task_struct *tsk) - return true; - } - -+#ifndef CONFIG_SCHED_ALT - if (dl_task(tsk) && tsk->dl.dl_overrun) - return true; -+#endif - - return false; - } -diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c -index a2d301f58ced..2ccdede8585c 100644 ---- a/kernel/trace/trace_selftest.c -+++ b/kernel/trace/trace_selftest.c -@@ -1143,10 +1143,15 @@ static int trace_wakeup_test_thread(void *data) - { - /* Make this a -deadline thread */ - static const struct sched_attr attr = { -+#ifdef CONFIG_SCHED_ALT -+ /* No deadline on BMQ/PDS, use RR */ -+ .sched_policy = SCHED_RR, -+#else - .sched_policy = SCHED_DEADLINE, - .sched_runtime = 100000ULL, - .sched_deadline = 10000000ULL, - .sched_period = 10000000ULL -+#endif - }; - struct wakeup_test_data *x = data; - diff --git a/sys-kernel/pinephone-pro-sources/files/5021_BMQ-and-PDS-gentoo-defaults.patch b/sys-kernel/pinephone-pro-sources/files/5021_BMQ-and-PDS-gentoo-defaults.patch deleted file mode 100644 index 6b2049d..0000000 --- a/sys-kernel/pinephone-pro-sources/files/5021_BMQ-and-PDS-gentoo-defaults.patch +++ /dev/null @@ -1,13 +0,0 @@ ---- a/init/Kconfig 2022-07-07 13:22:00.698439887 -0400 -+++ b/init/Kconfig 2022-07-07 13:23:45.152333576 -0400 -@@ -874,8 +874,9 @@ config UCLAMP_BUCKETS_COUNT - If in doubt, use the default value. 
- - menuconfig SCHED_ALT -+ depends on X86_64 - bool "Alternative CPU Schedulers" -- default y -+ default n - help - This feature enable alternative CPU scheduler" - diff --git a/sys-kernel/pinephone-pro-sources/pinephone-pro-sources-5.19.3.ebuild b/sys-kernel/pinephone-pro-sources/pinephone-pro-sources-5.19.3.ebuild deleted file mode 100644 index f6def35..0000000 --- a/sys-kernel/pinephone-pro-sources/pinephone-pro-sources-5.19.3.ebuild +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 1999-2021 Gentoo Authors -# Distributed under the terms of the GNU General Public License v2 - -EAPI="8" -K_NOUSENAME="yes" -K_NOSETEXTRAVERSION="yes" -K_SECURITY_UNSUPPORTED="1" -K_GENPATCHES_VER="1" -ETYPE="sources" -inherit kernel-2 -detect_version - -KEYWORDS="~arm64" - -DEPEND="${RDEPEND} - >=sys-devel/patch-2.7.5" - -DESCRIPTION="Full sources for the Linux kernel, with megi's patch for pinephone and gentoo patchset" - -MEGI_TAG="orange-pi-5.19-20220822-1337" -SRC_URI="https://github.com/megous/linux/archive/${MEGI_TAG}.tar.gz" - -PATCHES=( - #Gentoo Patches - ${FILESDIR}/1500_XATTR_USER_PREFIX.patch - ${FILESDIR}/1510_fs-enable-link-security-restrictions-by-default.patch - ${FILESDIR}/1700_sparc-address-warray-bound-warnings.patch - ${FILESDIR}/2000_BT-Check-key-sizes-only-if-Secure-Simple-Pairing-enabled.patch - ${FILESDIR}/2900_tmp513-Fix-build-issue-by-selecting-CONFIG_REG.patch - ${FILESDIR}/2920_sign-file-patch-for-libressl.patch - ${FILESDIR}/3000_Support-printing-firmware-info.patch - ${FILESDIR}/4567_distro-Gentoo-Kconfig.patch - ${FILESDIR}/5010_enable-cpu-optimizations-universal.patch - ${FILESDIR}/5020_BMQ-and-PDS-io-scheduler-v5.19-r0.patch - ${FILESDIR}/5021_BMQ-and-PDS-gentoo-defaults.patch - - #PinePhone Patches - ${FILESDIR}/0101-arm64-dts-pinephone-drop-modem-power-node.patch - ${FILESDIR}/0102-arm64-dts-pinephone-pro-remove-modem-node.patch - ${FILESDIR}/0103-arm64-dts-rk3399-pinephone-pro-add-modem-RI-pin.patch -) - -S="${WORKDIR}/linux-${MEGI_TAG}" - -src_unpack() { - default -} - -src_prepare() { - default - eapply_user -} - -pkg_postinst() { - kernel-2_pkg_postinst - einfo "To build and install the kernel use the following commands:" - einfo "# make Image modules" - einfo "# make DTC_FLAGS="-@" dtbs" - einfo "# cp arch/arm64/boot/Image /boot" - einfo "# make INSTALL_MOD_PATH=/ modules_intall" - einfo "# make INSTALL_DTBS_PATH=/boot/dtbs dtbs_install" - einfo "You will need to create and initramfs afterwards." 
- einfo "If you use dracut you can run:" - einfo "# dracut -m \"rootfs-block base\" --host-only --kver 5.19.2-pinehone-gentoo-arm64" - einfo "Change 5.19.2-pinehone-gentoo-arm64 to your kernel version installed in /lib/modules" -} - -pkg_postrm() { - kernel-2_pkg_postrm -} diff --git a/sys-kernel/pinephone-sources/Manifest b/sys-kernel/pinephone-sources/Manifest index 3a33b67..91e1408 100644 --- a/sys-kernel/pinephone-sources/Manifest +++ b/sys-kernel/pinephone-sources/Manifest @@ -1 +1 @@ -DIST orange-pi-5.19-20220802-0940.tar.gz 214990340 BLAKE2B 9bbadd06a8d160d716838d709f7ca6adb6143cb2205337940fb2d4607f0b806400cc77fb4abd36856844536b0a4ced92737658fc7af60d10f141a21116d66eed SHA512 04d46f6065a138d3b206937fada3990f823a1937c14812bada6512d04ebf1c7634cdea0a57611066bd2b4951a38c8e354b187bffe2ca738f2fe2a3f50d922dc2 +DIST orange-pi-5.19-20220909-1622.tar.gz 215047997 BLAKE2B 8d9b57d5e4c52e08caf97749912ba14eff7b328eb8fa6e00ba5a7f3bf47b4064c1272162602fdbda9852eea6f7473033c01b491ef09ca6a9aa3ee0f1375145ac SHA512 c2d085522c0332d6b95dde22af92c7c2a8941f94714d9d2c83249d4ddd921fe0a85226b8a09715ca37dfe0874315dd97d0d4c5511f8fe315cb29a9fef99a1109 diff --git a/sys-kernel/pinephone-pro-sources/files/0103-arm64-dts-rk3399-pinephone-pro-add-modem-RI-pin.patch b/sys-kernel/pinephone-sources/files/0103-arm64-dts-rk3399-pinephone-pro-add-modem-RI-pin.patch similarity index 100% rename from sys-kernel/pinephone-pro-sources/files/0103-arm64-dts-rk3399-pinephone-pro-add-modem-RI-pin.patch rename to sys-kernel/pinephone-sources/files/0103-arm64-dts-rk3399-pinephone-pro-add-modem-RI-pin.patch diff --git a/sys-kernel/pinephone-sources/files/0104-PPP-Add-reset-resume-to-usb_wwan.patch b/sys-kernel/pinephone-sources/files/0104-PPP-Add-reset-resume-to-usb_wwan.patch new file mode 100644 index 0000000..be8499f --- /dev/null +++ b/sys-kernel/pinephone-sources/files/0104-PPP-Add-reset-resume-to-usb_wwan.patch @@ -0,0 +1,21 @@ +From 94ee175a91b2c132ca3068ee04cb2766c9f47cd7 Mon Sep 17 00:00:00 2001 +From: Hendrik Borghorst +Date: Fri, 10 Jun 2022 15:36:29 +0200 +Subject: [PATCH] PPP: Add reset resume to usb_wwan + +--- + drivers/usb/serial/option.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c +index e60425bbf53764..08fb844c534bf6 100644 +--- a/drivers/usb/serial/option.c ++++ b/drivers/usb/serial/option.c +@@ -2176,6 +2176,7 @@ static struct usb_serial_driver option_1port_device = { + #ifdef CONFIG_PM + .suspend = usb_wwan_suspend, + .resume = usb_wwan_resume, ++ .reset_resume = usb_wwan_resume, + #endif + }; + diff --git a/sys-kernel/pinephone-sources/files/0104-Revert-usb-quirks-Add-USB_QUIRK_RESET-for-Quectel-EG25G.patch b/sys-kernel/pinephone-sources/files/0104-Revert-usb-quirks-Add-USB_QUIRK_RESET-for-Quectel-EG25G.patch new file mode 100644 index 0000000..daf2895 --- /dev/null +++ b/sys-kernel/pinephone-sources/files/0104-Revert-usb-quirks-Add-USB_QUIRK_RESET-for-Quectel-EG25G.patch @@ -0,0 +1,25 @@ +From f57b0185c93bc94c3fedbcbb274d3e032972301a Mon Sep 17 00:00:00 2001 +From: Hendrik Borghorst +Date: Fri, 10 Jun 2022 09:19:39 +0200 +Subject: [PATCH] Revert "usb: quirks: Add USB_QUIRK_RESET for Quectel EG25G + Modem" + +This reverts commit 62867934f6251349e1352a345f827ba8de514a36. 
+--- + drivers/usb/core/quirks.c | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c +index 17c3d472fb1304..f99a65a64588fe 100644 +--- a/drivers/usb/core/quirks.c ++++ b/drivers/usb/core/quirks.c +@@ -519,9 +519,6 @@ static const struct usb_device_id usb_quirk_list[] = { + /* INTEL VALUE SSD */ + { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, + +- /* Quectel EG25G Modem */ +- { USB_DEVICE(0x2c7c, 0x0125), .driver_info = USB_QUIRK_RESET }, +- + { } /* terminating entry must be last */ + }; + diff --git a/sys-kernel/pinephone-sources/files/0104-rk818_charger-use-type-battery-again.patch b/sys-kernel/pinephone-sources/files/0104-rk818_charger-use-type-battery-again.patch new file mode 100644 index 0000000..74ed979 --- /dev/null +++ b/sys-kernel/pinephone-sources/files/0104-rk818_charger-use-type-battery-again.patch @@ -0,0 +1,11 @@ +--- a/drivers/power/supply/rk818_charger.c 2022-01-28 17:51:57.000000000 +0100 ++++ b/drivers/power/supply/rk818_charger.c 2022-02-02 15:06:51.303222817 +0100 +@@ -522,7 +522,7 @@ static enum power_supply_property rk818_ + */ + static const struct power_supply_desc rk818_charger_desc = { + .name = "rk818-charger", +- .type = POWER_SUPPLY_TYPE_MAINS, ++ .type = POWER_SUPPLY_TYPE_BATTERY, + .properties = rk818_charger_props, + .num_properties = ARRAY_SIZE(rk818_charger_props), + .property_is_writeable = rk818_charger_prop_writeable, diff --git a/sys-kernel/pinephone-sources/files/0106-sound-rockchip-i2s-Dont-disable-mclk-on-suspend.patch b/sys-kernel/pinephone-sources/files/0106-sound-rockchip-i2s-Dont-disable-mclk-on-suspend.patch new file mode 100644 index 0000000..04cc463 --- /dev/null +++ b/sys-kernel/pinephone-sources/files/0106-sound-rockchip-i2s-Dont-disable-mclk-on-suspend.patch @@ -0,0 +1,29 @@ +From 5f41055235786657509233557a3ca2950c401ec5 Mon Sep 17 00:00:00 2001 +From: marcin +Date: Wed, 15 Jun 2022 03:46:13 +0200 +Subject: [PATCH] sound/rockchip/i2s: Don't disable mclk on suspend + +This is a workaround to fix an issue with high-pitch sound after +suspend. 
+ +This patch is actually authored by Biktorgj +--- + sound/soc/rockchip/rockchip_i2s.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c +index 4ce5d2579387..1f9d4e5e36b2 100644 +--- a/sound/soc/rockchip/rockchip_i2s.c ++++ b/sound/soc/rockchip/rockchip_i2s.c +@@ -61,7 +61,7 @@ static int i2s_runtime_suspend(struct device *dev) + struct rk_i2s_dev *i2s = dev_get_drvdata(dev); + + regcache_cache_only(i2s->regmap, true); +- clk_disable_unprepare(i2s->mclk); ++ //clk_disable_unprepare(i2s->mclk); + + return 0; + } +-- +GitLab + diff --git a/sys-kernel/pinephone-sources/files/0201-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch b/sys-kernel/pinephone-sources/files/0201-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch new file mode 100644 index 0000000..e7d4da5 --- /dev/null +++ b/sys-kernel/pinephone-sources/files/0201-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch @@ -0,0 +1,150 @@ +--- b/drivers/video/fbdev/core/bitblit.c ++++ a/drivers/video/fbdev/core/bitblit.c +@@ -234,7 +234,7 @@ + } + + static void bit_cursor(struct vc_data *vc, struct fb_info *info, int mode, ++ int softback_lines, int fg, int bg) +- int fg, int bg) + { + struct fb_cursor cursor; + struct fbcon_ops *ops = info->fbcon_par; +@@ -247,6 +247,15 @@ + + cursor.set = 0; + ++ if (softback_lines) { ++ if (y + softback_lines >= vc->vc_rows) { ++ mode = CM_ERASE; ++ ops->cursor_flash = 0; ++ return; ++ } else ++ y += softback_lines; ++ } ++ + c = scr_readw((u16 *) vc->vc_pos); + attribute = get_attribute(info, c); + src = vc->vc_font.data + ((c & charmask) * (w * vc->vc_font.height)); +--- b/drivers/video/fbdev/core/fbcon.c ++++ a/drivers/video/fbdev/core/fbcon.c +@@ -394,7 +394,7 @@ + c = scr_readw((u16 *) vc->vc_pos); + mode = (!ops->cursor_flash || ops->cursor_state.enable) ? + CM_ERASE : CM_DRAW; ++ ops->cursor(vc, info, mode, 0, get_color(vc, info, c, 1), +- ops->cursor(vc, info, mode, get_color(vc, info, c, 1), + get_color(vc, info, c, 0)); + console_unlock(); + } +@@ -1345,7 +1345,7 @@ + + ops->cursor_flash = (mode == CM_ERASE) ? 
0 : 1; + ++ ops->cursor(vc, info, mode, 0, get_color(vc, info, c, 1), +- ops->cursor(vc, info, mode, get_color(vc, info, c, 1), + get_color(vc, info, c, 0)); + } + +--- b/drivers/video/fbdev/core/fbcon.h ++++ a/drivers/video/fbdev/core/fbcon.h +@@ -62,7 +62,7 @@ + void (*clear_margins)(struct vc_data *vc, struct fb_info *info, + int color, int bottom_only); + void (*cursor)(struct vc_data *vc, struct fb_info *info, int mode, ++ int softback_lines, int fg, int bg); +- int fg, int bg); + int (*update_start)(struct fb_info *info); + int (*rotate_font)(struct fb_info *info, struct vc_data *vc); + struct fb_var_screeninfo var; /* copy of the current fb_var_screeninfo */ +--- b/drivers/video/fbdev/core/fbcon_ccw.c ++++ a/drivers/video/fbdev/core/fbcon_ccw.c +@@ -219,7 +219,7 @@ + } + + static void ccw_cursor(struct vc_data *vc, struct fb_info *info, int mode, ++ int softback_lines, int fg, int bg) +- int fg, int bg) + { + struct fb_cursor cursor; + struct fbcon_ops *ops = info->fbcon_par; +@@ -236,6 +236,15 @@ + + cursor.set = 0; + ++ if (softback_lines) { ++ if (y + softback_lines >= vc->vc_rows) { ++ mode = CM_ERASE; ++ ops->cursor_flash = 0; ++ return; ++ } else ++ y += softback_lines; ++ } ++ + c = scr_readw((u16 *) vc->vc_pos); + attribute = get_attribute(info, c); + src = ops->fontbuffer + ((c & charmask) * (w * vc->vc_font.width)); +--- b/drivers/video/fbdev/core/fbcon_cw.c ++++ a/drivers/video/fbdev/core/fbcon_cw.c +@@ -202,7 +202,7 @@ + } + + static void cw_cursor(struct vc_data *vc, struct fb_info *info, int mode, ++ int softback_lines, int fg, int bg) +- int fg, int bg) + { + struct fb_cursor cursor; + struct fbcon_ops *ops = info->fbcon_par; +@@ -219,6 +219,15 @@ + + cursor.set = 0; + ++ if (softback_lines) { ++ if (y + softback_lines >= vc->vc_rows) { ++ mode = CM_ERASE; ++ ops->cursor_flash = 0; ++ return; ++ } else ++ y += softback_lines; ++ } ++ + c = scr_readw((u16 *) vc->vc_pos); + attribute = get_attribute(info, c); + src = ops->fontbuffer + ((c & charmask) * (w * vc->vc_font.width)); +--- b/drivers/video/fbdev/core/fbcon_ud.c ++++ a/drivers/video/fbdev/core/fbcon_ud.c +@@ -249,7 +249,7 @@ + } + + static void ud_cursor(struct vc_data *vc, struct fb_info *info, int mode, ++ int softback_lines, int fg, int bg) +- int fg, int bg) + { + struct fb_cursor cursor; + struct fbcon_ops *ops = info->fbcon_par; +@@ -267,6 +267,15 @@ + + cursor.set = 0; + ++ if (softback_lines) { ++ if (y + softback_lines >= vc->vc_rows) { ++ mode = CM_ERASE; ++ ops->cursor_flash = 0; ++ return; ++ } else ++ y += softback_lines; ++ } ++ + c = scr_readw((u16 *) vc->vc_pos); + attribute = get_attribute(info, c); + src = ops->fontbuffer + ((c & charmask) * (w * vc->vc_font.height)); +--- b/drivers/video/fbdev/core/tileblit.c ++++ a/drivers/video/fbdev/core/tileblit.c +@@ -80,7 +80,7 @@ + } + + static void tile_cursor(struct vc_data *vc, struct fb_info *info, int mode, ++ int softback_lines, int fg, int bg) +- int fg, int bg) + { + struct fb_tilecursor cursor; + int use_sw = (vc->vc_cursor_type & 0x10); diff --git a/sys-kernel/pinephone-sources/files/0202-revert-fbcon-remove-no-op-fbcon_set_origin.patch b/sys-kernel/pinephone-sources/files/0202-revert-fbcon-remove-no-op-fbcon_set_origin.patch new file mode 100644 index 0000000..6491c54 --- /dev/null +++ b/sys-kernel/pinephone-sources/files/0202-revert-fbcon-remove-no-op-fbcon_set_origin.patch @@ -0,0 +1,31 @@ +--- b/drivers/video/fbdev/core/fbcon.c ++++ a/drivers/video/fbdev/core/fbcon.c +@@ -163,6 +163,8 @@ + + #define advance_row(p, delta) (unsigned short 
*)((unsigned long)(p) + (delta) * vc->vc_size_row) + ++static int fbcon_set_origin(struct vc_data *); ++ + static int fbcon_cursor_noblink; + + #define divides(a, b) ((!(a) || (b)%(a)) ? 0 : 1) +@@ -2633,6 +2635,11 @@ + } + } + ++static int fbcon_set_origin(struct vc_data *vc) ++{ ++ return 0; ++} ++ + void fbcon_suspended(struct fb_info *info) + { + struct vc_data *vc = NULL; +@@ -3103,6 +3110,7 @@ + .con_font_default = fbcon_set_def_font, + .con_font_copy = fbcon_copy_font, + .con_set_palette = fbcon_set_palette, ++ .con_set_origin = fbcon_set_origin, + .con_invert_region = fbcon_invert_region, + .con_screen_pos = fbcon_screen_pos, + .con_getxy = fbcon_getxy, diff --git a/sys-kernel/pinephone-sources/files/0203-revert-fbcon-remove-soft-scrollback-code.patch b/sys-kernel/pinephone-sources/files/0203-revert-fbcon-remove-soft-scrollback-code.patch new file mode 100644 index 0000000..a3950bb --- /dev/null +++ b/sys-kernel/pinephone-sources/files/0203-revert-fbcon-remove-soft-scrollback-code.patch @@ -0,0 +1,500 @@ +--- b/drivers/video/fbdev/core/fbcon.c ++++ a/drivers/video/fbdev/core/fbcon.c +@@ -124,6 +124,12 @@ static int logo_lines; + /* logo_shown is an index to vc_cons when >= 0; otherwise follows FBCON_LOGO + enums. */ + static int logo_shown = FBCON_LOGO_CANSHOW; ++/* Software scrollback */ ++static int fbcon_softback_size = 32768; ++static unsigned long softback_buf, softback_curr; ++static unsigned long softback_in; ++static unsigned long softback_top, softback_end; ++static int softback_lines; + /* console mappings */ + static unsigned int first_fb_vc; + static unsigned int last_fb_vc = MAX_NR_CONSOLES - 1; +@@ -163,6 +169,8 @@ static int margin_color; + + static const struct consw fb_con; + ++#define CM_SOFTBACK (8) ++ + #define advance_row(p, delta) (unsigned short *)((unsigned long)(p) + (delta) * vc->vc_size_row) + + static int fbcon_set_origin(struct vc_data *); +@@ -347,6 +355,18 @@ static int get_color(struct vc_data *vc, + return color; + } + ++static void fbcon_update_softback(struct vc_data *vc) ++{ ++ int l = fbcon_softback_size / vc->vc_size_row; ++ ++ if (l > 5) ++ softback_end = softback_buf + l * vc->vc_size_row; ++ else ++ /* Smaller scrollback makes no sense, and 0 would screw ++ the operation totally */ ++ softback_top = 0; ++} ++ + static void fb_flashcursor(struct work_struct *work) + { + struct fbcon_ops *ops = container_of(work, struct fbcon_ops, cursor_work.work); +@@ -379,7 +399,7 @@ static void fb_flashcursor(struct work_s + c = scr_readw((u16 *) vc->vc_pos); + mode = (!ops->cursor_flash || ops->cursor_state.enable) ? 
+ CM_ERASE : CM_DRAW; +- ops->cursor(vc, info, mode, 0, get_color(vc, info, c, 1), ++ ops->cursor(vc, info, mode, softback_lines, get_color(vc, info, c, 1), + get_color(vc, info, c, 0)); + console_unlock(); + +@@ -419,7 +439,13 @@ static int __init fb_console_setup(char + } + + if (!strncmp(options, "scrollback:", 11)) { +- pr_warn("Ignoring scrollback size option\n"); ++ options += 11; ++ if (*options) { ++ fbcon_softback_size = simple_strtoul(options, &options, 0); ++ if (*options == 'k' || *options == 'K') { ++ fbcon_softback_size *= 1024; ++ } ++ } + continue; + } + +@@ -959,6 +985,31 @@ static const char *fbcon_startup(void) + + set_blitting_type(vc, info); + ++ if (info->fix.type != FB_TYPE_TEXT) { ++ if (fbcon_softback_size) { ++ if (!softback_buf) { ++ softback_buf = ++ (unsigned long) ++ kvmalloc(fbcon_softback_size, ++ GFP_KERNEL); ++ if (!softback_buf) { ++ fbcon_softback_size = 0; ++ softback_top = 0; ++ } ++ } ++ } else { ++ if (softback_buf) { ++ kvfree((void *) softback_buf); ++ softback_buf = 0; ++ softback_top = 0; ++ } ++ } ++ if (softback_buf) ++ softback_in = softback_top = softback_curr = ++ softback_buf; ++ softback_lines = 0; ++ } ++ + /* Setup default font */ + if (!p->fontdata && !vc->vc_font.data) { + if (!fontname[0] || !(font = find_font(fontname))) +@@ -1129,6 +1180,9 @@ static void fbcon_init(struct vc_data *v + if (logo) + fbcon_prepare_logo(vc, info, cols, rows, new_cols, new_rows); + ++ if (vc == svc && softback_buf) ++ fbcon_update_softback(vc); ++ + if (ops->rotate_font && ops->rotate_font(info, vc)) { + ops->rotate = FB_ROTATE_UR; + set_blitting_type(vc, info); +@@ -1152,6 +1206,9 @@ static void fbcon_release_all(void) + struct fb_info *info; + int i, j, mapped; + ++ kvfree((void *)softback_buf); ++ softback_buf = 0UL; ++ + fbcon_for_each_registered_fb(i) { + mapped = 0; + info = fbcon_registered_fb[i]; +@@ -1312,6 +1369,7 @@ static void fbcon_cursor(struct vc_data + { + struct fb_info *info = fbcon_info_from_console(vc->vc_num); + struct fbcon_ops *ops = info->fbcon_par; ++ int y; + int c = scr_readw((u16 *) vc->vc_pos); + + ops->cur_blink_jiffies = msecs_to_jiffies(vc->vc_cur_blink_ms); +@@ -1325,11 +1383,19 @@ static void fbcon_cursor(struct vc_data + fbcon_add_cursor_work(info); + + ops->cursor_flash = (mode == CM_ERASE) ? 
0 : 1; ++ if (mode & CM_SOFTBACK) { ++ mode &= ~CM_SOFTBACK; ++ y = softback_lines; ++ } else { ++ if (softback_lines) ++ fbcon_set_origin(vc); ++ y = 0; ++ } + + if (!ops->cursor) + return; + +- ops->cursor(vc, info, mode, 0, get_color(vc, info, c, 1), ++ ops->cursor(vc, info, mode, y, get_color(vc, info, c, 1), + get_color(vc, info, c, 0)); + } + +@@ -1399,6 +1465,8 @@ static void fbcon_set_disp(struct fb_inf + + if (con_is_visible(vc)) { + update_screen(vc); ++ if (softback_buf) ++ fbcon_update_softback(vc); + } + } + +@@ -1536,6 +1604,99 @@ static __inline__ void ypan_down_redraw( + scrollback_current = 0; + } + ++static void fbcon_redraw_softback(struct vc_data *vc, struct fbcon_display *p, ++ long delta) ++{ ++ int count = vc->vc_rows; ++ unsigned short *d, *s; ++ unsigned long n; ++ int line = 0; ++ ++ d = (u16 *) softback_curr; ++ if (d == (u16 *) softback_in) ++ d = (u16 *) vc->vc_origin; ++ n = softback_curr + delta * vc->vc_size_row; ++ softback_lines -= delta; ++ if (delta < 0) { ++ if (softback_curr < softback_top && n < softback_buf) { ++ n += softback_end - softback_buf; ++ if (n < softback_top) { ++ softback_lines -= ++ (softback_top - n) / vc->vc_size_row; ++ n = softback_top; ++ } ++ } else if (softback_curr >= softback_top ++ && n < softback_top) { ++ softback_lines -= ++ (softback_top - n) / vc->vc_size_row; ++ n = softback_top; ++ } ++ } else { ++ if (softback_curr > softback_in && n >= softback_end) { ++ n += softback_buf - softback_end; ++ if (n > softback_in) { ++ n = softback_in; ++ softback_lines = 0; ++ } ++ } else if (softback_curr <= softback_in && n > softback_in) { ++ n = softback_in; ++ softback_lines = 0; ++ } ++ } ++ if (n == softback_curr) ++ return; ++ softback_curr = n; ++ s = (u16 *) softback_curr; ++ if (s == (u16 *) softback_in) ++ s = (u16 *) vc->vc_origin; ++ while (count--) { ++ unsigned short *start; ++ unsigned short *le; ++ unsigned short c; ++ int x = 0; ++ unsigned short attr = 1; ++ ++ start = s; ++ le = advance_row(s, 1); ++ do { ++ c = scr_readw(s); ++ if (attr != (c & 0xff00)) { ++ attr = c & 0xff00; ++ if (s > start) { ++ fbcon_putcs(vc, start, s - start, ++ line, x); ++ x += s - start; ++ start = s; ++ } ++ } ++ if (c == scr_readw(d)) { ++ if (s > start) { ++ fbcon_putcs(vc, start, s - start, ++ line, x); ++ x += s - start + 1; ++ start = s + 1; ++ } else { ++ x++; ++ start++; ++ } ++ } ++ s++; ++ d++; ++ } while (s < le); ++ if (s > start) ++ fbcon_putcs(vc, start, s - start, line, x); ++ line++; ++ if (d == (u16 *) softback_end) ++ d = (u16 *) softback_buf; ++ if (d == (u16 *) softback_in) ++ d = (u16 *) vc->vc_origin; ++ if (s == (u16 *) softback_end) ++ s = (u16 *) softback_buf; ++ if (s == (u16 *) softback_in) ++ s = (u16 *) vc->vc_origin; ++ } ++} ++ + static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p, + int line, int count, int dy) + { +@@ -1740,6 +1901,31 @@ static void fbcon_bmove(struct vc_data * + p->vrows - p->yscroll); + } + ++static inline void fbcon_softback_note(struct vc_data *vc, int t, ++ int count) ++{ ++ unsigned short *p; ++ ++ if (vc->vc_num != fg_console) ++ return; ++ p = (unsigned short *) (vc->vc_origin + t * vc->vc_size_row); ++ ++ while (count) { ++ scr_memcpyw((u16 *) softback_in, p, vc->vc_size_row); ++ count--; ++ p = advance_row(p, 1); ++ softback_in += vc->vc_size_row; ++ if (softback_in == softback_end) ++ softback_in = softback_buf; ++ if (softback_in == softback_top) { ++ softback_top += vc->vc_size_row; ++ if (softback_top == softback_end) ++ softback_top = softback_buf; ++ } 
++ } ++ softback_curr = softback_in; ++} ++ + static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b, + enum con_scroll dir, unsigned int count) + { +@@ -1762,6 +1948,8 @@ static bool fbcon_scroll(struct vc_data + case SM_UP: + if (count > vc->vc_rows) /* Maximum realistic size */ + count = vc->vc_rows; ++ if (softback_top) ++ fbcon_softback_note(vc, t, count); + switch (fb_scrollmode(p)) { + case SCROLL_MOVE: + fbcon_redraw_blit(vc, info, p, t, b - t - count, +@@ -2076,6 +2264,14 @@ static int fbcon_switch(struct vc_data * + info = fbcon_info_from_console(vc->vc_num); + ops = info->fbcon_par; + ++ if (softback_top) { ++ if (softback_lines) ++ fbcon_set_origin(vc); ++ softback_top = softback_curr = softback_in = softback_buf; ++ softback_lines = 0; ++ fbcon_update_softback(vc); ++ } ++ + if (logo_shown >= 0) { + struct vc_data *conp2 = vc_cons[logo_shown].d; + +@@ -2406,6 +2602,9 @@ static int fbcon_do_set_font(struct vc_d + int resize; + char *old_data = NULL; + ++ if (con_is_visible(vc) && softback_lines) ++ fbcon_set_origin(vc); ++ + resize = (w != vc->vc_font.width) || (h != vc->vc_font.height); + if (p->userfont) + old_data = vc->vc_font.data; +@@ -2436,6 +2635,8 @@ static int fbcon_do_set_font(struct vc_d + ret = vc_resize(vc, cols, rows); + if (ret) + goto err_out; ++ if (con_is_visible(vc) && softback_buf) ++ fbcon_update_softback(vc); + } else if (con_is_visible(vc) + && vc->vc_mode == KD_TEXT) { + fbcon_clear_margins(vc, 0); +@@ -2582,7 +2783,19 @@ static void fbcon_set_palette(struct vc_ + + static u16 *fbcon_screen_pos(const struct vc_data *vc, int offset) + { +- return (u16 *) (vc->vc_origin + offset); ++ unsigned long p; ++ int line; ++ ++ if (vc->vc_num != fg_console || !softback_lines) ++ return (u16 *) (vc->vc_origin + offset); ++ line = offset / vc->vc_size_row; ++ if (line >= softback_lines) ++ return (u16 *) (vc->vc_origin + offset - ++ softback_lines * vc->vc_size_row); ++ p = softback_curr + offset; ++ if (p >= softback_end) ++ p += softback_buf - softback_end; ++ return (u16 *) p; + } + + static unsigned long fbcon_getxy(struct vc_data *vc, unsigned long pos, +@@ -2596,7 +2809,22 @@ static unsigned long fbcon_getxy(struct + + x = offset % vc->vc_cols; + y = offset / vc->vc_cols; ++ if (vc->vc_num == fg_console) ++ y += softback_lines; ++ ret = pos + (vc->vc_cols - x) * 2; ++ } else if (vc->vc_num == fg_console && softback_lines) { ++ unsigned long offset = pos - softback_curr; ++ ++ if (pos < softback_curr) ++ offset += softback_end - softback_buf; ++ offset /= 2; ++ x = offset % vc->vc_cols; ++ y = offset / vc->vc_cols; + ret = pos + (vc->vc_cols - x) * 2; ++ if (ret == softback_end) ++ ret = softback_buf; ++ if (ret == softback_in) ++ ret = vc->vc_origin; + } else { + /* Should not happen */ + x = y = 0; +@@ -2624,11 +2852,106 @@ static void fbcon_invert_region(struct v + a = ((a) & 0x88ff) | (((a) & 0x7000) >> 4) | + (((a) & 0x0700) << 4); + scr_writew(a, p++); ++ if (p == (u16 *) softback_end) ++ p = (u16 *) softback_buf; ++ if (p == (u16 *) softback_in) ++ p = (u16 *) vc->vc_origin; ++ } ++} ++ ++static void fbcon_scrolldelta(struct vc_data *vc, int lines) ++{ ++ struct fb_info *info = registered_fb[con2fb_map[fg_console]]; ++ struct fbcon_ops *ops = info->fbcon_par; ++ struct fbcon_display *disp = &fb_display[fg_console]; ++ int offset, limit, scrollback_old; ++ ++ if (softback_top) { ++ if (vc->vc_num != fg_console) ++ return; ++ if (vc->vc_mode != KD_TEXT || !lines) ++ return; ++ if (logo_shown >= 0) { ++ struct vc_data *conp2 = 
vc_cons[logo_shown].d; ++ ++ if (conp2->vc_top == logo_lines ++ && conp2->vc_bottom == conp2->vc_rows) ++ conp2->vc_top = 0; ++ if (logo_shown == vc->vc_num) { ++ unsigned long p, q; ++ int i; ++ ++ p = softback_in; ++ q = vc->vc_origin + ++ logo_lines * vc->vc_size_row; ++ for (i = 0; i < logo_lines; i++) { ++ if (p == softback_top) ++ break; ++ if (p == softback_buf) ++ p = softback_end; ++ p -= vc->vc_size_row; ++ q -= vc->vc_size_row; ++ scr_memcpyw((u16 *) q, (u16 *) p, ++ vc->vc_size_row); ++ } ++ softback_in = softback_curr = p; ++ update_region(vc, vc->vc_origin, ++ logo_lines * vc->vc_cols); ++ } ++ logo_shown = FBCON_LOGO_CANSHOW; ++ } ++ fbcon_cursor(vc, CM_ERASE | CM_SOFTBACK); ++ fbcon_redraw_softback(vc, disp, lines); ++ fbcon_cursor(vc, CM_DRAW | CM_SOFTBACK); ++ return; + } ++ ++ if (!scrollback_phys_max) ++ return; ++ ++ scrollback_old = scrollback_current; ++ scrollback_current -= lines; ++ if (scrollback_current < 0) ++ scrollback_current = 0; ++ else if (scrollback_current > scrollback_max) ++ scrollback_current = scrollback_max; ++ if (scrollback_current == scrollback_old) ++ return; ++ ++ if (fbcon_is_inactive(vc, info)) ++ return; ++ ++ fbcon_cursor(vc, CM_ERASE); ++ ++ offset = disp->yscroll - scrollback_current; ++ limit = disp->vrows; ++ switch (disp->scrollmode) { ++ case SCROLL_WRAP_MOVE: ++ info->var.vmode |= FB_VMODE_YWRAP; ++ break; ++ case SCROLL_PAN_MOVE: ++ case SCROLL_PAN_REDRAW: ++ limit -= vc->vc_rows; ++ info->var.vmode &= ~FB_VMODE_YWRAP; ++ break; ++ } ++ if (offset < 0) ++ offset += limit; ++ else if (offset >= limit) ++ offset -= limit; ++ ++ ops->var.xoffset = 0; ++ ops->var.yoffset = offset * vc->vc_font.height; ++ ops->update_start(info); ++ ++ if (!scrollback_current) ++ fbcon_cursor(vc, CM_DRAW); + } + + static int fbcon_set_origin(struct vc_data *vc) + { ++ if (softback_lines) ++ fbcon_scrolldelta(vc, softback_lines); + return 0; + } + +@@ -2692,6 +3015,8 @@ static void fbcon_modechanged(struct fb_ + + fbcon_set_palette(vc, color_table); + update_screen(vc); ++ if (softback_buf) ++ fbcon_update_softback(vc); + } + } + +@@ -3154,6 +3479,7 @@ static const struct consw fb_con = { + .con_font_get = fbcon_get_font, + .con_font_default = fbcon_set_def_font, + .con_set_palette = fbcon_set_palette, ++ .con_scrolldelta = fbcon_scrolldelta, + .con_set_origin = fbcon_set_origin, + .con_invert_region = fbcon_invert_region, + .con_screen_pos = fbcon_screen_pos, diff --git a/sys-kernel/pinephone-sources/files/5.19.10-11.patch b/sys-kernel/pinephone-sources/files/5.19.10-11.patch new file mode 100644 index 0000000..a5ff5cb --- /dev/null +++ b/sys-kernel/pinephone-sources/files/5.19.10-11.patch @@ -0,0 +1,1231 @@ +diff --git a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml +index 85c85b694217c..e18107eafe7cc 100644 +--- a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml ++++ b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml +@@ -96,7 +96,7 @@ properties: + Documentation/devicetree/bindings/arm/cpus.yaml). 
+ + required: +- - fiq-index ++ - apple,fiq-index + - cpus + + required: +diff --git a/Makefile b/Makefile +index 33a9b6b547c47..01463a22926d5 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 5 + PATCHLEVEL = 19 +-SUBLEVEL = 10 ++SUBLEVEL = 11 + EXTRAVERSION = + NAME = Superb Owl + +diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig +index cd2b3fe156724..c68c3581483ac 100644 +--- a/arch/parisc/Kconfig ++++ b/arch/parisc/Kconfig +@@ -225,8 +225,18 @@ config MLONGCALLS + Enabling this option will probably slow down your kernel. + + config 64BIT +- def_bool "$(ARCH)" = "parisc64" ++ def_bool y if "$(ARCH)" = "parisc64" ++ bool "64-bit kernel" if "$(ARCH)" = "parisc" + depends on PA8X00 ++ help ++ Enable this if you want to support 64bit kernel on PA-RISC platform. ++ ++ At the moment, only people willing to use more than 2GB of RAM, ++ or having a 64bit-only capable PA-RISC machine should say Y here. ++ ++ Since there is no 64bit userland on PA-RISC, there is no point to ++ enable this option otherwise. The 64bit kernel is significantly bigger ++ and slower than the 32bit one. + + choice + prompt "Kernel page size" +diff --git a/block/blk-core.c b/block/blk-core.c +index 27fb1357ad4b8..cc6fbcb6d2521 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -338,7 +338,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) + + while (!blk_try_enter_queue(q, pm)) { + if (flags & BLK_MQ_REQ_NOWAIT) +- return -EBUSY; ++ return -EAGAIN; + + /* + * read pair of barrier in blk_freeze_queue_start(), we need to +@@ -368,7 +368,7 @@ int __bio_queue_enter(struct request_queue *q, struct bio *bio) + if (test_bit(GD_DEAD, &disk->state)) + goto dead; + bio_wouldblock_error(bio); +- return -EBUSY; ++ return -EAGAIN; + } + + /* +diff --git a/block/blk-lib.c b/block/blk-lib.c +index 09b7e1200c0f4..20e42144065b8 100644 +--- a/block/blk-lib.c ++++ b/block/blk-lib.c +@@ -311,6 +311,11 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, + struct blk_plug plug; + int ret = 0; + ++ /* make sure that "len << SECTOR_SHIFT" doesn't overflow */ ++ if (max_sectors > UINT_MAX >> SECTOR_SHIFT) ++ max_sectors = UINT_MAX >> SECTOR_SHIFT; ++ max_sectors &= ~bs_mask; ++ + if (max_sectors == 0) + return -EOPNOTSUPP; + if ((sector | nr_sects) & bs_mask) +@@ -324,10 +329,10 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector, + + bio = blk_next_bio(bio, bdev, 0, REQ_OP_SECURE_ERASE, gfp); + bio->bi_iter.bi_sector = sector; +- bio->bi_iter.bi_size = len; ++ bio->bi_iter.bi_size = len << SECTOR_SHIFT; + +- sector += len << SECTOR_SHIFT; +- nr_sects -= len << SECTOR_SHIFT; ++ sector += len; ++ nr_sects -= len; + if (!nr_sects) { + ret = submit_bio_wait(bio); + bio_put(bio); +diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c +index a964e25ea6206..763256efddc2b 100644 +--- a/drivers/gpio/gpio-mpc8xxx.c ++++ b/drivers/gpio/gpio-mpc8xxx.c +@@ -172,6 +172,7 @@ static int mpc8xxx_irq_set_type(struct irq_data *d, unsigned int flow_type) + + switch (flow_type) { + case IRQ_TYPE_EDGE_FALLING: ++ case IRQ_TYPE_LEVEL_LOW: + raw_spin_lock_irqsave(&mpc8xxx_gc->lock, flags); + gc->write_reg(mpc8xxx_gc->regs + GPIO_ICR, + gc->read_reg(mpc8xxx_gc->regs + GPIO_ICR) +diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c +index e342a6dc4c6c1..bb953f6478647 100644 +--- a/drivers/gpio/gpio-rockchip.c ++++ b/drivers/gpio/gpio-rockchip.c +@@ -418,11 +418,11 @@ static int 
rockchip_irq_set_type(struct irq_data *d, unsigned int type) + goto out; + } else { + bank->toggle_edge_mode |= mask; +- level |= mask; ++ level &= ~mask; + + /* + * Determine gpio state. If 1 next interrupt should be +- * falling otherwise rising. ++ * low otherwise high. + */ + data = readl(bank->reg_base + bank->gpio_regs->ext_port); + if (data & mask) +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index 67d4a3c13ed19..929f8b75bfaee 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -2391,8 +2391,16 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) + } + adev->ip_blocks[i].status.sw = true; + +- /* need to do gmc hw init early so we can allocate gpu mem */ +- if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { ++ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { ++ /* need to do common hw init early so everything is set up for gmc */ ++ r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); ++ if (r) { ++ DRM_ERROR("hw_init %d failed %d\n", i, r); ++ goto init_failed; ++ } ++ adev->ip_blocks[i].status.hw = true; ++ } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { ++ /* need to do gmc hw init early so we can allocate gpu mem */ + /* Try to reserve bad pages early */ + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_exchange_data(adev); +@@ -3078,8 +3086,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) + int i, r; + + static enum amd_ip_block_type ip_order[] = { +- AMD_IP_BLOCK_TYPE_GMC, + AMD_IP_BLOCK_TYPE_COMMON, ++ AMD_IP_BLOCK_TYPE_GMC, + AMD_IP_BLOCK_TYPE_PSP, + AMD_IP_BLOCK_TYPE_IH, + }; +diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +index f49db13b3fbee..0debdbcf46310 100644 +--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c ++++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +@@ -380,6 +380,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, + WREG32_PCIE(smnPCIE_LC_CNTL, data); + } + ++#ifdef CONFIG_PCIEASPM + static void nbio_v2_3_program_ltr(struct amdgpu_device *adev) + { + uint32_t def, data; +@@ -401,9 +402,11 @@ static void nbio_v2_3_program_ltr(struct amdgpu_device *adev) + if (def != data) + WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + } ++#endif + + static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) + { ++#ifdef CONFIG_PCIEASPM + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL); +@@ -459,7 +462,10 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL6, data); + +- nbio_v2_3_program_ltr(adev); ++ /* Don't bother about LTR if LTR is not enabled ++ * in the path */ ++ if (adev->pdev->ltr_path) ++ nbio_v2_3_program_ltr(adev); + + def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3); + data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; +@@ -483,6 +489,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) + data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL3, data); ++#endif + } + + static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev) +diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +index f7f6ddebd3e49..37615a77287bc 100644 +--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c ++++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +@@ -282,6 +282,7 @@ static void 
nbio_v6_1_init_registers(struct amdgpu_device *adev) + mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; + } + ++#ifdef CONFIG_PCIEASPM + static void nbio_v6_1_program_ltr(struct amdgpu_device *adev) + { + uint32_t def, data; +@@ -303,9 +304,11 @@ static void nbio_v6_1_program_ltr(struct amdgpu_device *adev) + if (def != data) + WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + } ++#endif + + static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) + { ++#ifdef CONFIG_PCIEASPM + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL); +@@ -361,7 +364,10 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL6, data); + +- nbio_v6_1_program_ltr(adev); ++ /* Don't bother about LTR if LTR is not enabled ++ * in the path */ ++ if (adev->pdev->ltr_path) ++ nbio_v6_1_program_ltr(adev); + + def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); + data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; +@@ -385,6 +391,7 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) + data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL3, data); ++#endif + } + + const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { +diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +index 11848d1e238b6..19455a7259391 100644 +--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c ++++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +@@ -673,6 +673,7 @@ struct amdgpu_nbio_ras nbio_v7_4_ras = { + }; + + ++#ifdef CONFIG_PCIEASPM + static void nbio_v7_4_program_ltr(struct amdgpu_device *adev) + { + uint32_t def, data; +@@ -694,9 +695,11 @@ static void nbio_v7_4_program_ltr(struct amdgpu_device *adev) + if (def != data) + WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + } ++#endif + + static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) + { ++#ifdef CONFIG_PCIEASPM + uint32_t def, data; + + if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4)) +@@ -755,7 +758,10 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL6, data); + +- nbio_v7_4_program_ltr(adev); ++ /* Don't bother about LTR if LTR is not enabled ++ * in the path */ ++ if (adev->pdev->ltr_path) ++ nbio_v7_4_program_ltr(adev); + + def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); + data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; +@@ -779,6 +785,7 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) + data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL3, data); ++#endif + } + + const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +index 65181efba50ec..56424f75dd2cc 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +@@ -1504,6 +1504,11 @@ static int sdma_v4_0_start(struct amdgpu_device *adev) + WREG32_SDMA(i, mmSDMA0_CNTL, temp); + + if (!amdgpu_sriov_vf(adev)) { ++ ring = &adev->sdma.instance[i].ring; ++ adev->nbio.funcs->sdma_doorbell_range(adev, i, ++ ring->use_doorbell, ring->doorbell_index, ++ adev->doorbell_index.sdma_doorbell_range); ++ + /* unhalt engine */ + temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL); + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); +diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c +index 
fde6154f20096..183024d7c184e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/soc15.c ++++ b/drivers/gpu/drm/amd/amdgpu/soc15.c +@@ -1211,25 +1211,6 @@ static int soc15_common_sw_fini(void *handle) + return 0; + } + +-static void soc15_doorbell_range_init(struct amdgpu_device *adev) +-{ +- int i; +- struct amdgpu_ring *ring; +- +- /* sdma/ih doorbell range are programed by hypervisor */ +- if (!amdgpu_sriov_vf(adev)) { +- for (i = 0; i < adev->sdma.num_instances; i++) { +- ring = &adev->sdma.instance[i].ring; +- adev->nbio.funcs->sdma_doorbell_range(adev, i, +- ring->use_doorbell, ring->doorbell_index, +- adev->doorbell_index.sdma_doorbell_range); +- } +- +- adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, +- adev->irq.ih.doorbell_index); +- } +-} +- + static int soc15_common_hw_init(void *handle) + { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; +@@ -1249,12 +1230,6 @@ static int soc15_common_hw_init(void *handle) + + /* enable the doorbell aperture */ + soc15_enable_doorbell_aperture(adev, true); +- /* HW doorbell routing policy: doorbell writing not +- * in SDMA/IH/MM/ACV range will be routed to CP. So +- * we need to init SDMA/IH/MM/ACV doorbell range prior +- * to CP ip block init and ring test. +- */ +- soc15_doorbell_range_init(adev); + + return 0; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +index 03b7066471f9a..1e83db0c5438d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c ++++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +@@ -289,6 +289,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) + } + } + ++ if (!amdgpu_sriov_vf(adev)) ++ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, ++ adev->irq.ih.doorbell_index); ++ + pci_set_master(adev->pdev); + + /* enable interrupts */ +diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +index 2022ffbb8dba5..59dfca093155c 100644 +--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c ++++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +@@ -340,6 +340,10 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) + } + } + ++ if (!amdgpu_sriov_vf(adev)) ++ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, ++ adev->irq.ih.doorbell_index); ++ + pci_set_master(adev->pdev); + + /* enable interrupts */ +diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c +index 19bf717fd4cb6..5508ebb9eb434 100644 +--- a/drivers/gpu/drm/i915/display/icl_dsi.c ++++ b/drivers/gpu/drm/i915/display/icl_dsi.c +@@ -1629,6 +1629,8 @@ static int gen11_dsi_dsc_compute_config(struct intel_encoder *encoder, + /* FIXME: initialize from VBT */ + vdsc_cfg->rc_model_size = DSC_RC_MODEL_SIZE_CONST; + ++ vdsc_cfg->pic_height = crtc_state->hw.adjusted_mode.crtc_vdisplay; ++ + ret = intel_dsc_compute_params(crtc_state); + if (ret) + return ret; +diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c +index 41aaa6c98114f..fe8b6b72970a2 100644 +--- a/drivers/gpu/drm/i915/display/intel_dp.c ++++ b/drivers/gpu/drm/i915/display/intel_dp.c +@@ -1379,6 +1379,7 @@ static int intel_dp_dsc_compute_params(struct intel_encoder *encoder, + * DP_DSC_RC_BUF_SIZE for this. + */ + vdsc_cfg->rc_model_size = DSC_RC_MODEL_SIZE_CONST; ++ vdsc_cfg->pic_height = crtc_state->hw.adjusted_mode.crtc_vdisplay; + + /* + * Slice Height of 8 works for all currently available panels. 
So start +diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c +index 43e1bbc1e3035..ca530f0733e0e 100644 +--- a/drivers/gpu/drm/i915/display/intel_vdsc.c ++++ b/drivers/gpu/drm/i915/display/intel_vdsc.c +@@ -460,7 +460,6 @@ int intel_dsc_compute_params(struct intel_crtc_state *pipe_config) + u8 i = 0; + + vdsc_cfg->pic_width = pipe_config->hw.adjusted_mode.crtc_hdisplay; +- vdsc_cfg->pic_height = pipe_config->hw.adjusted_mode.crtc_vdisplay; + vdsc_cfg->slice_width = DIV_ROUND_UP(vdsc_cfg->pic_width, + pipe_config->dsc.slice_count); + +diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h +index 9feda105f9131..a7acffbf15d1f 100644 +--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h ++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h +@@ -235,6 +235,14 @@ struct intel_guc { + * @shift: Right shift value for the gpm timestamp + */ + u32 shift; ++ ++ /** ++ * @last_stat_jiffies: jiffies at last actual stats collection time ++ * We use this timestamp to ensure we don't oversample the ++ * stats because runtime power management events can trigger ++ * stats collection at much higher rates than required. ++ */ ++ unsigned long last_stat_jiffies; + } timestamp; + + #ifdef CONFIG_DRM_I915_SELFTEST +diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +index 26a051ef119df..d7e4681d7297c 100644 +--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c ++++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +@@ -1365,6 +1365,8 @@ static void __update_guc_busyness_stats(struct intel_guc *guc) + unsigned long flags; + ktime_t unused; + ++ guc->timestamp.last_stat_jiffies = jiffies; ++ + spin_lock_irqsave(&guc->timestamp.lock, flags); + + guc_update_pm_timestamp(guc, &unused); +@@ -1436,7 +1438,23 @@ void intel_guc_busyness_park(struct intel_gt *gt) + if (!guc_submission_initialized(guc)) + return; + +- cancel_delayed_work(&guc->timestamp.work); ++ /* ++ * There is a race with suspend flow where the worker runs after suspend ++ * and causes an unclaimed register access warning. Cancel the worker ++ * synchronously here. ++ */ ++ cancel_delayed_work_sync(&guc->timestamp.work); ++ ++ /* ++ * Before parking, we should sample engine busyness stats if we need to. ++ * We can skip it if we are less than half a ping from the last time we ++ * sampled the busyness stats. 
++ */ ++ if (guc->timestamp.last_stat_jiffies && ++ !time_after(jiffies, guc->timestamp.last_stat_jiffies + ++ (guc->timestamp.ping_delay / 2))) ++ return; ++ + __update_guc_busyness_stats(guc); + } + +diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h +index 4f5a51bb9e1e4..e77956ae88a4b 100644 +--- a/drivers/gpu/drm/i915/i915_reg.h ++++ b/drivers/gpu/drm/i915/i915_reg.h +@@ -1849,14 +1849,14 @@ + + #define GT0_PERF_LIMIT_REASONS _MMIO(0x1381a8) + #define GT0_PERF_LIMIT_REASONS_MASK 0xde3 +-#define PROCHOT_MASK REG_BIT(1) +-#define THERMAL_LIMIT_MASK REG_BIT(2) +-#define RATL_MASK REG_BIT(6) +-#define VR_THERMALERT_MASK REG_BIT(7) +-#define VR_TDC_MASK REG_BIT(8) +-#define POWER_LIMIT_4_MASK REG_BIT(9) +-#define POWER_LIMIT_1_MASK REG_BIT(11) +-#define POWER_LIMIT_2_MASK REG_BIT(12) ++#define PROCHOT_MASK REG_BIT(0) ++#define THERMAL_LIMIT_MASK REG_BIT(1) ++#define RATL_MASK REG_BIT(5) ++#define VR_THERMALERT_MASK REG_BIT(6) ++#define VR_TDC_MASK REG_BIT(7) ++#define POWER_LIMIT_4_MASK REG_BIT(8) ++#define POWER_LIMIT_1_MASK REG_BIT(10) ++#define POWER_LIMIT_2_MASK REG_BIT(11) + + #define CHV_CLK_CTL1 _MMIO(0x101100) + #define VLV_CLK_CTL2 _MMIO(0x101104) +diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c +index 16460b169ed21..2a32729a74b51 100644 +--- a/drivers/gpu/drm/i915/i915_vma.c ++++ b/drivers/gpu/drm/i915/i915_vma.c +@@ -1870,12 +1870,13 @@ int _i915_vma_move_to_active(struct i915_vma *vma, + enum dma_resv_usage usage; + int idx; + +- obj->read_domains = 0; + if (flags & EXEC_OBJECT_WRITE) { + usage = DMA_RESV_USAGE_WRITE; + obj->write_domain = I915_GEM_DOMAIN_RENDER; ++ obj->read_domains = 0; + } else { + usage = DMA_RESV_USAGE_READ; ++ obj->write_domain = 0; + } + + dma_fence_array_for_each(curr, idx, fence) +diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c +index 8640a8a8a4691..44aa526294439 100644 +--- a/drivers/gpu/drm/meson/meson_plane.c ++++ b/drivers/gpu/drm/meson/meson_plane.c +@@ -168,7 +168,7 @@ static void meson_plane_atomic_update(struct drm_plane *plane, + + /* Enable OSD and BLK0, set max global alpha */ + priv->viu.osd1_ctrl_stat = OSD_ENABLE | +- (0xFF << OSD_GLOBAL_ALPHA_SHIFT) | ++ (0x100 << OSD_GLOBAL_ALPHA_SHIFT) | + OSD_BLK0_ENABLE; + + priv->viu.osd1_ctrl_stat2 = readl(priv->io_base + +diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c +index bb7e109534de1..d4b907889a21d 100644 +--- a/drivers/gpu/drm/meson/meson_viu.c ++++ b/drivers/gpu/drm/meson/meson_viu.c +@@ -94,7 +94,7 @@ static void meson_viu_set_g12a_osd1_matrix(struct meson_drm *priv, + priv->io_base + _REG(VPP_WRAP_OSD1_MATRIX_COEF11_12)); + writel(((m[9] & 0x1fff) << 16) | (m[10] & 0x1fff), + priv->io_base + _REG(VPP_WRAP_OSD1_MATRIX_COEF20_21)); +- writel((m[11] & 0x1fff) << 16, ++ writel((m[11] & 0x1fff), + priv->io_base + _REG(VPP_WRAP_OSD1_MATRIX_COEF22)); + + writel(((m[18] & 0xfff) << 16) | (m[19] & 0xfff), +diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c +index a189982601a48..e8040defe6073 100644 +--- a/drivers/gpu/drm/panel/panel-edp.c ++++ b/drivers/gpu/drm/panel/panel-edp.c +@@ -1270,7 +1270,8 @@ static const struct panel_desc innolux_n116bca_ea1 = { + }, + .delay = { + .hpd_absent = 200, +- .prepare_to_enable = 80, ++ .enable = 80, ++ .disable = 50, + .unprepare = 500, + }, + }; +diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +index d6e831576cd2b..88271f04615b0 100644 +--- 
a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c ++++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +@@ -1436,11 +1436,15 @@ static void rk3568_set_intf_mux(struct vop2_video_port *vp, int id, + die &= ~RK3568_SYS_DSP_INFACE_EN_HDMI_MUX; + die |= RK3568_SYS_DSP_INFACE_EN_HDMI | + FIELD_PREP(RK3568_SYS_DSP_INFACE_EN_HDMI_MUX, vp->id); ++ dip &= ~RK3568_DSP_IF_POL__HDMI_PIN_POL; ++ dip |= FIELD_PREP(RK3568_DSP_IF_POL__HDMI_PIN_POL, polflags); + break; + case ROCKCHIP_VOP2_EP_EDP0: + die &= ~RK3568_SYS_DSP_INFACE_EN_EDP_MUX; + die |= RK3568_SYS_DSP_INFACE_EN_EDP | + FIELD_PREP(RK3568_SYS_DSP_INFACE_EN_EDP_MUX, vp->id); ++ dip &= ~RK3568_DSP_IF_POL__EDP_PIN_POL; ++ dip |= FIELD_PREP(RK3568_DSP_IF_POL__EDP_PIN_POL, polflags); + break; + case ROCKCHIP_VOP2_EP_MIPI0: + die &= ~RK3568_SYS_DSP_INFACE_EN_MIPI0_MUX; +diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c +index fc8c1420c0b69..64b14ac4c7b02 100644 +--- a/drivers/iommu/intel/dmar.c ++++ b/drivers/iommu/intel/dmar.c +@@ -2368,13 +2368,6 @@ static int dmar_device_hotplug(acpi_handle handle, bool insert) + if (!dmar_in_use()) + return 0; + +- /* +- * It's unlikely that any I/O board is hot added before the IOMMU +- * subsystem is initialized. +- */ +- if (IS_ENABLED(CONFIG_INTEL_IOMMU) && !intel_iommu_enabled) +- return -EOPNOTSUPP; +- + if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) { + tmp = handle; + } else { +diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c +index c0464959cbcdb..861a239d905a4 100644 +--- a/drivers/iommu/intel/iommu.c ++++ b/drivers/iommu/intel/iommu.c +@@ -3133,7 +3133,13 @@ static int __init init_dmars(void) + + #ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { ++ /* ++ * Call dmar_alloc_hwirq() with dmar_global_lock held, ++ * could cause possible lock race condition. ++ */ ++ up_write(&dmar_global_lock); + ret = intel_svm_enable_prq(iommu); ++ down_write(&dmar_global_lock); + if (ret) + goto free_iommu; + } +@@ -4039,6 +4045,7 @@ int __init intel_iommu_init(void) + force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) || + platform_optin_force_iommu(); + ++ down_write(&dmar_global_lock); + if (dmar_table_init()) { + if (force_on) + panic("tboot: Failed to initialize DMAR table\n"); +@@ -4051,6 +4058,16 @@ int __init intel_iommu_init(void) + goto out_free_dmar; + } + ++ up_write(&dmar_global_lock); ++ ++ /* ++ * The bus notifier takes the dmar_global_lock, so lockdep will ++ * complain later when we register it under the lock. 
++ */ ++ dmar_register_bus_notifier(); ++ ++ down_write(&dmar_global_lock); ++ + if (!no_iommu) + intel_iommu_debugfs_init(); + +@@ -4098,9 +4115,11 @@ int __init intel_iommu_init(void) + pr_err("Initialization failed\n"); + goto out_free_dmar; + } ++ up_write(&dmar_global_lock); + + init_iommu_pm_ops(); + ++ down_read(&dmar_global_lock); + for_each_active_iommu(iommu, drhd) { + /* + * The flush queue implementation does not perform +@@ -4118,11 +4137,13 @@ int __init intel_iommu_init(void) + "%s", iommu->name); + iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); + } ++ up_read(&dmar_global_lock); + + bus_set_iommu(&pci_bus_type, &intel_iommu_ops); + if (si_domain && !hw_pass_through) + register_memory_notifier(&intel_iommu_memory_nb); + ++ down_read(&dmar_global_lock); + if (probe_acpi_namespace_devices()) + pr_warn("ACPI name space devices didn't probe correctly\n"); + +@@ -4133,15 +4154,17 @@ int __init intel_iommu_init(void) + + iommu_disable_protect_mem_regions(iommu); + } ++ up_read(&dmar_global_lock); + +- intel_iommu_enabled = 1; +- dmar_register_bus_notifier(); + pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); + ++ intel_iommu_enabled = 1; ++ + return 0; + + out_free_dmar: + intel_iommu_free_dmars(); ++ up_write(&dmar_global_lock); + return ret; + } + +diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c +index 520ed965bb7a4..583ca847a39cb 100644 +--- a/drivers/of/fdt.c ++++ b/drivers/of/fdt.c +@@ -314,7 +314,7 @@ static int unflatten_dt_nodes(const void *blob, + for (offset = 0; + offset >= 0 && depth >= initial_depth; + offset = fdt_next_node(blob, offset, &depth)) { +- if (WARN_ON_ONCE(depth >= FDT_MAX_DEPTH)) ++ if (WARN_ON_ONCE(depth >= FDT_MAX_DEPTH - 1)) + continue; + + if (!IS_ENABLED(CONFIG_OF_KOBJ) && +diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c +index f69ab90b5e22d..6052f264bbb0a 100644 +--- a/drivers/parisc/ccio-dma.c ++++ b/drivers/parisc/ccio-dma.c +@@ -1546,6 +1546,7 @@ static int __init ccio_probe(struct parisc_device *dev) + } + ccio_ioc_init(ioc); + if (ccio_init_resources(ioc)) { ++ iounmap(ioc->ioc_regs); + kfree(ioc); + return -ENOMEM; + } +diff --git a/drivers/pinctrl/qcom/pinctrl-sc8180x.c b/drivers/pinctrl/qcom/pinctrl-sc8180x.c +index 6bec7f1431348..704a99d2f93ce 100644 +--- a/drivers/pinctrl/qcom/pinctrl-sc8180x.c ++++ b/drivers/pinctrl/qcom/pinctrl-sc8180x.c +@@ -530,10 +530,10 @@ DECLARE_MSM_GPIO_PINS(187); + DECLARE_MSM_GPIO_PINS(188); + DECLARE_MSM_GPIO_PINS(189); + +-static const unsigned int sdc2_clk_pins[] = { 190 }; +-static const unsigned int sdc2_cmd_pins[] = { 191 }; +-static const unsigned int sdc2_data_pins[] = { 192 }; +-static const unsigned int ufs_reset_pins[] = { 193 }; ++static const unsigned int ufs_reset_pins[] = { 190 }; ++static const unsigned int sdc2_clk_pins[] = { 191 }; ++static const unsigned int sdc2_cmd_pins[] = { 192 }; ++static const unsigned int sdc2_data_pins[] = { 193 }; + + enum sc8180x_functions { + msm_mux_adsp_ext, +@@ -1582,7 +1582,7 @@ static const int sc8180x_acpi_reserved_gpios[] = { + static const struct msm_gpio_wakeirq_map sc8180x_pdc_map[] = { + { 3, 31 }, { 5, 32 }, { 8, 33 }, { 9, 34 }, { 10, 100 }, { 12, 104 }, + { 24, 37 }, { 26, 38 }, { 27, 41 }, { 28, 42 }, { 30, 39 }, { 36, 43 }, +- { 37, 43 }, { 38, 45 }, { 39, 118 }, { 39, 125 }, { 41, 47 }, ++ { 37, 44 }, { 38, 45 }, { 39, 118 }, { 39, 125 }, { 41, 47 }, + { 42, 48 }, { 46, 50 }, { 47, 49 }, { 48, 51 }, { 49, 53 }, { 50, 52 }, + { 51, 116 }, { 51, 123 }, { 53, 54 }, { 54, 55 }, { 55, 56 }, + { 56, 57 
}, { 58, 58 }, { 60, 60 }, { 68, 62 }, { 70, 63 }, { 76, 86 }, +diff --git a/drivers/pinctrl/sunxi/pinctrl-sun50i-a100-r.c b/drivers/pinctrl/sunxi/pinctrl-sun50i-a100-r.c +index 21054fcacd345..18088f6f44b23 100644 +--- a/drivers/pinctrl/sunxi/pinctrl-sun50i-a100-r.c ++++ b/drivers/pinctrl/sunxi/pinctrl-sun50i-a100-r.c +@@ -98,7 +98,7 @@ MODULE_DEVICE_TABLE(of, a100_r_pinctrl_match); + static struct platform_driver a100_r_pinctrl_driver = { + .probe = a100_r_pinctrl_probe, + .driver = { +- .name = "sun50iw10p1-r-pinctrl", ++ .name = "sun50i-a100-r-pinctrl", + .of_match_table = a100_r_pinctrl_match, + }, + }; +diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c +index 386bb523c69ea..bdc3efdb12219 100644 +--- a/fs/cifs/connect.c ++++ b/fs/cifs/connect.c +@@ -707,9 +707,6 @@ cifs_readv_from_socket(struct TCP_Server_Info *server, struct msghdr *smb_msg) + int length = 0; + int total_read; + +- smb_msg->msg_control = NULL; +- smb_msg->msg_controllen = 0; +- + for (total_read = 0; msg_data_left(smb_msg); total_read += length) { + try_to_freeze(); + +@@ -765,7 +762,7 @@ int + cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, + unsigned int to_read) + { +- struct msghdr smb_msg; ++ struct msghdr smb_msg = {}; + struct kvec iov = {.iov_base = buf, .iov_len = to_read}; + iov_iter_kvec(&smb_msg.msg_iter, READ, &iov, 1, to_read); + +@@ -775,15 +772,13 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, + ssize_t + cifs_discard_from_socket(struct TCP_Server_Info *server, size_t to_read) + { +- struct msghdr smb_msg; ++ struct msghdr smb_msg = {}; + + /* + * iov_iter_discard already sets smb_msg.type and count and iov_offset + * and cifs_readv_from_socket sets msg_control and msg_controllen + * so little to initialize in struct msghdr + */ +- smb_msg.msg_name = NULL; +- smb_msg.msg_namelen = 0; + iov_iter_discard(&smb_msg.msg_iter, READ, to_read); + + return cifs_readv_from_socket(server, &smb_msg); +@@ -793,7 +788,7 @@ int + cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page, + unsigned int page_offset, unsigned int to_read) + { +- struct msghdr smb_msg; ++ struct msghdr smb_msg = {}; + struct bio_vec bv = { + .bv_page = page, .bv_len = to_read, .bv_offset = page_offset}; + iov_iter_bvec(&smb_msg.msg_iter, READ, &bv, 1, to_read); +diff --git a/fs/cifs/file.c b/fs/cifs/file.c +index 0f03c0bfdf280..02dd591acabb3 100644 +--- a/fs/cifs/file.c ++++ b/fs/cifs/file.c +@@ -3327,6 +3327,9 @@ static ssize_t __cifs_writev( + + ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from) + { ++ struct file *file = iocb->ki_filp; ++ ++ cifs_revalidate_mapping(file->f_inode); + return __cifs_writev(iocb, from, true); + } + +diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c +index bfc9bd55870a0..8adc0f2a59518 100644 +--- a/fs/cifs/transport.c ++++ b/fs/cifs/transport.c +@@ -196,10 +196,6 @@ smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg, + + *sent = 0; + +- smb_msg->msg_name = (struct sockaddr *) &server->dstaddr; +- smb_msg->msg_namelen = sizeof(struct sockaddr); +- smb_msg->msg_control = NULL; +- smb_msg->msg_controllen = 0; + if (server->noblocksnd) + smb_msg->msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; + else +@@ -311,7 +307,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, + sigset_t mask, oldmask; + size_t total_len = 0, sent, size; + struct socket *ssocket = server->ssocket; +- struct msghdr smb_msg; ++ struct msghdr smb_msg = {}; + __be32 rfc1002_marker; + + if (cifs_rdma_enabled(server)) { +diff --git 
a/fs/nfs/internal.h b/fs/nfs/internal.h +index 8f8cd6e2d4dbc..597e3ce3f148a 100644 +--- a/fs/nfs/internal.h ++++ b/fs/nfs/internal.h +@@ -604,6 +604,31 @@ static inline gfp_t nfs_io_gfp_mask(void) + return GFP_KERNEL; + } + ++/* ++ * Special version of should_remove_suid() that ignores capabilities. ++ */ ++static inline int nfs_should_remove_suid(const struct inode *inode) ++{ ++ umode_t mode = inode->i_mode; ++ int kill = 0; ++ ++ /* suid always must be killed */ ++ if (unlikely(mode & S_ISUID)) ++ kill = ATTR_KILL_SUID; ++ ++ /* ++ * sgid without any exec bits is just a mandatory locking mark; leave ++ * it alone. If some exec bits are set, it's a real sgid; kill it. ++ */ ++ if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) ++ kill |= ATTR_KILL_SGID; ++ ++ if (unlikely(kill && S_ISREG(mode))) ++ return kill; ++ ++ return 0; ++} ++ + /* unlink.c */ + extern struct rpc_task * + nfs_async_rename(struct inode *old_dir, struct inode *new_dir, +diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c +index 068c45b3bc1ab..6dab9e4083729 100644 +--- a/fs/nfs/nfs42proc.c ++++ b/fs/nfs/nfs42proc.c +@@ -78,10 +78,15 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, + + status = nfs4_call_sync(server->client, server, msg, + &args.seq_args, &res.seq_res, 0); +- if (status == 0) ++ if (status == 0) { ++ if (nfs_should_remove_suid(inode)) { ++ spin_lock(&inode->i_lock); ++ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE); ++ spin_unlock(&inode->i_lock); ++ } + status = nfs_post_op_update_inode_force_wcc(inode, + res.falloc_fattr); +- ++ } + if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE]) + trace_nfs4_fallocate(inode, &args, status); + else +diff --git a/fs/nfs/super.c b/fs/nfs/super.c +index 6ab5eeb000dc0..5e4bacb77bfc7 100644 +--- a/fs/nfs/super.c ++++ b/fs/nfs/super.c +@@ -1051,22 +1051,31 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx) + if (ctx->bsize) + sb->s_blocksize = nfs_block_size(ctx->bsize, &sb->s_blocksize_bits); + +- if (server->nfs_client->rpc_ops->version != 2) { +- /* The VFS shouldn't apply the umask to mode bits. We will do +- * so ourselves when necessary. ++ switch (server->nfs_client->rpc_ops->version) { ++ case 2: ++ sb->s_time_gran = 1000; ++ sb->s_time_min = 0; ++ sb->s_time_max = U32_MAX; ++ break; ++ case 3: ++ /* ++ * The VFS shouldn't apply the umask to mode bits. ++ * We will do so ourselves when necessary. + */ + sb->s_flags |= SB_POSIXACL; + sb->s_time_gran = 1; +- sb->s_export_op = &nfs_export_ops; +- } else +- sb->s_time_gran = 1000; +- +- if (server->nfs_client->rpc_ops->version != 4) { + sb->s_time_min = 0; + sb->s_time_max = U32_MAX; +- } else { ++ sb->s_export_op = &nfs_export_ops; ++ break; ++ case 4: ++ sb->s_flags |= SB_POSIXACL; ++ sb->s_time_gran = 1; + sb->s_time_min = S64_MIN; + sb->s_time_max = S64_MAX; ++ if (server->caps & NFS_CAP_ATOMIC_OPEN_V1) ++ sb->s_export_op = &nfs_export_ops; ++ break; + } + + sb->s_magic = NFS_SUPER_MAGIC; +diff --git a/fs/nfs/write.c b/fs/nfs/write.c +index 5d7e1c2061842..4212473c69ee9 100644 +--- a/fs/nfs/write.c ++++ b/fs/nfs/write.c +@@ -1497,31 +1497,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) + NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); + } + +-/* +- * Special version of should_remove_suid() that ignores capabilities. 
+- */ +-static int nfs_should_remove_suid(const struct inode *inode) +-{ +- umode_t mode = inode->i_mode; +- int kill = 0; +- +- /* suid always must be killed */ +- if (unlikely(mode & S_ISUID)) +- kill = ATTR_KILL_SUID; +- +- /* +- * sgid without any exec bits is just a mandatory locking mark; leave +- * it alone. If some exec bits are set, it's a real sgid; kill it. +- */ +- if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) +- kill |= ATTR_KILL_SGID; +- +- if (unlikely(kill && S_ISREG(mode))) +- return kill; +- +- return 0; +-} +- + static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr, + struct nfs_fattr *fattr) + { +diff --git a/include/linux/dmar.h b/include/linux/dmar.h +index f3a3d95df5325..cbd714a198a0a 100644 +--- a/include/linux/dmar.h ++++ b/include/linux/dmar.h +@@ -69,7 +69,6 @@ struct dmar_pci_notify_info { + + extern struct rw_semaphore dmar_global_lock; + extern struct list_head dmar_drhd_units; +-extern int intel_iommu_enabled; + + #define for_each_drhd_unit(drhd) \ + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ +@@ -93,8 +92,7 @@ extern int intel_iommu_enabled; + static inline bool dmar_rcu_check(void) + { + return rwsem_is_locked(&dmar_global_lock) || +- system_state == SYSTEM_BOOTING || +- (IS_ENABLED(CONFIG_INTEL_IOMMU) && !intel_iommu_enabled); ++ system_state == SYSTEM_BOOTING; + } + + #define dmar_rcu_dereference(p) rcu_dereference_check((p), dmar_rcu_check()) +diff --git a/include/linux/of_device.h b/include/linux/of_device.h +index 1d7992a02e36e..1a803e4335d30 100644 +--- a/include/linux/of_device.h ++++ b/include/linux/of_device.h +@@ -101,8 +101,9 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) + } + + static inline int of_dma_configure_id(struct device *dev, +- struct device_node *np, +- bool force_dma) ++ struct device_node *np, ++ bool force_dma, ++ const u32 *id) + { + return 0; + } +diff --git a/include/net/xfrm.h b/include/net/xfrm.h +index c39d910d4b454..9ca397eed1638 100644 +--- a/include/net/xfrm.h ++++ b/include/net/xfrm.h +@@ -1195,6 +1195,8 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk); + + static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) + { ++ if (!sk_fullsock(osk)) ++ return 0; + sk->sk_policy[0] = NULL; + sk->sk_policy[1] = NULL; + if (unlikely(osk->sk_policy[0] || osk->sk_policy[1])) +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index 48833d0edd089..602da2cfd57c8 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -5061,7 +5061,8 @@ done: + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); + /* put file to avoid an attempt to IOPOLL the req */ +- io_put_file(req->file); ++ if (!(req->flags & REQ_F_FIXED_FILE)) ++ io_put_file(req->file); + req->file = NULL; + return 0; + } +diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c +index afc6c0e9c966e..f93983910b5e1 100644 +--- a/kernel/cgroup/cgroup-v1.c ++++ b/kernel/cgroup/cgroup-v1.c +@@ -59,6 +59,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) + int retval = 0; + + mutex_lock(&cgroup_mutex); ++ cpus_read_lock(); + percpu_down_write(&cgroup_threadgroup_rwsem); + for_each_root(root) { + struct cgroup *from_cgrp; +@@ -72,6 +73,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) + break; + } + percpu_up_write(&cgroup_threadgroup_rwsem); ++ cpus_read_unlock(); + mutex_unlock(&cgroup_mutex); + + return retval; +diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c +index 
da8b3cc67234d..028eb28c7882d 100644 +--- a/net/ipv4/ip_output.c ++++ b/net/ipv4/ip_output.c +@@ -1704,7 +1704,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, + tcp_hdr(skb)->source, tcp_hdr(skb)->dest, + arg->uid); + security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4)); +- rt = ip_route_output_key(net, &fl4); ++ rt = ip_route_output_flow(net, &fl4, sk); + if (IS_ERR(rt)) + return; + +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index 586c102ce152d..9fd92e263d0a3 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -819,6 +819,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) + ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ? + inet_twsk(sk)->tw_priority : sk->sk_priority; + transmit_time = tcp_transmit_time(sk); ++ xfrm_sk_clone_policy(ctl_sk, sk); + } + ip_send_unicast_reply(ctl_sk, + skb, &TCP_SKB_CB(skb)->header.h4.opt, +@@ -827,6 +828,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) + transmit_time); + + ctl_sk->sk_mark = 0; ++ xfrm_sk_free_policy(ctl_sk); + sock_net_set(ctl_sk, &init_net); + __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); + __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index be09941fe6d9a..5eabe746cfa76 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -952,7 +952,10 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 + * Underlying function will use this to retrieve the network + * namespace + */ +- dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL); ++ if (sk && sk->sk_state != TCP_TIME_WAIT) ++ dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/ ++ else ++ dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL); + if (!IS_ERR(dst)) { + skb_dst_set(buff, dst); + ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, +diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c +index c1a01947530f0..db8c0de1de422 100644 +--- a/net/sunrpc/clnt.c ++++ b/net/sunrpc/clnt.c +@@ -2858,6 +2858,9 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, + + task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC, + &rpc_cb_add_xprt_call_ops, data); ++ if (IS_ERR(task)) ++ return PTR_ERR(task); ++ + data->xps->xps_nunique_destaddr_xprts++; + rpc_put_task(task); + success: +diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c +index 53b024cea3b3e..5ecafffe7ce59 100644 +--- a/net/sunrpc/xprt.c ++++ b/net/sunrpc/xprt.c +@@ -1179,11 +1179,8 @@ xprt_request_dequeue_receive_locked(struct rpc_task *task) + { + struct rpc_rqst *req = task->tk_rqstp; + +- if (test_and_clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) { ++ if (test_and_clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) + xprt_request_rb_remove(req->rq_xprt, req); +- xdr_free_bvec(&req->rq_rcv_buf); +- req->rq_private_buf.bvec = NULL; +- } + } + + /** +@@ -1221,6 +1218,8 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) + + xprt->stat.recvs++; + ++ xdr_free_bvec(&req->rq_rcv_buf); ++ req->rq_private_buf.bvec = NULL; + req->rq_private_buf.len = copied; + /* Ensure all writes are done before we update */ + /* req->rq_reply_bytes_recvd */ +@@ -1453,6 +1452,7 @@ xprt_request_dequeue_xprt(struct rpc_task *task) + xprt_request_dequeue_transmit_locked(task); + xprt_request_dequeue_receive_locked(task); + spin_unlock(&xprt->queue_lock); ++ xdr_free_bvec(&req->rq_rcv_buf); + } + } + +diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c +index 
61df4d33c48ff..7f340f18599c9 100644 +--- a/sound/pci/hda/patch_sigmatel.c ++++ b/sound/pci/hda/patch_sigmatel.c +@@ -209,6 +209,7 @@ struct sigmatel_spec { + + /* beep widgets */ + hda_nid_t anabeep_nid; ++ bool beep_power_on; + + /* SPDIF-out mux */ + const char * const *spdif_labels; +@@ -4443,6 +4444,28 @@ static int stac_suspend(struct hda_codec *codec) + + return 0; + } ++ ++static int stac_check_power_status(struct hda_codec *codec, hda_nid_t nid) ++{ ++#ifdef CONFIG_SND_HDA_INPUT_BEEP ++ struct sigmatel_spec *spec = codec->spec; ++#endif ++ int ret = snd_hda_gen_check_power_status(codec, nid); ++ ++#ifdef CONFIG_SND_HDA_INPUT_BEEP ++ if (nid == spec->gen.beep_nid && codec->beep) { ++ if (codec->beep->enabled != spec->beep_power_on) { ++ spec->beep_power_on = codec->beep->enabled; ++ if (spec->beep_power_on) ++ snd_hda_power_up_pm(codec); ++ else ++ snd_hda_power_down_pm(codec); ++ } ++ ret |= spec->beep_power_on; ++ } ++#endif ++ return ret; ++} + #else + #define stac_suspend NULL + #endif /* CONFIG_PM */ +@@ -4455,6 +4478,7 @@ static const struct hda_codec_ops stac_patch_ops = { + .unsol_event = snd_hda_jack_unsol_event, + #ifdef CONFIG_PM + .suspend = stac_suspend, ++ .check_power_status = stac_check_power_status, + #endif + }; + +diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h +index d30439b4b8ab4..869379f91fe48 100644 +--- a/tools/include/uapi/asm/errno.h ++++ b/tools/include/uapi/asm/errno.h +@@ -9,8 +9,8 @@ + #include "../../../arch/alpha/include/uapi/asm/errno.h" + #elif defined(__mips__) + #include "../../../arch/mips/include/uapi/asm/errno.h" +-#elif defined(__xtensa__) +-#include "../../../arch/xtensa/include/uapi/asm/errno.h" ++#elif defined(__hppa__) ++#include "../../../arch/parisc/include/uapi/asm/errno.h" + #else + #include + #endif diff --git a/sys-kernel/pinephone-sources/files/5.19.11-12.patch b/sys-kernel/pinephone-sources/files/5.19.11-12.patch new file mode 100644 index 0000000..8c6e32f --- /dev/null +++ b/sys-kernel/pinephone-sources/files/5.19.11-12.patch @@ -0,0 +1,9776 @@ +diff --git a/Makefile b/Makefile +index 01463a22926d5..7df4c195c8ab2 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 5 + PATCHLEVEL = 19 +-SUBLEVEL = 11 ++SUBLEVEL = 12 + EXTRAVERSION = + NAME = Superb Owl + +diff --git a/arch/arm/boot/dts/lan966x.dtsi b/arch/arm/boot/dts/lan966x.dtsi +index 38e90a31d2dd1..25c19f9d0a12f 100644 +--- a/arch/arm/boot/dts/lan966x.dtsi ++++ b/arch/arm/boot/dts/lan966x.dtsi +@@ -515,13 +515,13 @@ + + phy0: ethernet-phy@1 { + reg = <1>; +- interrupts = ; ++ interrupts = ; + status = "disabled"; + }; + + phy1: ethernet-phy@2 { + reg = <2>; +- interrupts = ; ++ interrupts = ; + status = "disabled"; + }; + }; +diff --git a/arch/arm64/boot/dts/freescale/imx8mm-mx8menlo.dts b/arch/arm64/boot/dts/freescale/imx8mm-mx8menlo.dts +index 92eaf4ef45638..57ecdfa0dfc09 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mm-mx8menlo.dts ++++ b/arch/arm64/boot/dts/freescale/imx8mm-mx8menlo.dts +@@ -152,11 +152,11 @@ + * CPLD_reset is RESET_SOFT in schematic + */ + gpio-line-names = +- "CPLD_D[1]", "CPLD_int", "CPLD_reset", "", +- "", "CPLD_D[0]", "", "", +- "", "", "", "CPLD_D[2]", +- "CPLD_D[3]", "CPLD_D[4]", "CPLD_D[5]", "CPLD_D[6]", +- "CPLD_D[7]", "", "", "", ++ "CPLD_D[6]", "CPLD_int", "CPLD_reset", "", ++ "", "CPLD_D[7]", "", "", ++ "", "", "", "CPLD_D[5]", ++ "CPLD_D[4]", "CPLD_D[3]", "CPLD_D[2]", "CPLD_D[1]", ++ "CPLD_D[0]", "", "", "", + "", "", "", "", + "", "", "", "KBD_intK", + "", "", 
"", ""; +diff --git a/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts b/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts +index 286d2df01cfa7..7e0aeb2db3054 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts ++++ b/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml-mba8mx.dts +@@ -5,7 +5,6 @@ + + /dts-v1/; + +-#include + #include "imx8mm-tqma8mqml.dtsi" + #include "mba8mx.dtsi" + +diff --git a/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml.dtsi +index 16ee9b5179e6e..f649dfacb4b69 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml.dtsi ++++ b/arch/arm64/boot/dts/freescale/imx8mm-tqma8mqml.dtsi +@@ -3,6 +3,7 @@ + * Copyright 2020-2021 TQ-Systems GmbH + */ + ++#include + #include "imx8mm.dtsi" + + / { +diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +index c2d4da25482ff..44b473494d0f5 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi ++++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +@@ -359,8 +359,8 @@ + nxp,dvs-standby-voltage = <850000>; + regulator-always-on; + regulator-boot-on; +- regulator-max-microvolt = <950000>; +- regulator-min-microvolt = <850000>; ++ regulator-max-microvolt = <1050000>; ++ regulator-min-microvolt = <805000>; + regulator-name = "On-module +VDD_ARM (BUCK2)"; + regulator-ramp-delay = <3125>; + }; +@@ -368,8 +368,8 @@ + reg_vdd_dram: BUCK3 { + regulator-always-on; + regulator-boot-on; +- regulator-max-microvolt = <950000>; +- regulator-min-microvolt = <850000>; ++ regulator-max-microvolt = <1000000>; ++ regulator-min-microvolt = <805000>; + regulator-name = "On-module +VDD_GPU_VPU_DDR (BUCK3)"; + }; + +@@ -408,7 +408,7 @@ + reg_vdd_snvs: LDO2 { + regulator-always-on; + regulator-boot-on; +- regulator-max-microvolt = <900000>; ++ regulator-max-microvolt = <800000>; + regulator-min-microvolt = <800000>; + regulator-name = "On-module +V0.8_SNVS (LDO2)"; + }; +diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi +index e41e1d56f980d..7bd4eecd592ef 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi ++++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi +@@ -672,7 +672,6 @@ + <&clk IMX8MN_CLK_GPU_SHADER>, + <&clk IMX8MN_CLK_GPU_BUS_ROOT>, + <&clk IMX8MN_CLK_GPU_AHB>; +- resets = <&src IMX8MQ_RESET_GPU_RESET>; + }; + + pgc_dispmix: power-domain@3 { +diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts +index 6630ec561dc25..211e6a1b296e1 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts ++++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts +@@ -123,8 +123,7 @@ + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_reg_can>; + regulator-name = "can2_stby"; +- gpio = <&gpio3 19 GPIO_ACTIVE_HIGH>; +- enable-active-high; ++ gpio = <&gpio3 19 GPIO_ACTIVE_LOW>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + }; +@@ -484,35 +483,40 @@ + lan1: port@0 { + reg = <0>; + label = "lan1"; ++ phy-mode = "internal"; + local-mac-address = [00 00 00 00 00 00]; + }; + + lan2: port@1 { + reg = <1>; + label = "lan2"; ++ phy-mode = "internal"; + local-mac-address = [00 00 00 00 00 00]; + }; + + lan3: port@2 { + reg = <2>; + label = "lan3"; ++ phy-mode = "internal"; + local-mac-address = [00 00 00 00 00 00]; + }; + + lan4: port@3 { + reg = <3>; + label = "lan4"; ++ phy-mode = "internal"; + local-mac-address = [00 00 00 00 00 00]; + }; + + lan5: 
port@4 { + reg = <4>; + label = "lan5"; ++ phy-mode = "internal"; + local-mac-address = [00 00 00 00 00 00]; + }; + +- port@6 { +- reg = <6>; ++ port@5 { ++ reg = <5>; + label = "cpu"; + ethernet = <&fec>; + phy-mode = "rgmii-id"; +diff --git a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi +index 09f7364dd1d05..1cd389b1b95d6 100644 +--- a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi ++++ b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi +@@ -172,6 +172,7 @@ + compatible = "fsl,imx8ulp-pcc3"; + reg = <0x292d0000 0x10000>; + #clock-cells = <1>; ++ #reset-cells = <1>; + }; + + tpm5: tpm@29340000 { +@@ -270,6 +271,7 @@ + compatible = "fsl,imx8ulp-pcc4"; + reg = <0x29800000 0x10000>; + #clock-cells = <1>; ++ #reset-cells = <1>; + }; + + lpi2c6: i2c@29840000 { +@@ -414,6 +416,7 @@ + compatible = "fsl,imx8ulp-pcc5"; + reg = <0x2da70000 0x10000>; + #clock-cells = <1>; ++ #reset-cells = <1>; + }; + }; + +diff --git a/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core.dtsi b/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core.dtsi +index 7249871530ab9..5eecbefa8a336 100644 +--- a/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core.dtsi ++++ b/arch/arm64/boot/dts/rockchip/px30-engicam-px30-core.dtsi +@@ -2,8 +2,8 @@ + /* + * Copyright (c) 2020 Fuzhou Rockchip Electronics Co., Ltd + * Copyright (c) 2020 Engicam srl +- * Copyright (c) 2020 Amarula Solutons +- * Copyright (c) 2020 Amarula Solutons(India) ++ * Copyright (c) 2020 Amarula Solutions ++ * Copyright (c) 2020 Amarula Solutions(India) + */ + + #include +diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts +index 31ebb4e5fd330..0f9cc042d9bf0 100644 +--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts ++++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-bob.dts +@@ -88,3 +88,8 @@ + }; + }; + }; ++ ++&wlan_host_wake_l { ++ /* Kevin has an external pull up, but Bob does not. */ ++ rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_up>; ++}; +diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi +index 50d459ee4831c..af5810e5f5b79 100644 +--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi ++++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi +@@ -244,6 +244,14 @@ + &edp { + status = "okay"; + ++ /* ++ * eDP PHY/clk don't sync reliably at anything other than 24 MHz. Only ++ * set this here, because rk3399-gru.dtsi ensures we can generate this ++ * off GPLL=600MHz, whereas some other RK3399 boards may not. 
++ */ ++ assigned-clocks = <&cru PCLK_EDP>; ++ assigned-clock-rates = <24000000>; ++ + ports { + edp_out: port@1 { + reg = <1>; +@@ -578,6 +586,7 @@ ap_i2c_tp: &i2c5 { + }; + + wlan_host_wake_l: wlan-host-wake-l { ++ /* Kevin has an external pull up, but Bob does not */ + rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>; + }; + }; +diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +index b1ac3a89f259c..aa3e21bd6c8f4 100644 +--- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi ++++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +@@ -62,7 +62,6 @@ + vcc5v0_host: vcc5v0-host-regulator { + compatible = "regulator-fixed"; + gpio = <&gpio4 RK_PA3 GPIO_ACTIVE_LOW>; +- enable-active-low; + pinctrl-names = "default"; + pinctrl-0 = <&vcc5v0_host_en>; + regulator-name = "vcc5v0_host"; +diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +index fa953b7366421..fdbfdf3634e43 100644 +--- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts ++++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +@@ -163,7 +163,6 @@ + + vcc3v3_sd: vcc3v3_sd { + compatible = "regulator-fixed"; +- enable-active-low; + gpio = <&gpio0 RK_PA5 GPIO_ACTIVE_LOW>; + pinctrl-names = "default"; + pinctrl-0 = <&vcc_sd_h>; +diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts +index 02d5f5a8ca036..528bb4e8ac776 100644 +--- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts ++++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts +@@ -506,7 +506,7 @@ + disable-wp; + pinctrl-names = "default"; + pinctrl-0 = <&sdmmc0_bus4 &sdmmc0_clk &sdmmc0_cmd &sdmmc0_det>; +- sd-uhs-sdr104; ++ sd-uhs-sdr50; + vmmc-supply = <&vcc3v3_sd>; + vqmmc-supply = <&vccio_sd>; + status = "okay"; +diff --git a/arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts +index 622be8be9813d..282f5c74d5cda 100644 +--- a/arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts ++++ b/arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts +@@ -618,7 +618,7 @@ + }; + + &usb2phy0_otg { +- vbus-supply = <&vcc5v0_usb_otg>; ++ phy-supply = <&vcc5v0_usb_otg>; + status = "okay"; + }; + +diff --git a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts +index 0813c0c5abded..26912f02684ce 100644 +--- a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts ++++ b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts +@@ -543,7 +543,7 @@ + }; + + &usb2phy0_otg { +- vbus-supply = <&vcc5v0_usb_otg>; ++ phy-supply = <&vcc5v0_usb_otg>; + status = "okay"; + }; + +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index 707b5451929d4..d4abb948eb14e 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -251,7 +251,7 @@ static void amu_fie_setup(const struct cpumask *cpus) + for_each_cpu(cpu, cpus) { + if (!freq_counters_valid(cpu) || + freq_inv_set_max_ratio(cpu, +- cpufreq_get_hw_max_freq(cpu) * 1000, ++ cpufreq_get_hw_max_freq(cpu) * 1000ULL, + arch_timer_get_rate())) + return; + } +diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c +index 7a623684d9b5e..2d5a0bcb0cec1 100644 +--- a/arch/mips/lantiq/clk.c ++++ b/arch/mips/lantiq/clk.c +@@ -50,6 +50,7 @@ struct clk *clk_get_io(void) + { + return &cpu_clk_generic[2]; + } ++EXPORT_SYMBOL_GPL(clk_get_io); + + struct clk *clk_get_ppe(void) + { +diff --git a/arch/mips/loongson32/common/platform.c 
b/arch/mips/loongson32/common/platform.c +index 794c96c2a4cdd..311dc1580bbde 100644 +--- a/arch/mips/loongson32/common/platform.c ++++ b/arch/mips/loongson32/common/platform.c +@@ -98,7 +98,7 @@ int ls1x_eth_mux_init(struct platform_device *pdev, void *priv) + if (plat_dat->bus_id) { + __raw_writel(__raw_readl(LS1X_MUX_CTRL0) | GMAC1_USE_UART1 | + GMAC1_USE_UART0, LS1X_MUX_CTRL0); +- switch (plat_dat->interface) { ++ switch (plat_dat->phy_interface) { + case PHY_INTERFACE_MODE_RGMII: + val &= ~(GMAC1_USE_TXCLK | GMAC1_USE_PWM23); + break; +@@ -107,12 +107,12 @@ int ls1x_eth_mux_init(struct platform_device *pdev, void *priv) + break; + default: + pr_err("unsupported mii mode %d\n", +- plat_dat->interface); ++ plat_dat->phy_interface); + return -ENOTSUPP; + } + val &= ~GMAC1_SHUT; + } else { +- switch (plat_dat->interface) { ++ switch (plat_dat->phy_interface) { + case PHY_INTERFACE_MODE_RGMII: + val &= ~(GMAC0_USE_TXCLK | GMAC0_USE_PWM01); + break; +@@ -121,7 +121,7 @@ int ls1x_eth_mux_init(struct platform_device *pdev, void *priv) + break; + default: + pr_err("unsupported mii mode %d\n", +- plat_dat->interface); ++ plat_dat->phy_interface); + return -ENOTSUPP; + } + val &= ~GMAC0_SHUT; +@@ -131,7 +131,7 @@ int ls1x_eth_mux_init(struct platform_device *pdev, void *priv) + plat_dat = dev_get_platdata(&pdev->dev); + + val &= ~PHY_INTF_SELI; +- if (plat_dat->interface == PHY_INTERFACE_MODE_RMII) ++ if (plat_dat->phy_interface == PHY_INTERFACE_MODE_RMII) + val |= 0x4 << PHY_INTF_SELI_SHIFT; + __raw_writel(val, LS1X_MUX_CTRL1); + +@@ -146,9 +146,9 @@ static struct plat_stmmacenet_data ls1x_eth0_pdata = { + .bus_id = 0, + .phy_addr = -1, + #if defined(CONFIG_LOONGSON1_LS1B) +- .interface = PHY_INTERFACE_MODE_MII, ++ .phy_interface = PHY_INTERFACE_MODE_MII, + #elif defined(CONFIG_LOONGSON1_LS1C) +- .interface = PHY_INTERFACE_MODE_RMII, ++ .phy_interface = PHY_INTERFACE_MODE_RMII, + #endif + .mdio_bus_data = &ls1x_mdio_bus_data, + .dma_cfg = &ls1x_eth_dma_cfg, +@@ -186,7 +186,7 @@ struct platform_device ls1x_eth0_pdev = { + static struct plat_stmmacenet_data ls1x_eth1_pdata = { + .bus_id = 1, + .phy_addr = -1, +- .interface = PHY_INTERFACE_MODE_MII, ++ .phy_interface = PHY_INTERFACE_MODE_MII, + .mdio_bus_data = &ls1x_mdio_bus_data, + .dma_cfg = &ls1x_eth_dma_cfg, + .has_gmac = 1, +diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig +index fcbb81feb7ad8..1f02f15569749 100644 +--- a/arch/riscv/Kconfig ++++ b/arch/riscv/Kconfig +@@ -361,6 +361,7 @@ config RISCV_ISA_C + config RISCV_ISA_SVPBMT + bool "SVPBMT extension support" + depends on 64BIT && MMU ++ depends on !XIP_KERNEL + select RISCV_ALTERNATIVE + default y + help +diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c +index 5a2de6b6f8822..5c591123c4409 100644 +--- a/arch/riscv/kernel/signal.c ++++ b/arch/riscv/kernel/signal.c +@@ -124,6 +124,8 @@ SYSCALL_DEFINE0(rt_sigreturn) + if (restore_altstack(&frame->uc.uc_stack)) + goto badframe; + ++ regs->cause = -1UL; ++ + return regs->a0; + + badframe: +diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c +index e0de60e503b98..d9e023c78f568 100644 +--- a/arch/um/kernel/um_arch.c ++++ b/arch/um/kernel/um_arch.c +@@ -33,7 +33,7 @@ + #include "um_arch.h" + + #define DEFAULT_COMMAND_LINE_ROOT "root=98:0" +-#define DEFAULT_COMMAND_LINE_CONSOLE "console=tty" ++#define DEFAULT_COMMAND_LINE_CONSOLE "console=tty0" + + /* Changed in add_arg and setup_arch, which run before SMP is started */ + static char __initdata command_line[COMMAND_LINE_SIZE] = { 0 }; +diff --git 
a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h +index 4c0e812f2f044..19c04412f6e16 100644 +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -713,6 +713,7 @@ struct kvm_vcpu_arch { + struct fpu_guest guest_fpu; + + u64 xcr0; ++ u64 guest_supported_xcr0; + + struct kvm_pio_request pio; + void *pio_data; +diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c +index de6d44e07e348..3ab498165639f 100644 +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -283,7 +283,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) + { + struct kvm_lapic *apic = vcpu->arch.apic; + struct kvm_cpuid_entry2 *best; +- u64 guest_supported_xcr0; + + best = kvm_find_cpuid_entry(vcpu, 1, 0); + if (best && apic) { +@@ -295,10 +294,16 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) + kvm_apic_set_version(vcpu); + } + +- guest_supported_xcr0 = ++ vcpu->arch.guest_supported_xcr0 = + cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); + +- vcpu->arch.guest_fpu.fpstate->user_xfeatures = guest_supported_xcr0; ++ /* ++ * FP+SSE can always be saved/restored via KVM_{G,S}ET_XSAVE, even if ++ * XSAVE/XCRO are not exposed to the guest, and even if XSAVE isn't ++ * supported by the host. ++ */ ++ vcpu->arch.guest_fpu.fpstate->user_xfeatures = vcpu->arch.guest_supported_xcr0 | ++ XFEATURE_MASK_FPSSE; + + kvm_update_pv_runtime(vcpu); + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 09fa8a94807bf..0c4a866813b31 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -4134,6 +4134,9 @@ static int em_xsetbv(struct x86_emulate_ctxt *ctxt) + { + u32 eax, ecx, edx; + ++ if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSXSAVE)) ++ return emulate_ud(ctxt); ++ + eax = reg_read(ctxt, VCPU_REGS_RAX); + edx = reg_read(ctxt, VCPU_REGS_RDX); + ecx = reg_read(ctxt, VCPU_REGS_RCX); +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 5b36866528568..8c2815151864b 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1025,15 +1025,10 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu) + } + EXPORT_SYMBOL_GPL(kvm_load_host_xsave_state); + +-static inline u64 kvm_guest_supported_xcr0(struct kvm_vcpu *vcpu) +-{ +- return vcpu->arch.guest_fpu.fpstate->user_xfeatures; +-} +- + #ifdef CONFIG_X86_64 + static inline u64 kvm_guest_supported_xfd(struct kvm_vcpu *vcpu) + { +- return kvm_guest_supported_xcr0(vcpu) & XFEATURE_MASK_USER_DYNAMIC; ++ return vcpu->arch.guest_supported_xcr0 & XFEATURE_MASK_USER_DYNAMIC; + } + #endif + +@@ -1056,7 +1051,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) + * saving. However, xcr0 bit 0 is always set, even if the + * emulated CPU does not support XSAVE (see kvm_vcpu_reset()). + */ +- valid_bits = kvm_guest_supported_xcr0(vcpu) | XFEATURE_MASK_FP; ++ valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP; + if (xcr0 & ~valid_bits) + return 1; + +@@ -1084,6 +1079,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) + + int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu) + { ++ /* Note, #UD due to CR4.OSXSAVE=0 has priority over the intercept. 
*/ + if (static_call(kvm_x86_get_cpl)(vcpu) != 0 || + __kvm_set_xcr(vcpu, kvm_rcx_read(vcpu), kvm_read_edx_eax(vcpu))) { + kvm_inject_gp(vcpu, 0); +diff --git a/block/blk-core.c b/block/blk-core.c +index cc6fbcb6d2521..7743c68177e89 100644 +--- a/block/blk-core.c ++++ b/block/blk-core.c +@@ -284,49 +284,6 @@ void blk_queue_start_drain(struct request_queue *q) + wake_up_all(&q->mq_freeze_wq); + } + +-/** +- * blk_cleanup_queue - shutdown a request queue +- * @q: request queue to shutdown +- * +- * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and +- * put it. All future requests will be failed immediately with -ENODEV. +- * +- * Context: can sleep +- */ +-void blk_cleanup_queue(struct request_queue *q) +-{ +- /* cannot be called from atomic context */ +- might_sleep(); +- +- WARN_ON_ONCE(blk_queue_registered(q)); +- +- /* mark @q DYING, no new request or merges will be allowed afterwards */ +- blk_queue_flag_set(QUEUE_FLAG_DYING, q); +- blk_queue_start_drain(q); +- +- blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); +- blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); +- +- /* +- * Drain all requests queued before DYING marking. Set DEAD flag to +- * prevent that blk_mq_run_hw_queues() accesses the hardware queues +- * after draining finished. +- */ +- blk_freeze_queue(q); +- +- blk_queue_flag_set(QUEUE_FLAG_DEAD, q); +- +- blk_sync_queue(q); +- if (queue_is_mq(q)) { +- blk_mq_cancel_work_sync(q); +- blk_mq_exit_queue(q); +- } +- +- /* @q is and will stay empty, shutdown and put */ +- blk_put_queue(q); +-} +-EXPORT_SYMBOL(blk_cleanup_queue); +- + /** + * blk_queue_enter() - try to increase q->q_usage_counter + * @q: request queue pointer +diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c +index 61f179e5f151a..28adb01f64419 100644 +--- a/block/blk-mq-debugfs.c ++++ b/block/blk-mq-debugfs.c +@@ -116,7 +116,6 @@ static const char *const blk_queue_flag_name[] = { + QUEUE_FLAG_NAME(NOXMERGES), + QUEUE_FLAG_NAME(ADD_RANDOM), + QUEUE_FLAG_NAME(SAME_FORCE), +- QUEUE_FLAG_NAME(DEAD), + QUEUE_FLAG_NAME(INIT_DONE), + QUEUE_FLAG_NAME(STABLE_WRITES), + QUEUE_FLAG_NAME(POLL), +@@ -151,11 +150,10 @@ static ssize_t queue_state_write(void *data, const char __user *buf, + char opbuf[16] = { }, *op; + + /* +- * The "state" attribute is removed after blk_cleanup_queue() has called +- * blk_mq_free_queue(). Return if QUEUE_FLAG_DEAD has been set to avoid +- * triggering a use-after-free. ++ * The "state" attribute is removed when the queue is removed. Don't ++ * allow setting the state on a dying queue to avoid a use-after-free. + */ +- if (blk_queue_dead(q)) ++ if (blk_queue_dying(q)) + return -ENOENT; + + if (count >= sizeof(opbuf)) { +diff --git a/block/blk-mq.c b/block/blk-mq.c +index 0a299941c622e..69d0a58f9e2f1 100644 +--- a/block/blk-mq.c ++++ b/block/blk-mq.c +@@ -3896,7 +3896,7 @@ static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, + q->queuedata = queuedata; + ret = blk_mq_init_allocated_queue(set, q); + if (ret) { +- blk_cleanup_queue(q); ++ blk_put_queue(q); + return ERR_PTR(ret); + } + return q; +@@ -3908,6 +3908,35 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) + } + EXPORT_SYMBOL(blk_mq_init_queue); + ++/** ++ * blk_mq_destroy_queue - shutdown a request queue ++ * @q: request queue to shutdown ++ * ++ * This shuts down a request queue allocated by blk_mq_init_queue() and drops ++ * the initial reference. All future requests will failed with -ENODEV. 
++ * ++ * Context: can sleep ++ */ ++void blk_mq_destroy_queue(struct request_queue *q) ++{ ++ WARN_ON_ONCE(!queue_is_mq(q)); ++ WARN_ON_ONCE(blk_queue_registered(q)); ++ ++ might_sleep(); ++ ++ blk_queue_flag_set(QUEUE_FLAG_DYING, q); ++ blk_queue_start_drain(q); ++ blk_freeze_queue(q); ++ ++ blk_sync_queue(q); ++ blk_mq_cancel_work_sync(q); ++ blk_mq_exit_queue(q); ++ ++ /* @q is and will stay empty, shutdown and put */ ++ blk_put_queue(q); ++} ++EXPORT_SYMBOL(blk_mq_destroy_queue); ++ + struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, + struct lock_class_key *lkclass) + { +@@ -3920,13 +3949,23 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, + + disk = __alloc_disk_node(q, set->numa_node, lkclass); + if (!disk) { +- blk_cleanup_queue(q); ++ blk_mq_destroy_queue(q); + return ERR_PTR(-ENOMEM); + } ++ set_bit(GD_OWNS_QUEUE, &disk->state); + return disk; + } + EXPORT_SYMBOL(__blk_mq_alloc_disk); + ++struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, ++ struct lock_class_key *lkclass) ++{ ++ if (!blk_get_queue(q)) ++ return NULL; ++ return __alloc_disk_node(q, NUMA_NO_NODE, lkclass); ++} ++EXPORT_SYMBOL(blk_mq_alloc_disk_for_queue); ++ + static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx( + struct blk_mq_tag_set *set, struct request_queue *q, + int hctx_idx, int node) +diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c +index 9b905e9443e49..84d7f87015673 100644 +--- a/block/blk-sysfs.c ++++ b/block/blk-sysfs.c +@@ -748,11 +748,6 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head) + * decremented with blk_put_queue(). Once the refcount reaches 0 this function + * is called. + * +- * For drivers that have a request_queue on a gendisk and added with +- * __device_add_disk() the refcount to request_queue will reach 0 with +- * the last put_disk() called by the driver. For drivers which don't use +- * __device_add_disk() this happens with blk_cleanup_queue(). +- * + * Drivers exist which depend on the release of the request_queue to be + * synchronous, it should not be deferred. + * +diff --git a/block/blk.h b/block/blk.h +index 434017701403f..0d6668663ab5d 100644 +--- a/block/blk.h ++++ b/block/blk.h +@@ -411,6 +411,9 @@ int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start, + sector_t length); + void blk_drop_partitions(struct gendisk *disk); + ++struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, ++ struct lock_class_key *lkclass); ++ + int bio_add_hw_page(struct request_queue *q, struct bio *bio, + struct page *page, unsigned int len, unsigned int offset, + unsigned int max_sectors, bool *same_page); +diff --git a/block/bsg-lib.c b/block/bsg-lib.c +index acfe1357bf6c4..fd4cd5e682826 100644 +--- a/block/bsg-lib.c ++++ b/block/bsg-lib.c +@@ -324,7 +324,7 @@ void bsg_remove_queue(struct request_queue *q) + container_of(q->tag_set, struct bsg_set, tag_set); + + bsg_unregister_queue(bset->bd); +- blk_cleanup_queue(q); ++ blk_mq_destroy_queue(q); + blk_mq_free_tag_set(&bset->tag_set); + kfree(bset); + } +@@ -399,7 +399,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, + + return q; + out_cleanup_queue: +- blk_cleanup_queue(q); ++ blk_mq_destroy_queue(q); + out_queue: + blk_mq_free_tag_set(set); + out_tag_set: +diff --git a/block/genhd.c b/block/genhd.c +index 278227ba1d531..a39c416d658fd 100644 +--- a/block/genhd.c ++++ b/block/genhd.c +@@ -617,13 +617,14 @@ void del_gendisk(struct gendisk *disk) + * Fail any new I/O. 
+ */ + set_bit(GD_DEAD, &disk->state); ++ if (test_bit(GD_OWNS_QUEUE, &disk->state)) ++ blk_queue_flag_set(QUEUE_FLAG_DYING, q); + set_capacity(disk, 0); + + /* + * Prevent new I/O from crossing bio_queue_enter(). + */ + blk_queue_start_drain(q); +- blk_mq_freeze_queue_wait(q); + + if (!(disk->flags & GENHD_FL_HIDDEN)) { + sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); +@@ -647,6 +648,8 @@ void del_gendisk(struct gendisk *disk) + pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); + device_del(disk_to_dev(disk)); + ++ blk_mq_freeze_queue_wait(q); ++ + blk_throtl_cancel_bios(disk->queue); + + blk_sync_queue(q); +@@ -663,11 +666,16 @@ void del_gendisk(struct gendisk *disk) + blk_mq_unquiesce_queue(q); + + /* +- * Allow using passthrough request again after the queue is torn down. ++ * If the disk does not own the queue, allow using passthrough requests ++ * again. Else leave the queue frozen to fail all I/O. + */ +- blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); +- __blk_mq_unfreeze_queue(q, true); +- ++ if (!test_bit(GD_OWNS_QUEUE, &disk->state)) { ++ blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); ++ __blk_mq_unfreeze_queue(q, true); ++ } else { ++ if (queue_is_mq(q)) ++ blk_mq_exit_queue(q); ++ } + } + EXPORT_SYMBOL(del_gendisk); + +@@ -1151,6 +1159,18 @@ static void disk_release(struct device *dev) + might_sleep(); + WARN_ON_ONCE(disk_live(disk)); + ++ /* ++ * To undo the all initialization from blk_mq_init_allocated_queue in ++ * case of a probe failure where add_disk is never called we have to ++ * call blk_mq_exit_queue here. We can't do this for the more common ++ * teardown case (yet) as the tagset can be gone by the time the disk ++ * is released once it was added. ++ */ ++ if (queue_is_mq(disk->queue) && ++ test_bit(GD_OWNS_QUEUE, &disk->state) && ++ !test_bit(GD_ADDED, &disk->state)) ++ blk_mq_exit_queue(disk->queue); ++ + blkcg_exit_queue(disk->queue); + + disk_release_events(disk); +@@ -1338,12 +1358,9 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, + { + struct gendisk *disk; + +- if (!blk_get_queue(q)) +- return NULL; +- + disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id); + if (!disk) +- goto out_put_queue; ++ return NULL; + + disk->bdi = bdi_alloc(node_id); + if (!disk->bdi) +@@ -1387,11 +1404,8 @@ out_free_bdi: + bdi_put(disk->bdi); + out_free_disk: + kfree(disk); +-out_put_queue: +- blk_put_queue(q); + return NULL; + } +-EXPORT_SYMBOL(__alloc_disk_node); + + struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass) + { +@@ -1404,9 +1418,10 @@ struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass) + + disk = __alloc_disk_node(q, node, lkclass); + if (!disk) { +- blk_cleanup_queue(q); ++ blk_put_queue(q); + return NULL; + } ++ set_bit(GD_OWNS_QUEUE, &disk->state); + return disk; + } + EXPORT_SYMBOL(__blk_alloc_disk); +@@ -1418,6 +1433,9 @@ EXPORT_SYMBOL(__blk_alloc_disk); + * This decrements the refcount for the struct gendisk. When this reaches 0 + * we'll have disk_release() called. + * ++ * Note: for blk-mq disk put_disk must be called before freeing the tag_set ++ * when handling probe errors (that is before add_disk() is called). ++ * + * Context: Any context, but the last reference must not be dropped from + * atomic context. 
+ */ +@@ -1439,7 +1457,6 @@ EXPORT_SYMBOL(put_disk); + */ + void blk_cleanup_disk(struct gendisk *disk) + { +- blk_cleanup_queue(disk->queue); + put_disk(disk); + } + EXPORT_SYMBOL(blk_cleanup_disk); +diff --git a/certs/Kconfig b/certs/Kconfig +index bf9b511573d75..1f109b0708778 100644 +--- a/certs/Kconfig ++++ b/certs/Kconfig +@@ -43,7 +43,7 @@ config SYSTEM_TRUSTED_KEYRING + bool "Provide system-wide ring of trusted keys" + depends on KEYS + depends on ASYMMETRIC_KEY_TYPE +- depends on X509_CERTIFICATE_PARSER ++ depends on X509_CERTIFICATE_PARSER = y + help + Provide a system keyring to which trusted keys can be added. Keys in + the keyring are considered to be trusted. Keys may be added at will +diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c +index e232cc4fd444b..c6e41ee18aaa2 100644 +--- a/drivers/block/ataflop.c ++++ b/drivers/block/ataflop.c +@@ -2045,7 +2045,6 @@ static void atari_floppy_cleanup(void) + if (!unit[i].disk[type]) + continue; + del_gendisk(unit[i].disk[type]); +- blk_cleanup_queue(unit[i].disk[type]->queue); + put_disk(unit[i].disk[type]); + } + blk_mq_free_tag_set(&unit[i].tag_set); +diff --git a/drivers/block/loop.c b/drivers/block/loop.c +index a59910ef948e9..1c036ef686fbb 100644 +--- a/drivers/block/loop.c ++++ b/drivers/block/loop.c +@@ -2062,7 +2062,6 @@ static void loop_remove(struct loop_device *lo) + { + /* Make this loop device unreachable from pathname. */ + del_gendisk(lo->lo_disk); +- blk_cleanup_queue(lo->lo_disk->queue); + blk_mq_free_tag_set(&lo->tag_set); + + mutex_lock(&loop_ctl_mutex); +diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c +index 6699e4b2f7f43..06994a35acc7a 100644 +--- a/drivers/block/mtip32xx/mtip32xx.c ++++ b/drivers/block/mtip32xx/mtip32xx.c +@@ -3677,7 +3677,6 @@ static int mtip_block_shutdown(struct driver_data *dd) + if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) + del_gendisk(dd->disk); + +- blk_cleanup_queue(dd->queue); + blk_mq_free_tag_set(&dd->tags); + put_disk(dd->disk); + return 0; +@@ -4040,7 +4039,6 @@ static void mtip_pci_remove(struct pci_dev *pdev) + dev_info(&dd->pdev->dev, "device %s surprise removal\n", + dd->disk->disk_name); + +- blk_cleanup_queue(dd->queue); + blk_mq_free_tag_set(&dd->tags); + + /* De-initialize the protocol layer. */ +diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c +index 409c76b81aed4..a4470374f54fc 100644 +--- a/drivers/block/rnbd/rnbd-clt.c ++++ b/drivers/block/rnbd/rnbd-clt.c +@@ -1755,7 +1755,7 @@ static void rnbd_destroy_sessions(void) + list_for_each_entry_safe(dev, tn, &sess->devs_list, list) { + /* + * Here unmap happens in parallel for only one reason: +- * blk_cleanup_queue() takes around half a second, so ++ * del_gendisk() takes around half a second, so + * on huge amount of devices the whole module unload + * procedure takes minutes. 
+ */ +diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c +index 63b4f6431d2e6..75057dbbcfbea 100644 +--- a/drivers/block/sx8.c ++++ b/drivers/block/sx8.c +@@ -1536,7 +1536,7 @@ err_out_free_majors: + clear_bit(0, &carm_major_alloc); + else if (host->major == 161) + clear_bit(1, &carm_major_alloc); +- blk_cleanup_queue(host->oob_q); ++ blk_mq_destroy_queue(host->oob_q); + blk_mq_free_tag_set(&host->tag_set); + err_out_dma_free: + dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma); +@@ -1570,7 +1570,7 @@ static void carm_remove_one (struct pci_dev *pdev) + clear_bit(0, &carm_major_alloc); + else if (host->major == 161) + clear_bit(1, &carm_major_alloc); +- blk_cleanup_queue(host->oob_q); ++ blk_mq_destroy_queue(host->oob_q); + blk_mq_free_tag_set(&host->tag_set); + dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma); + iounmap(host->mmio); +diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c +index d756423e0059a..59d6d5faf7396 100644 +--- a/drivers/block/virtio_blk.c ++++ b/drivers/block/virtio_blk.c +@@ -1107,7 +1107,6 @@ static void virtblk_remove(struct virtio_device *vdev) + flush_work(&vblk->config_work); + + del_gendisk(vblk->disk); +- blk_cleanup_queue(vblk->disk->queue); + blk_mq_free_tag_set(&vblk->tag_set); + + mutex_lock(&vblk->vdev_mutex); +diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c +index 7a6ed83481b8d..18ad43d9933ec 100644 +--- a/drivers/block/z2ram.c ++++ b/drivers/block/z2ram.c +@@ -384,7 +384,6 @@ static void __exit z2_exit(void) + + for (i = 0; i < Z2MINOR_COUNT; i++) { + del_gendisk(z2ram_gendisk[i]); +- blk_cleanup_queue(z2ram_gendisk[i]->queue); + put_disk(z2ram_gendisk[i]); + } + blk_mq_free_tag_set(&tag_set); +diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c +index 8e78b37d0f6a4..f4cc90ea6198e 100644 +--- a/drivers/cdrom/gdrom.c ++++ b/drivers/cdrom/gdrom.c +@@ -831,7 +831,6 @@ probe_fail_no_mem: + + static int remove_gdrom(struct platform_device *devptr) + { +- blk_cleanup_queue(gd.gdrom_rq); + blk_mq_free_tag_set(&gd.tag_set); + free_irq(HW_EVENT_GDROM_CMD, &gd); + free_irq(HW_EVENT_GDROM_DMA, &gd); +diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c +index cb6401c9e9a4f..acf31cc1dbcca 100644 +--- a/drivers/dax/hmem/device.c ++++ b/drivers/dax/hmem/device.c +@@ -15,6 +15,7 @@ void hmem_register_device(int target_nid, struct resource *r) + .start = r->start, + .end = r->end, + .flags = IORESOURCE_MEM, ++ .desc = IORES_DESC_SOFT_RESERVED, + }; + struct platform_device *pdev; + struct memregion_info info; +diff --git a/drivers/dma/ti/k3-udma-private.c b/drivers/dma/ti/k3-udma-private.c +index d4f1e4e9603a4..85e00701473cb 100644 +--- a/drivers/dma/ti/k3-udma-private.c ++++ b/drivers/dma/ti/k3-udma-private.c +@@ -31,14 +31,14 @@ struct udma_dev *of_xudma_dev_get(struct device_node *np, const char *property) + } + + pdev = of_find_device_by_node(udma_node); ++ if (np != udma_node) ++ of_node_put(udma_node); ++ + if (!pdev) { + pr_debug("UDMA device not found\n"); + return ERR_PTR(-EPROBE_DEFER); + } + +- if (np != udma_node) +- of_node_put(udma_node); +- + ud = platform_get_drvdata(pdev); + if (!ud) { + pr_debug("UDMA has not been probed\n"); +diff --git a/drivers/firmware/arm_scmi/reset.c b/drivers/firmware/arm_scmi/reset.c +index 673f3eb498f43..e9afa8cab7309 100644 +--- a/drivers/firmware/arm_scmi/reset.c ++++ b/drivers/firmware/arm_scmi/reset.c +@@ -166,9 +166,13 @@ static int scmi_domain_reset(const struct scmi_protocol_handle *ph, u32 domain, + struct scmi_xfer *t; + 
struct scmi_msg_reset_domain_reset *dom; + struct scmi_reset_info *pi = ph->get_priv(ph); +- struct reset_dom_info *rdom = pi->dom_info + domain; ++ struct reset_dom_info *rdom; + +- if (rdom->async_reset) ++ if (domain >= pi->num_domains) ++ return -EINVAL; ++ ++ rdom = pi->dom_info + domain; ++ if (rdom->async_reset && flags & AUTONOMOUS_RESET) + flags |= ASYNCHRONOUS_RESET; + + ret = ph->xops->xfer_get_init(ph, RESET, sizeof(*dom), 0, &t); +@@ -180,7 +184,7 @@ static int scmi_domain_reset(const struct scmi_protocol_handle *ph, u32 domain, + dom->flags = cpu_to_le32(flags); + dom->reset_state = cpu_to_le32(state); + +- if (rdom->async_reset) ++ if (flags & ASYNCHRONOUS_RESET) + ret = ph->xops->do_xfer_with_response(ph, t); + else + ret = ph->xops->do_xfer(ph, t); +diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c +index 8a18930f3eb69..516f4f0069bd2 100644 +--- a/drivers/firmware/efi/libstub/secureboot.c ++++ b/drivers/firmware/efi/libstub/secureboot.c +@@ -14,7 +14,7 @@ + + /* SHIM variables */ + static const efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID; +-static const efi_char16_t shim_MokSBState_name[] = L"MokSBState"; ++static const efi_char16_t shim_MokSBState_name[] = L"MokSBStateRT"; + + static efi_status_t get_var(efi_char16_t *name, efi_guid_t *vendor, u32 *attr, + unsigned long *data_size, void *data) +@@ -43,8 +43,8 @@ enum efi_secureboot_mode efi_get_secureboot(void) + + /* + * See if a user has put the shim into insecure mode. If so, and if the +- * variable doesn't have the runtime attribute set, we might as well +- * honor that. ++ * variable doesn't have the non-volatile attribute set, we might as ++ * well honor that. + */ + size = sizeof(moksbstate); + status = get_efi_var(shim_MokSBState_name, &shim_guid, +@@ -53,7 +53,7 @@ enum efi_secureboot_mode efi_get_secureboot(void) + /* If it fails, we don't care why. Default to secure */ + if (status != EFI_SUCCESS) + goto secure_boot_enabled; +- if (!(attr & EFI_VARIABLE_RUNTIME_ACCESS) && moksbstate == 1) ++ if (!(attr & EFI_VARIABLE_NON_VOLATILE) && moksbstate == 1) + return efi_secureboot_mode_disabled; + + secure_boot_enabled: +diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c +index 05ae8bcc9d671..9780f32a9f243 100644 +--- a/drivers/firmware/efi/libstub/x86-stub.c ++++ b/drivers/firmware/efi/libstub/x86-stub.c +@@ -517,6 +517,13 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, + hdr->ramdisk_image = 0; + hdr->ramdisk_size = 0; + ++ /* ++ * Disregard any setup data that was provided by the bootloader: ++ * setup_data could be pointing anywhere, and we have no way of ++ * authenticating or validating the payload. 
++ */ ++ hdr->setup_data = 0; ++ + efi_stub_entry(handle, sys_table_arg, boot_params); + /* not reached */ + +diff --git a/drivers/gpio/gpio-ixp4xx.c b/drivers/gpio/gpio-ixp4xx.c +index 312309be0287d..56656fb519f85 100644 +--- a/drivers/gpio/gpio-ixp4xx.c ++++ b/drivers/gpio/gpio-ixp4xx.c +@@ -63,6 +63,14 @@ static void ixp4xx_gpio_irq_ack(struct irq_data *d) + __raw_writel(BIT(d->hwirq), g->base + IXP4XX_REG_GPIS); + } + ++static void ixp4xx_gpio_mask_irq(struct irq_data *d) ++{ ++ struct gpio_chip *gc = irq_data_get_irq_chip_data(d); ++ ++ irq_chip_mask_parent(d); ++ gpiochip_disable_irq(gc, d->hwirq); ++} ++ + static void ixp4xx_gpio_irq_unmask(struct irq_data *d) + { + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); +@@ -72,6 +80,7 @@ static void ixp4xx_gpio_irq_unmask(struct irq_data *d) + if (!(g->irq_edge & BIT(d->hwirq))) + ixp4xx_gpio_irq_ack(d); + ++ gpiochip_enable_irq(gc, d->hwirq); + irq_chip_unmask_parent(d); + } + +@@ -149,12 +158,14 @@ static int ixp4xx_gpio_irq_set_type(struct irq_data *d, unsigned int type) + return irq_chip_set_type_parent(d, IRQ_TYPE_LEVEL_HIGH); + } + +-static struct irq_chip ixp4xx_gpio_irqchip = { ++static const struct irq_chip ixp4xx_gpio_irqchip = { + .name = "IXP4GPIO", + .irq_ack = ixp4xx_gpio_irq_ack, +- .irq_mask = irq_chip_mask_parent, ++ .irq_mask = ixp4xx_gpio_mask_irq, + .irq_unmask = ixp4xx_gpio_irq_unmask, + .irq_set_type = ixp4xx_gpio_irq_set_type, ++ .flags = IRQCHIP_IMMUTABLE, ++ GPIOCHIP_IRQ_RESOURCE_HELPERS, + }; + + static int ixp4xx_gpio_child_to_parent_hwirq(struct gpio_chip *gc, +@@ -263,7 +274,7 @@ static int ixp4xx_gpio_probe(struct platform_device *pdev) + g->gc.owner = THIS_MODULE; + + girq = &g->gc.irq; +- girq->chip = &ixp4xx_gpio_irqchip; ++ gpio_irq_chip_set_chip(girq, &ixp4xx_gpio_irqchip); + girq->fwnode = g->fwnode; + girq->parent_domain = parent; + girq->child_to_parent_hwirq = ixp4xx_gpio_child_to_parent_hwirq; +diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c +index a2e505a7545cd..523dfd17dd922 100644 +--- a/drivers/gpio/gpio-mockup.c ++++ b/drivers/gpio/gpio-mockup.c +@@ -533,8 +533,10 @@ static int __init gpio_mockup_register_chip(int idx) + } + + fwnode = fwnode_create_software_node(properties, NULL); +- if (IS_ERR(fwnode)) ++ if (IS_ERR(fwnode)) { ++ kfree_strarray(line_names, ngpio); + return PTR_ERR(fwnode); ++ } + + pdevinfo.name = "gpio-mockup"; + pdevinfo.id = idx; +@@ -597,9 +599,9 @@ static int __init gpio_mockup_init(void) + + static void __exit gpio_mockup_exit(void) + { ++ gpio_mockup_unregister_pdevs(); + debugfs_remove_recursive(gpio_mockup_dbg_dir); + platform_driver_unregister(&gpio_mockup_driver); +- gpio_mockup_unregister_pdevs(); + } + + module_init(gpio_mockup_init); +diff --git a/drivers/gpio/gpio-mt7621.c b/drivers/gpio/gpio-mt7621.c +index d8a26e503ca5d..f163f5ca857be 100644 +--- a/drivers/gpio/gpio-mt7621.c ++++ b/drivers/gpio/gpio-mt7621.c +@@ -112,6 +112,8 @@ mediatek_gpio_irq_unmask(struct irq_data *d) + unsigned long flags; + u32 rise, fall, high, low; + ++ gpiochip_enable_irq(gc, d->hwirq); ++ + spin_lock_irqsave(&rg->lock, flags); + rise = mtk_gpio_r32(rg, GPIO_REG_REDGE); + fall = mtk_gpio_r32(rg, GPIO_REG_FEDGE); +@@ -143,6 +145,8 @@ mediatek_gpio_irq_mask(struct irq_data *d) + mtk_gpio_w32(rg, GPIO_REG_HLVL, high & ~BIT(pin)); + mtk_gpio_w32(rg, GPIO_REG_LLVL, low & ~BIT(pin)); + spin_unlock_irqrestore(&rg->lock, flags); ++ ++ gpiochip_disable_irq(gc, d->hwirq); + } + + static int +@@ -204,6 +208,16 @@ mediatek_gpio_xlate(struct gpio_chip *chip, + return 
gpio % MTK_BANK_WIDTH; + } + ++static const struct irq_chip mt7621_irq_chip = { ++ .name = "mt7621-gpio", ++ .irq_mask_ack = mediatek_gpio_irq_mask, ++ .irq_mask = mediatek_gpio_irq_mask, ++ .irq_unmask = mediatek_gpio_irq_unmask, ++ .irq_set_type = mediatek_gpio_irq_type, ++ .flags = IRQCHIP_IMMUTABLE, ++ GPIOCHIP_IRQ_RESOURCE_HELPERS, ++}; ++ + static int + mediatek_gpio_bank_probe(struct device *dev, int bank) + { +@@ -238,11 +252,6 @@ mediatek_gpio_bank_probe(struct device *dev, int bank) + return -ENOMEM; + + rg->chip.offset = bank * MTK_BANK_WIDTH; +- rg->irq_chip.name = dev_name(dev); +- rg->irq_chip.irq_unmask = mediatek_gpio_irq_unmask; +- rg->irq_chip.irq_mask = mediatek_gpio_irq_mask; +- rg->irq_chip.irq_mask_ack = mediatek_gpio_irq_mask; +- rg->irq_chip.irq_set_type = mediatek_gpio_irq_type; + + if (mtk->gpio_irq) { + struct gpio_irq_chip *girq; +@@ -262,7 +271,7 @@ mediatek_gpio_bank_probe(struct device *dev, int bank) + } + + girq = &rg->chip.irq; +- girq->chip = &rg->irq_chip; ++ gpio_irq_chip_set_chip(girq, &mt7621_irq_chip); + /* This will let us handle the parent IRQ in the driver */ + girq->parent_handler = NULL; + girq->num_parents = 0; +diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c +index fa4bc7481f9a6..e739dcea61b23 100644 +--- a/drivers/gpio/gpio-tqmx86.c ++++ b/drivers/gpio/gpio-tqmx86.c +@@ -307,6 +307,8 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) + girq->default_type = IRQ_TYPE_NONE; + girq->handler = handle_simple_irq; + girq->init_valid_mask = tqmx86_init_irq_valid_mask; ++ ++ irq_domain_set_pm_device(girq->domain, dev); + } + + ret = devm_gpiochip_add_data(dev, chip, gpio); +@@ -315,8 +317,6 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) + goto out_pm_dis; + } + +- irq_domain_set_pm_device(girq->domain, dev); +- + dev_info(dev, "GPIO functionality initialized with %d pins\n", + chip->ngpio); + +diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c +index b26e643383762..21fee9ed7f0d2 100644 +--- a/drivers/gpio/gpiolib-cdev.c ++++ b/drivers/gpio/gpiolib-cdev.c +@@ -1975,7 +1975,6 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) + ret = -ENODEV; + goto out_free_le; + } +- le->irq = irq; + + if (eflags & GPIOEVENT_REQUEST_RISING_EDGE) + irqflags |= test_bit(FLAG_ACTIVE_LOW, &desc->flags) ? 
+@@ -1989,7 +1988,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) + init_waitqueue_head(&le->wait); + + /* Request a thread to read the events */ +- ret = request_threaded_irq(le->irq, ++ ret = request_threaded_irq(irq, + lineevent_irq_handler, + lineevent_irq_thread, + irqflags, +@@ -1998,6 +1997,8 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) + if (ret) + goto out_free_le; + ++ le->irq = irq; ++ + fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC); + if (fd < 0) { + ret = fd; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +index 4dfd6724b3caa..0a8c15c3a04c3 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +@@ -35,6 +35,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include +@@ -495,6 +497,12 @@ static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { + .create_handle = drm_gem_fb_create_handle, + }; + ++static const struct drm_framebuffer_funcs amdgpu_fb_funcs_atomic = { ++ .destroy = drm_gem_fb_destroy, ++ .create_handle = drm_gem_fb_create_handle, ++ .dirty = drm_atomic_helper_dirtyfb, ++}; ++ + uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, + uint64_t bo_flags) + { +@@ -1069,7 +1077,10 @@ static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev, + if (ret) + goto err; + +- ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); ++ if (drm_drv_uses_atomic_modeset(dev)) ++ ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs_atomic); ++ else ++ ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); + if (ret) + goto err; + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +index b19bf0c3f3737..79ce654bd3dad 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +@@ -748,7 +748,7 @@ static int psp_tmr_init(struct psp_context *psp) + } + + pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; +- ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE(psp->adev), ++ ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->tmr_bo, &psp->tmr_mc_addr, pptr); + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +index e431f49949319..cd366c7f311fd 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +@@ -36,6 +36,7 @@ + #define PSP_CMD_BUFFER_SIZE 0x1000 + #define PSP_1_MEG 0x100000 + #define PSP_TMR_SIZE(adev) ((adev)->asic_type == CHIP_ALDEBARAN ? 
0x800000 : 0x400000) ++#define PSP_TMR_ALIGNMENT 0x100000 + #define PSP_FW_NAME_LEN 0x24 + + enum psp_shared_mem_size { +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +index dac202ae864dd..9193ca5d6fe7a 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +@@ -1805,7 +1805,8 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) + amdgpu_ras_query_error_status(adev, &info); + + if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && +- adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { ++ adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4) && ++ adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 0)) { + if (amdgpu_ras_reset_error_status(adev, info.head.block)) + dev_warn(adev->dev, "Failed to reset error counter and error status"); + } +diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +index cdc0c97798483..6c1fd471a4c7d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c ++++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +@@ -28,6 +28,14 @@ + #include "nbio/nbio_7_7_0_sh_mask.h" + #include + ++static void nbio_v7_7_remap_hdp_registers(struct amdgpu_device *adev) ++{ ++ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, ++ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); ++ WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL, ++ adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); ++} ++ + static u32 nbio_v7_7_get_rev_id(struct amdgpu_device *adev) + { + u32 tmp; +@@ -237,4 +245,5 @@ const struct amdgpu_nbio_funcs nbio_v7_7_funcs = { + .ih_doorbell_range = nbio_v7_7_ih_doorbell_range, + .ih_control = nbio_v7_7_ih_control, + .init_registers = nbio_v7_7_init_registers, ++ .remap_hdp_registers = nbio_v7_7_remap_hdp_registers, + }; +diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +index f47d82da115c9..42a567e71439b 100644 +--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c +@@ -6651,8 +6651,7 @@ static double CalculateUrgentLatency( + return ret; + } + +- +-static void UseMinimumDCFCLK( ++static noinline_for_stack void UseMinimumDCFCLK( + struct display_mode_lib *mode_lib, + int MaxInterDCNTileRepeaters, + int MaxPrefetchMode, +diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +index e4b9fd31223c9..40a672236198e 100644 +--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c ++++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c +@@ -261,33 +261,13 @@ static void CalculateRowBandwidth( + + static void CalculateFlipSchedule( + struct display_mode_lib *mode_lib, ++ unsigned int k, + double HostVMInefficiencyFactor, + double UrgentExtraLatency, + double UrgentLatency, +- unsigned int GPUVMMaxPageTableLevels, +- bool HostVMEnable, +- unsigned int HostVMMaxNonCachedPageTableLevels, +- bool GPUVMEnable, +- double HostVMMinPageSize, + double PDEAndMetaPTEBytesPerFrame, + double MetaRowBytes, +- double DPTEBytesPerRow, +- double BandwidthAvailableForImmediateFlip, +- unsigned int TotImmediateFlipBytes, +- enum source_format_class SourcePixelFormat, +- double LineTime, +- double VRatio, +- double VRatioChroma, +- double Tno_bw, +- bool DCCEnable, +- unsigned int dpte_row_height, +- unsigned int 
meta_row_height, +- unsigned int dpte_row_height_chroma, +- unsigned int meta_row_height_chroma, +- double *DestinationLinesToRequestVMInImmediateFlip, +- double *DestinationLinesToRequestRowInImmediateFlip, +- double *final_flip_bw, +- bool *ImmediateFlipSupportedForPipe); ++ double DPTEBytesPerRow); + static double CalculateWriteBackDelay( + enum source_format_class WritebackPixelFormat, + double WritebackHRatio, +@@ -321,64 +301,28 @@ static void CalculateVupdateAndDynamicMetadataParameters( + static void CalculateWatermarksAndDRAMSpeedChangeSupport( + struct display_mode_lib *mode_lib, + unsigned int PrefetchMode, +- unsigned int NumberOfActivePlanes, +- unsigned int MaxLineBufferLines, +- unsigned int LineBufferSize, +- unsigned int WritebackInterfaceBufferSize, + double DCFCLK, + double ReturnBW, +- bool SynchronizedVBlank, +- unsigned int dpte_group_bytes[], +- unsigned int MetaChunkSize, + double UrgentLatency, + double ExtraLatency, +- double WritebackLatency, +- double WritebackChunkSize, + double SOCCLK, +- double DRAMClockChangeLatency, +- double SRExitTime, +- double SREnterPlusExitTime, +- double SRExitZ8Time, +- double SREnterPlusExitZ8Time, + double DCFCLKDeepSleep, + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], +- unsigned int LBBitPerPixel[], + double SwathWidthY[], + double SwathWidthC[], +- double HRatio[], +- double HRatioChroma[], +- unsigned int vtaps[], +- unsigned int VTAPsChroma[], +- double VRatio[], +- double VRatioChroma[], +- unsigned int HTotal[], +- double PixelClock[], +- unsigned int BlendingAndTiming[], + unsigned int DPPPerPlane[], + double BytePerPixelDETY[], + double BytePerPixelDETC[], +- double DSTXAfterScaler[], +- double DSTYAfterScaler[], +- bool WritebackEnable[], +- enum source_format_class WritebackPixelFormat[], +- double WritebackDestinationWidth[], +- double WritebackDestinationHeight[], +- double WritebackSourceHeight[], + bool UnboundedRequestEnabled, + int unsigned CompressedBufferSizeInkByte, + enum clock_change_support *DRAMClockChangeSupport, +- double *UrgentWatermark, +- double *WritebackUrgentWatermark, +- double *DRAMClockChangeWatermark, +- double *WritebackDRAMClockChangeWatermark, + double *StutterExitWatermark, + double *StutterEnterPlusExitWatermark, + double *Z8StutterExitWatermark, +- double *Z8StutterEnterPlusExitWatermark, +- double *MinActiveDRAMClockChangeLatencySupported); ++ double *Z8StutterEnterPlusExitWatermark); + + static void CalculateDCFCLKDeepSleep( + struct display_mode_lib *mode_lib, +@@ -2914,33 +2858,13 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + CalculateFlipSchedule( + mode_lib, ++ k, + HostVMInefficiencyFactor, + v->UrgentExtraLatency, + v->UrgentLatency, +- v->GPUVMMaxPageTableLevels, +- v->HostVMEnable, +- v->HostVMMaxNonCachedPageTableLevels, +- v->GPUVMEnable, +- v->HostVMMinPageSize, + v->PDEAndMetaPTEBytesFrame[k], + v->MetaRowByte[k], +- v->PixelPTEBytesPerRow[k], +- v->BandwidthAvailableForImmediateFlip, +- v->TotImmediateFlipBytes, +- v->SourcePixelFormat[k], +- v->HTotal[k] / v->PixelClock[k], +- v->VRatio[k], +- v->VRatioChroma[k], +- v->Tno_bw[k], +- v->DCCEnable[k], +- v->dpte_row_height[k], +- v->meta_row_height[k], +- v->dpte_row_height_chroma[k], +- v->meta_row_height_chroma[k], +- &v->DestinationLinesToRequestVMInImmediateFlip[k], +- &v->DestinationLinesToRequestRowInImmediateFlip[k], +- &v->final_flip_bw[k], +- 
&v->ImmediateFlipSupportedForPipe[k]); ++ v->PixelPTEBytesPerRow[k]); + } + + v->total_dcn_read_bw_with_flip = 0.0; +@@ -3027,64 +2951,28 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman + CalculateWatermarksAndDRAMSpeedChangeSupport( + mode_lib, + PrefetchMode, +- v->NumberOfActivePlanes, +- v->MaxLineBufferLines, +- v->LineBufferSize, +- v->WritebackInterfaceBufferSize, + v->DCFCLK, + v->ReturnBW, +- v->SynchronizedVBlank, +- v->dpte_group_bytes, +- v->MetaChunkSize, + v->UrgentLatency, + v->UrgentExtraLatency, +- v->WritebackLatency, +- v->WritebackChunkSize, + v->SOCCLK, +- v->DRAMClockChangeLatency, +- v->SRExitTime, +- v->SREnterPlusExitTime, +- v->SRExitZ8Time, +- v->SREnterPlusExitZ8Time, + v->DCFCLKDeepSleep, + v->DETBufferSizeY, + v->DETBufferSizeC, + v->SwathHeightY, + v->SwathHeightC, +- v->LBBitPerPixel, + v->SwathWidthY, + v->SwathWidthC, +- v->HRatio, +- v->HRatioChroma, +- v->vtaps, +- v->VTAPsChroma, +- v->VRatio, +- v->VRatioChroma, +- v->HTotal, +- v->PixelClock, +- v->BlendingAndTiming, + v->DPPPerPlane, + v->BytePerPixelDETY, + v->BytePerPixelDETC, +- v->DSTXAfterScaler, +- v->DSTYAfterScaler, +- v->WritebackEnable, +- v->WritebackPixelFormat, +- v->WritebackDestinationWidth, +- v->WritebackDestinationHeight, +- v->WritebackSourceHeight, + v->UnboundedRequestEnabled, + v->CompressedBufferSizeInkByte, + &DRAMClockChangeSupport, +- &v->UrgentWatermark, +- &v->WritebackUrgentWatermark, +- &v->DRAMClockChangeWatermark, +- &v->WritebackDRAMClockChangeWatermark, + &v->StutterExitWatermark, + &v->StutterEnterPlusExitWatermark, + &v->Z8StutterExitWatermark, +- &v->Z8StutterEnterPlusExitWatermark, +- &v->MinActiveDRAMClockChangeLatencySupported); ++ &v->Z8StutterEnterPlusExitWatermark); + + for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->WritebackEnable[k] == true) { +@@ -3696,61 +3584,43 @@ static void CalculateRowBandwidth( + + static void CalculateFlipSchedule( + struct display_mode_lib *mode_lib, ++ unsigned int k, + double HostVMInefficiencyFactor, + double UrgentExtraLatency, + double UrgentLatency, +- unsigned int GPUVMMaxPageTableLevels, +- bool HostVMEnable, +- unsigned int HostVMMaxNonCachedPageTableLevels, +- bool GPUVMEnable, +- double HostVMMinPageSize, + double PDEAndMetaPTEBytesPerFrame, + double MetaRowBytes, +- double DPTEBytesPerRow, +- double BandwidthAvailableForImmediateFlip, +- unsigned int TotImmediateFlipBytes, +- enum source_format_class SourcePixelFormat, +- double LineTime, +- double VRatio, +- double VRatioChroma, +- double Tno_bw, +- bool DCCEnable, +- unsigned int dpte_row_height, +- unsigned int meta_row_height, +- unsigned int dpte_row_height_chroma, +- unsigned int meta_row_height_chroma, +- double *DestinationLinesToRequestVMInImmediateFlip, +- double *DestinationLinesToRequestRowInImmediateFlip, +- double *final_flip_bw, +- bool *ImmediateFlipSupportedForPipe) ++ double DPTEBytesPerRow) + { ++ struct vba_vars_st *v = &mode_lib->vba; + double min_row_time = 0.0; + unsigned int HostVMDynamicLevelsTrips; + double TimeForFetchingMetaPTEImmediateFlip; + double TimeForFetchingRowInVBlankImmediateFlip; + double ImmediateFlipBW; ++ double LineTime = v->HTotal[k] / v->PixelClock[k]; + +- if (GPUVMEnable == true && HostVMEnable == true) { +- HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; ++ if (v->GPUVMEnable == true && v->HostVMEnable == true) { ++ HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; + } else { + HostVMDynamicLevelsTrips = 0; + } + +- if (GPUVMEnable == true || 
DCCEnable == true) { +- ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; ++ if (v->GPUVMEnable == true || v->DCCEnable[k] == true) { ++ ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes; + } + +- if (GPUVMEnable == true) { ++ if (v->GPUVMEnable == true) { + TimeForFetchingMetaPTEImmediateFlip = dml_max3( +- Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, +- UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), ++ v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, ++ UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), + LineTime / 4.0); + } else { + TimeForFetchingMetaPTEImmediateFlip = 0; + } + +- *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; +- if ((GPUVMEnable == true || DCCEnable == true)) { ++ v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; ++ if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { + TimeForFetchingRowInVBlankImmediateFlip = dml_max3( + (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, + UrgentLatency * (HostVMDynamicLevelsTrips + 1), +@@ -3759,54 +3629,54 @@ static void CalculateFlipSchedule( + TimeForFetchingRowInVBlankImmediateFlip = 0; + } + +- *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; ++ v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; + +- if (GPUVMEnable == true) { +- *final_flip_bw = dml_max( +- PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), +- (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); +- } else if ((GPUVMEnable == true || DCCEnable == true)) { +- *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime); ++ if (v->GPUVMEnable == true) { ++ v->final_flip_bw[k] = dml_max( ++ PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime), ++ (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime)); ++ } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { ++ v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime); + } else { +- *final_flip_bw = 0; ++ v->final_flip_bw[k] = 0; + } + +- if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { +- if (GPUVMEnable == true && DCCEnable != true) { +- min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); +- } else if (GPUVMEnable != true && DCCEnable == true) { +- min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); ++ if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || 
v->SourcePixelFormat[k] == dm_rgbe_alpha) { ++ if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { ++ min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); ++ } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { ++ min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); + } else { + min_row_time = dml_min4( +- dpte_row_height * LineTime / VRatio, +- meta_row_height * LineTime / VRatio, +- dpte_row_height_chroma * LineTime / VRatioChroma, +- meta_row_height_chroma * LineTime / VRatioChroma); ++ v->dpte_row_height[k] * LineTime / v->VRatio[k], ++ v->meta_row_height[k] * LineTime / v->VRatio[k], ++ v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k], ++ v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); + } + } else { +- if (GPUVMEnable == true && DCCEnable != true) { +- min_row_time = dpte_row_height * LineTime / VRatio; +- } else if (GPUVMEnable != true && DCCEnable == true) { +- min_row_time = meta_row_height * LineTime / VRatio; ++ if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { ++ min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k]; ++ } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { ++ min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k]; + } else { +- min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); ++ min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]); + } + } + +- if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 ++ if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16 + || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { +- *ImmediateFlipSupportedForPipe = false; ++ v->ImmediateFlipSupportedForPipe[k] = false; + } else { +- *ImmediateFlipSupportedForPipe = true; ++ v->ImmediateFlipSupportedForPipe[k] = true; + } + + #ifdef __DML_VBA_DEBUG__ +- dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip); +- dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip); ++ dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]); ++ dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]); + dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); + dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); + dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); +- dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); ++ dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]); + #endif + + } +@@ -5397,33 +5267,13 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + for (k = 0; k < v->NumberOfActivePlanes; k++) { + CalculateFlipSchedule( + mode_lib, ++ k, + HostVMInefficiencyFactor, + v->ExtraLatency, + v->UrgLatency[i], +- 
v->GPUVMMaxPageTableLevels, +- v->HostVMEnable, +- v->HostVMMaxNonCachedPageTableLevels, +- v->GPUVMEnable, +- v->HostVMMinPageSize, + v->PDEAndMetaPTEBytesPerFrame[i][j][k], + v->MetaRowBytes[i][j][k], +- v->DPTEBytesPerRow[i][j][k], +- v->BandwidthAvailableForImmediateFlip, +- v->TotImmediateFlipBytes, +- v->SourcePixelFormat[k], +- v->HTotal[k] / v->PixelClock[k], +- v->VRatio[k], +- v->VRatioChroma[k], +- v->Tno_bw[k], +- v->DCCEnable[k], +- v->dpte_row_height[k], +- v->meta_row_height[k], +- v->dpte_row_height_chroma[k], +- v->meta_row_height_chroma[k], +- &v->DestinationLinesToRequestVMInImmediateFlip[k], +- &v->DestinationLinesToRequestRowInImmediateFlip[k], +- &v->final_flip_bw[k], +- &v->ImmediateFlipSupportedForPipe[k]); ++ v->DPTEBytesPerRow[i][j][k]); + } + v->total_dcn_read_bw_with_flip = 0.0; + for (k = 0; k < v->NumberOfActivePlanes; k++) { +@@ -5481,64 +5331,28 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + CalculateWatermarksAndDRAMSpeedChangeSupport( + mode_lib, + v->PrefetchModePerState[i][j], +- v->NumberOfActivePlanes, +- v->MaxLineBufferLines, +- v->LineBufferSize, +- v->WritebackInterfaceBufferSize, + v->DCFCLKState[i][j], + v->ReturnBWPerState[i][j], +- v->SynchronizedVBlank, +- v->dpte_group_bytes, +- v->MetaChunkSize, + v->UrgLatency[i], + v->ExtraLatency, +- v->WritebackLatency, +- v->WritebackChunkSize, + v->SOCCLKPerState[i], +- v->DRAMClockChangeLatency, +- v->SRExitTime, +- v->SREnterPlusExitTime, +- v->SRExitZ8Time, +- v->SREnterPlusExitZ8Time, + v->ProjectedDCFCLKDeepSleep[i][j], + v->DETBufferSizeYThisState, + v->DETBufferSizeCThisState, + v->SwathHeightYThisState, + v->SwathHeightCThisState, +- v->LBBitPerPixel, + v->SwathWidthYThisState, + v->SwathWidthCThisState, +- v->HRatio, +- v->HRatioChroma, +- v->vtaps, +- v->VTAPsChroma, +- v->VRatio, +- v->VRatioChroma, +- v->HTotal, +- v->PixelClock, +- v->BlendingAndTiming, + v->NoOfDPPThisState, + v->BytePerPixelInDETY, + v->BytePerPixelInDETC, +- v->DSTXAfterScaler, +- v->DSTYAfterScaler, +- v->WritebackEnable, +- v->WritebackPixelFormat, +- v->WritebackDestinationWidth, +- v->WritebackDestinationHeight, +- v->WritebackSourceHeight, + UnboundedRequestEnabledThisState, + CompressedBufferSizeInkByteThisState, + &v->DRAMClockChangeSupport[i][j], +- &v->UrgentWatermark, +- &v->WritebackUrgentWatermark, +- &v->DRAMClockChangeWatermark, +- &v->WritebackDRAMClockChangeWatermark, +- &dummy, + &dummy, + &dummy, + &dummy, +- &v->MinActiveDRAMClockChangeLatencySupported); ++ &dummy); + } + } + +@@ -5663,64 +5477,28 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l + static void CalculateWatermarksAndDRAMSpeedChangeSupport( + struct display_mode_lib *mode_lib, + unsigned int PrefetchMode, +- unsigned int NumberOfActivePlanes, +- unsigned int MaxLineBufferLines, +- unsigned int LineBufferSize, +- unsigned int WritebackInterfaceBufferSize, + double DCFCLK, + double ReturnBW, +- bool SynchronizedVBlank, +- unsigned int dpte_group_bytes[], +- unsigned int MetaChunkSize, + double UrgentLatency, + double ExtraLatency, +- double WritebackLatency, +- double WritebackChunkSize, + double SOCCLK, +- double DRAMClockChangeLatency, +- double SRExitTime, +- double SREnterPlusExitTime, +- double SRExitZ8Time, +- double SREnterPlusExitZ8Time, + double DCFCLKDeepSleep, + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], +- unsigned int LBBitPerPixel[], + double SwathWidthY[], + double 
SwathWidthC[], +- double HRatio[], +- double HRatioChroma[], +- unsigned int vtaps[], +- unsigned int VTAPsChroma[], +- double VRatio[], +- double VRatioChroma[], +- unsigned int HTotal[], +- double PixelClock[], +- unsigned int BlendingAndTiming[], + unsigned int DPPPerPlane[], + double BytePerPixelDETY[], + double BytePerPixelDETC[], +- double DSTXAfterScaler[], +- double DSTYAfterScaler[], +- bool WritebackEnable[], +- enum source_format_class WritebackPixelFormat[], +- double WritebackDestinationWidth[], +- double WritebackDestinationHeight[], +- double WritebackSourceHeight[], + bool UnboundedRequestEnabled, + int unsigned CompressedBufferSizeInkByte, + enum clock_change_support *DRAMClockChangeSupport, +- double *UrgentWatermark, +- double *WritebackUrgentWatermark, +- double *DRAMClockChangeWatermark, +- double *WritebackDRAMClockChangeWatermark, + double *StutterExitWatermark, + double *StutterEnterPlusExitWatermark, + double *Z8StutterExitWatermark, +- double *Z8StutterEnterPlusExitWatermark, +- double *MinActiveDRAMClockChangeLatencySupported) ++ double *Z8StutterEnterPlusExitWatermark) + { + struct vba_vars_st *v = &mode_lib->vba; + double EffectiveLBLatencyHidingY; +@@ -5740,103 +5518,103 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( + double TotalPixelBW = 0.0; + int k, j; + +- *UrgentWatermark = UrgentLatency + ExtraLatency; ++ v->UrgentWatermark = UrgentLatency + ExtraLatency; + + #ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); + dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency); +- dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark); ++ dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark); + #endif + +- *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark; ++ v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark; + + #ifdef __DML_VBA_DEBUG__ +- dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency); +- dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark); ++ dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency); ++ dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark); + #endif + + v->TotalActiveWriteback = 0; +- for (k = 0; k < NumberOfActivePlanes; ++k) { +- if (WritebackEnable[k] == true) { ++ for (k = 0; k < v->NumberOfActivePlanes; ++k) { ++ if (v->WritebackEnable[k] == true) { + v->TotalActiveWriteback = v->TotalActiveWriteback + 1; + } + } + + if (v->TotalActiveWriteback <= 1) { +- *WritebackUrgentWatermark = WritebackLatency; ++ v->WritebackUrgentWatermark = v->WritebackLatency; + } else { +- *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; ++ v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + } + + if (v->TotalActiveWriteback <= 1) { +- *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency; ++ v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency; + } else { +- *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; ++ v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; + } + +- for (k = 0; k < NumberOfActivePlanes; ++k) { ++ for (k = 0; k < 
v->NumberOfActivePlanes; ++k) { + TotalPixelBW = TotalPixelBW +- + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) +- / (HTotal[k] / PixelClock[k]); ++ + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) ++ / (v->HTotal[k] / v->PixelClock[k]); + } + +- for (k = 0; k < NumberOfActivePlanes; ++k) { ++ for (k = 0; k < v->NumberOfActivePlanes; ++k) { + double EffectiveDETBufferSizeY = DETBufferSizeY[k]; + + v->LBLatencyHidingSourceLinesY = dml_min( +- (double) MaxLineBufferLines, +- dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1); ++ (double) v->MaxLineBufferLines, ++ dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); + + v->LBLatencyHidingSourceLinesC = dml_min( +- (double) MaxLineBufferLines, +- dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1); ++ (double) v->MaxLineBufferLines, ++ dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); + +- EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]); ++ EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); + +- EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); ++ EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); + + if (UnboundedRequestEnabled) { + EffectiveDETBufferSizeY = EffectiveDETBufferSizeY +- + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW; ++ + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; + } + + LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; + LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); +- FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; ++ FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; + if (BytePerPixelDETC[k] > 0) { + LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; + LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); +- FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k]; ++ FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k]; + } else { + LinesInDETC = 0; + FullDETBufferingTimeC = 999999; + } + + ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY +- - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark; ++ - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; + +- if (NumberOfActivePlanes > 1) { ++ if (v->NumberOfActivePlanes > 1) { + ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY +- - (1 - 1.0 / 
NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; ++ - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k]; + } + + if (BytePerPixelDETC[k] > 0) { + ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC +- - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark; ++ - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; + +- if (NumberOfActivePlanes > 1) { ++ if (v->NumberOfActivePlanes > 1) { + ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC +- - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k]; ++ - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k]; + } + v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); + } else { + v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; + } + +- if (WritebackEnable[k] == true) { +- WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 +- / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); +- if (WritebackPixelFormat[k] == dm_444_64) { ++ if (v->WritebackEnable[k] == true) { ++ WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024 ++ / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); ++ if (v->WritebackPixelFormat[k] == dm_444_64) { + WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; + } + WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; +@@ -5846,14 +5624,14 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( + + v->MinActiveDRAMClockChangeMargin = 999999; + PlaneWithMinActiveDRAMClockChangeMargin = 0; +- for (k = 0; k < NumberOfActivePlanes; ++k) { ++ for (k = 0; k < v->NumberOfActivePlanes; ++k) { + if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { + v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; +- if (BlendingAndTiming[k] == k) { ++ if (v->BlendingAndTiming[k] == k) { + PlaneWithMinActiveDRAMClockChangeMargin = k; + } else { +- for (j = 0; j < NumberOfActivePlanes; ++j) { +- if (BlendingAndTiming[k] == j) { ++ for (j = 0; j < v->NumberOfActivePlanes; ++j) { ++ if (v->BlendingAndTiming[k] == j) { + PlaneWithMinActiveDRAMClockChangeMargin = j; + } + } +@@ -5861,11 +5639,11 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( + } + } + +- *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency; ++ v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ; + + SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; +- for (k = 0; k < NumberOfActivePlanes; ++k) { +- if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) ++ for (k = 0; k < v->NumberOfActivePlanes; ++k) { ++ if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && 
(v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) + && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { + SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; + } +@@ -5873,25 +5651,25 @@ static void CalculateWatermarksAndDRAMSpeedChangeSupport( + + v->TotalNumberOfActiveOTG = 0; + +- for (k = 0; k < NumberOfActivePlanes; ++k) { +- if (BlendingAndTiming[k] == k) { ++ for (k = 0; k < v->NumberOfActivePlanes; ++k) { ++ if (v->BlendingAndTiming[k] == k) { + v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; + } + } + + if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { + *DRAMClockChangeSupport = dm_dram_clock_change_vactive; +- } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 ++ } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 + || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) { + *DRAMClockChangeSupport = dm_dram_clock_change_vblank; + } else { + *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; + } + +- *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; +- *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); +- *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; +- *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; ++ *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; ++ *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); ++ *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; ++ *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; + + #ifdef __DML_VBA_DEBUG__ + dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark); +diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +index 64a38f08f4974..5a51be753e87f 100644 +--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c ++++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +@@ -1603,6 +1603,7 @@ static void interpolate_user_regamma(uint32_t hw_points_num, + struct fixed31_32 lut2; + struct fixed31_32 delta_lut; + struct fixed31_32 delta_index; ++ const struct fixed31_32 one = dc_fixpt_from_int(1); + + i = 0; + /* fixed_pt library has problems handling too small values */ +@@ -1631,6 +1632,9 @@ static void interpolate_user_regamma(uint32_t hw_points_num, + } else + hw_x = coordinates_x[i].x; + ++ if (dc_fixpt_le(one, hw_x)) ++ hw_x = one; ++ + norm_x = dc_fixpt_mul(norm_factor, hw_x); + index = dc_fixpt_floor(norm_x); + if (index < 0 || index > 255) +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +index 32bb6b1d95261..d13e455c8827e 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +@@ -368,6 +368,17 @@ static void sienna_cichlid_check_bxco_support(struct smu_context *smu) + smu_baco->platform_support = + (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? true : + false; ++ ++ /* ++ * Disable BACO entry/exit completely on below SKUs to ++ * avoid hardware intermittent failures. 
++ */ ++ if (((adev->pdev->device == 0x73A1) && ++ (adev->pdev->revision == 0x00)) || ++ ((adev->pdev->device == 0x73BF) && ++ (adev->pdev->revision == 0xCF))) ++ smu_baco->platform_support = false; ++ + } + } + +diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c +index dd32b484dd825..ce96234f3df20 100644 +--- a/drivers/gpu/drm/gma500/cdv_device.c ++++ b/drivers/gpu/drm/gma500/cdv_device.c +@@ -581,11 +581,9 @@ static const struct psb_offset cdv_regmap[2] = { + static int cdv_chip_setup(struct drm_device *dev) + { + struct drm_psb_private *dev_priv = to_drm_psb_private(dev); +- struct pci_dev *pdev = to_pci_dev(dev->dev); + INIT_WORK(&dev_priv->hotplug_work, cdv_hotplug_work_func); + +- if (pci_enable_msi(pdev)) +- dev_warn(dev->dev, "Enabling MSI failed!\n"); ++ dev_priv->use_msi = true; + dev_priv->regmap = cdv_regmap; + gma_get_core_freq(dev); + psb_intel_opregion_init(dev); +diff --git a/drivers/gpu/drm/gma500/gem.c b/drivers/gpu/drm/gma500/gem.c +index dffe37490206d..4b7627a726378 100644 +--- a/drivers/gpu/drm/gma500/gem.c ++++ b/drivers/gpu/drm/gma500/gem.c +@@ -112,12 +112,12 @@ static void psb_gem_free_object(struct drm_gem_object *obj) + { + struct psb_gem_object *pobj = to_psb_gem_object(obj); + +- drm_gem_object_release(obj); +- + /* Undo the mmap pin if we are destroying the object */ + if (pobj->mmapping) + psb_gem_unpin(pobj); + ++ drm_gem_object_release(obj); ++ + WARN_ON(pobj->in_gart && !pobj->stolen); + + release_resource(&pobj->resource); +diff --git a/drivers/gpu/drm/gma500/gma_display.c b/drivers/gpu/drm/gma500/gma_display.c +index 34ec3fca09ba6..12287c9bb4d80 100644 +--- a/drivers/gpu/drm/gma500/gma_display.c ++++ b/drivers/gpu/drm/gma500/gma_display.c +@@ -531,15 +531,18 @@ int gma_crtc_page_flip(struct drm_crtc *crtc, + WARN_ON(drm_crtc_vblank_get(crtc) != 0); + + gma_crtc->page_flip_event = event; ++ spin_unlock_irqrestore(&dev->event_lock, flags); + + /* Call this locked if we want an event at vblank interrupt. 
*/ + ret = crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y, old_fb); + if (ret) { +- gma_crtc->page_flip_event = NULL; +- drm_crtc_vblank_put(crtc); ++ spin_lock_irqsave(&dev->event_lock, flags); ++ if (gma_crtc->page_flip_event) { ++ gma_crtc->page_flip_event = NULL; ++ drm_crtc_vblank_put(crtc); ++ } ++ spin_unlock_irqrestore(&dev->event_lock, flags); + } +- +- spin_unlock_irqrestore(&dev->event_lock, flags); + } else { + ret = crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y, old_fb); + } +diff --git a/drivers/gpu/drm/gma500/oaktrail_device.c b/drivers/gpu/drm/gma500/oaktrail_device.c +index 5923a9c893122..f90e628cb482c 100644 +--- a/drivers/gpu/drm/gma500/oaktrail_device.c ++++ b/drivers/gpu/drm/gma500/oaktrail_device.c +@@ -501,12 +501,9 @@ static const struct psb_offset oaktrail_regmap[2] = { + static int oaktrail_chip_setup(struct drm_device *dev) + { + struct drm_psb_private *dev_priv = to_drm_psb_private(dev); +- struct pci_dev *pdev = to_pci_dev(dev->dev); + int ret; + +- if (pci_enable_msi(pdev)) +- dev_warn(dev->dev, "Enabling MSI failed!\n"); +- ++ dev_priv->use_msi = true; + dev_priv->regmap = oaktrail_regmap; + + ret = mid_chip_setup(dev); +diff --git a/drivers/gpu/drm/gma500/power.c b/drivers/gpu/drm/gma500/power.c +index b91de6d36e412..66873085d4505 100644 +--- a/drivers/gpu/drm/gma500/power.c ++++ b/drivers/gpu/drm/gma500/power.c +@@ -139,8 +139,6 @@ static void gma_suspend_pci(struct pci_dev *pdev) + dev_priv->regs.saveBSM = bsm; + pci_read_config_dword(pdev, 0xFC, &vbt); + dev_priv->regs.saveVBT = vbt; +- pci_read_config_dword(pdev, PSB_PCIx_MSI_ADDR_LOC, &dev_priv->msi_addr); +- pci_read_config_dword(pdev, PSB_PCIx_MSI_DATA_LOC, &dev_priv->msi_data); + + pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D3hot); +@@ -168,9 +166,6 @@ static bool gma_resume_pci(struct pci_dev *pdev) + pci_restore_state(pdev); + pci_write_config_dword(pdev, 0x5c, dev_priv->regs.saveBSM); + pci_write_config_dword(pdev, 0xFC, dev_priv->regs.saveVBT); +- /* restoring MSI address and data in PCIx space */ +- pci_write_config_dword(pdev, PSB_PCIx_MSI_ADDR_LOC, dev_priv->msi_addr); +- pci_write_config_dword(pdev, PSB_PCIx_MSI_DATA_LOC, dev_priv->msi_data); + ret = pci_enable_device(pdev); + + if (ret != 0) +@@ -223,8 +218,7 @@ int gma_power_resume(struct device *_dev) + mutex_lock(&power_mutex); + gma_resume_pci(pdev); + gma_resume_display(pdev); +- gma_irq_preinstall(dev); +- gma_irq_postinstall(dev); ++ gma_irq_install(dev); + mutex_unlock(&power_mutex); + return 0; + } +diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c +index 1d8744f3e7020..54e756b486060 100644 +--- a/drivers/gpu/drm/gma500/psb_drv.c ++++ b/drivers/gpu/drm/gma500/psb_drv.c +@@ -383,7 +383,7 @@ static int psb_driver_load(struct drm_device *dev, unsigned long flags) + PSB_WVDC32(0xFFFFFFFF, PSB_INT_MASK_R); + spin_unlock_irqrestore(&dev_priv->irqmask_lock, irqflags); + +- gma_irq_install(dev, pdev->irq); ++ gma_irq_install(dev); + + dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */ + +diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h +index 0ddfec1a0851d..4c3fc5eaf6ad5 100644 +--- a/drivers/gpu/drm/gma500/psb_drv.h ++++ b/drivers/gpu/drm/gma500/psb_drv.h +@@ -490,6 +490,7 @@ struct drm_psb_private { + int rpm_enabled; + + /* MID specific */ ++ bool use_msi; + bool has_gct; + struct oaktrail_gct_data gct_data; + +@@ -499,10 +500,6 @@ struct drm_psb_private { + /* Register state */ + struct psb_save_area regs; + +- /* MSI reg save */ +- 
uint32_t msi_addr; +- uint32_t msi_data; +- + /* Hotplug handling */ + struct work_struct hotplug_work; + +diff --git a/drivers/gpu/drm/gma500/psb_irq.c b/drivers/gpu/drm/gma500/psb_irq.c +index e6e6d61bbeab6..038f18ed0a95e 100644 +--- a/drivers/gpu/drm/gma500/psb_irq.c ++++ b/drivers/gpu/drm/gma500/psb_irq.c +@@ -316,17 +316,24 @@ void gma_irq_postinstall(struct drm_device *dev) + spin_unlock_irqrestore(&dev_priv->irqmask_lock, irqflags); + } + +-int gma_irq_install(struct drm_device *dev, unsigned int irq) ++int gma_irq_install(struct drm_device *dev) + { ++ struct drm_psb_private *dev_priv = to_drm_psb_private(dev); ++ struct pci_dev *pdev = to_pci_dev(dev->dev); + int ret; + +- if (irq == IRQ_NOTCONNECTED) ++ if (dev_priv->use_msi && pci_enable_msi(pdev)) { ++ dev_warn(dev->dev, "Enabling MSI failed!\n"); ++ dev_priv->use_msi = false; ++ } ++ ++ if (pdev->irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + gma_irq_preinstall(dev); + + /* PCI devices require shared interrupts. */ +- ret = request_irq(irq, gma_irq_handler, IRQF_SHARED, dev->driver->name, dev); ++ ret = request_irq(pdev->irq, gma_irq_handler, IRQF_SHARED, dev->driver->name, dev); + if (ret) + return ret; + +@@ -369,6 +376,8 @@ void gma_irq_uninstall(struct drm_device *dev) + spin_unlock_irqrestore(&dev_priv->irqmask_lock, irqflags); + + free_irq(pdev->irq, dev); ++ if (dev_priv->use_msi) ++ pci_disable_msi(pdev); + } + + int gma_crtc_enable_vblank(struct drm_crtc *crtc) +diff --git a/drivers/gpu/drm/gma500/psb_irq.h b/drivers/gpu/drm/gma500/psb_irq.h +index b51e395194fff..7648f69824a5d 100644 +--- a/drivers/gpu/drm/gma500/psb_irq.h ++++ b/drivers/gpu/drm/gma500/psb_irq.h +@@ -17,7 +17,7 @@ struct drm_device; + + void gma_irq_preinstall(struct drm_device *dev); + void gma_irq_postinstall(struct drm_device *dev); +-int gma_irq_install(struct drm_device *dev, unsigned int irq); ++int gma_irq_install(struct drm_device *dev); + void gma_irq_uninstall(struct drm_device *dev); + + int gma_crtc_enable_vblank(struct drm_crtc *crtc); +diff --git a/drivers/gpu/drm/hisilicon/hibmc/Kconfig b/drivers/gpu/drm/hisilicon/hibmc/Kconfig +index 073adfe438ddd..4e41c144a2902 100644 +--- a/drivers/gpu/drm/hisilicon/hibmc/Kconfig ++++ b/drivers/gpu/drm/hisilicon/hibmc/Kconfig +@@ -2,6 +2,7 @@ + config DRM_HISI_HIBMC + tristate "DRM Support for Hisilicon Hibmc" + depends on DRM && PCI && (ARM64 || COMPILE_TEST) ++ depends on MMU + select DRM_KMS_HELPER + select DRM_VRAM_HELPER + select DRM_TTM +diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c +index 5a957acebfd62..82ad8fe7440c0 100644 +--- a/drivers/gpu/drm/i915/display/g4x_dp.c ++++ b/drivers/gpu/drm/i915/display/g4x_dp.c +@@ -395,26 +395,8 @@ static void intel_dp_get_config(struct intel_encoder *encoder, + intel_dotclock_calculate(pipe_config->port_clock, + &pipe_config->dp_m_n); + +- if (intel_dp_is_edp(intel_dp) && dev_priv->vbt.edp.bpp && +- pipe_config->pipe_bpp > dev_priv->vbt.edp.bpp) { +- /* +- * This is a big fat ugly hack. +- * +- * Some machines in UEFI boot mode provide us a VBT that has 18 +- * bpp and 1.62 GHz link bandwidth for eDP, which for reasons +- * unknown we fail to light up. Yet the same BIOS boots up with +- * 24 bpp and 2.7 GHz link. Use the same bpp as the BIOS uses as +- * max, not what it tells us to use. +- * +- * Note: This will still be broken if the eDP panel is not lit +- * up by the BIOS, and thus we can't get the mode at module +- * load. 
+- */ +- drm_dbg_kms(&dev_priv->drm, +- "pipe has %d bpp for eDP panel, overriding BIOS-provided max %d bpp\n", +- pipe_config->pipe_bpp, dev_priv->vbt.edp.bpp); +- dev_priv->vbt.edp.bpp = pipe_config->pipe_bpp; +- } ++ if (intel_dp_is_edp(intel_dp)) ++ intel_edp_fixup_vbt_bpp(encoder, pipe_config->pipe_bpp); + } + + static void +diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c +index 5508ebb9eb434..f416499dad6f3 100644 +--- a/drivers/gpu/drm/i915/display/icl_dsi.c ++++ b/drivers/gpu/drm/i915/display/icl_dsi.c +@@ -1864,7 +1864,8 @@ static void icl_dphy_param_init(struct intel_dsi *intel_dsi) + { + struct drm_device *dev = intel_dsi->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); +- struct mipi_config *mipi_config = dev_priv->vbt.dsi.config; ++ struct intel_connector *connector = intel_dsi->attached_connector; ++ struct mipi_config *mipi_config = connector->panel.vbt.dsi.config; + u32 tlpx_ns; + u32 prepare_cnt, exit_zero_cnt, clk_zero_cnt, trail_cnt; + u32 ths_prepare_ns, tclk_trail_ns; +@@ -2051,6 +2052,8 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) + /* attach connector to encoder */ + intel_connector_attach_encoder(intel_connector, encoder); + ++ intel_bios_init_panel(dev_priv, &intel_connector->panel); ++ + mutex_lock(&dev->mode_config.mutex); + intel_panel_add_vbt_lfp_fixed_mode(intel_connector); + mutex_unlock(&dev->mode_config.mutex); +@@ -2064,13 +2067,20 @@ void icl_dsi_init(struct drm_i915_private *dev_priv) + + intel_backlight_setup(intel_connector, INVALID_PIPE); + +- if (dev_priv->vbt.dsi.config->dual_link) ++ if (intel_connector->panel.vbt.dsi.config->dual_link) + intel_dsi->ports = BIT(PORT_A) | BIT(PORT_B); + else + intel_dsi->ports = BIT(port); + +- intel_dsi->dcs_backlight_ports = dev_priv->vbt.dsi.bl_ports; +- intel_dsi->dcs_cabc_ports = dev_priv->vbt.dsi.cabc_ports; ++ if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.bl_ports & ~intel_dsi->ports)) ++ intel_connector->panel.vbt.dsi.bl_ports &= intel_dsi->ports; ++ ++ intel_dsi->dcs_backlight_ports = intel_connector->panel.vbt.dsi.bl_ports; ++ ++ if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.cabc_ports & ~intel_dsi->ports)) ++ intel_connector->panel.vbt.dsi.cabc_ports &= intel_dsi->ports; ++ ++ intel_dsi->dcs_cabc_ports = intel_connector->panel.vbt.dsi.cabc_ports; + + for_each_dsi_port(port, intel_dsi->ports) { + struct intel_dsi_host *host; +diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c +index 3e200a2e4ba29..5182bb66bd289 100644 +--- a/drivers/gpu/drm/i915/display/intel_backlight.c ++++ b/drivers/gpu/drm/i915/display/intel_backlight.c +@@ -1158,9 +1158,10 @@ static u32 vlv_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) + return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * mul); + } + +-static u16 get_vbt_pwm_freq(struct drm_i915_private *dev_priv) ++static u16 get_vbt_pwm_freq(struct intel_connector *connector) + { +- u16 pwm_freq_hz = dev_priv->vbt.backlight.pwm_freq_hz; ++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev); ++ u16 pwm_freq_hz = connector->panel.vbt.backlight.pwm_freq_hz; + + if (pwm_freq_hz) { + drm_dbg_kms(&dev_priv->drm, +@@ -1180,7 +1181,7 @@ static u32 get_backlight_max_vbt(struct intel_connector *connector) + { + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_panel *panel = &connector->panel; +- u16 pwm_freq_hz = get_vbt_pwm_freq(dev_priv); ++ u16 pwm_freq_hz = 
get_vbt_pwm_freq(connector); + u32 pwm; + + if (!panel->backlight.pwm_funcs->hz_to_pwm) { +@@ -1217,11 +1218,11 @@ static u32 get_backlight_min_vbt(struct intel_connector *connector) + * against this by letting the minimum be at most (arbitrarily chosen) + * 25% of the max. + */ +- min = clamp_t(int, dev_priv->vbt.backlight.min_brightness, 0, 64); +- if (min != dev_priv->vbt.backlight.min_brightness) { ++ min = clamp_t(int, connector->panel.vbt.backlight.min_brightness, 0, 64); ++ if (min != connector->panel.vbt.backlight.min_brightness) { + drm_dbg_kms(&dev_priv->drm, + "clamping VBT min backlight %d/255 to %d/255\n", +- dev_priv->vbt.backlight.min_brightness, min); ++ connector->panel.vbt.backlight.min_brightness, min); + } + + /* vbt value is a coefficient in range [0..255] */ +@@ -1410,7 +1411,7 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused) + struct intel_panel *panel = &connector->panel; + u32 pwm_ctl, val; + +- panel->backlight.controller = dev_priv->vbt.backlight.controller; ++ panel->backlight.controller = connector->panel.vbt.backlight.controller; + + pwm_ctl = intel_de_read(dev_priv, + BXT_BLC_PWM_CTL(panel->backlight.controller)); +@@ -1483,7 +1484,7 @@ static int ext_pwm_setup_backlight(struct intel_connector *connector, + u32 level; + + /* Get the right PWM chip for DSI backlight according to VBT */ +- if (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC) { ++ if (connector->panel.vbt.dsi.config->pwm_blc == PPS_BLC_PMIC) { + panel->backlight.pwm = pwm_get(dev->dev, "pwm_pmic_backlight"); + desc = "PMIC"; + } else { +@@ -1512,11 +1513,11 @@ static int ext_pwm_setup_backlight(struct intel_connector *connector, + + drm_dbg_kms(&dev_priv->drm, "PWM already enabled at freq %ld, VBT freq %d, level %d\n", + NSEC_PER_SEC / (unsigned long)panel->backlight.pwm_state.period, +- get_vbt_pwm_freq(dev_priv), level); ++ get_vbt_pwm_freq(connector), level); + } else { + /* Set period from VBT frequency, leave other settings at 0. 
*/ + panel->backlight.pwm_state.period = +- NSEC_PER_SEC / get_vbt_pwm_freq(dev_priv); ++ NSEC_PER_SEC / get_vbt_pwm_freq(connector); + } + + drm_info(&dev_priv->drm, "Using %s PWM for LCD backlight control\n", +@@ -1601,7 +1602,7 @@ int intel_backlight_setup(struct intel_connector *connector, enum pipe pipe) + struct intel_panel *panel = &connector->panel; + int ret; + +- if (!dev_priv->vbt.backlight.present) { ++ if (!connector->panel.vbt.backlight.present) { + if (dev_priv->quirks & QUIRK_BACKLIGHT_PRESENT) { + drm_dbg_kms(&dev_priv->drm, + "no backlight present per VBT, but present per quirk\n"); +diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c +index 91caf4523b34d..b5de61fe9cc67 100644 +--- a/drivers/gpu/drm/i915/display/intel_bios.c ++++ b/drivers/gpu/drm/i915/display/intel_bios.c +@@ -682,7 +682,8 @@ static int get_panel_type(struct drm_i915_private *i915) + + /* Parse general panel options */ + static void +-parse_panel_options(struct drm_i915_private *i915) ++parse_panel_options(struct drm_i915_private *i915, ++ struct intel_panel *panel) + { + const struct bdb_lvds_options *lvds_options; + int panel_type; +@@ -692,11 +693,11 @@ parse_panel_options(struct drm_i915_private *i915) + if (!lvds_options) + return; + +- i915->vbt.lvds_dither = lvds_options->pixel_dither; ++ panel->vbt.lvds_dither = lvds_options->pixel_dither; + + panel_type = get_panel_type(i915); + +- i915->vbt.panel_type = panel_type; ++ panel->vbt.panel_type = panel_type; + + drrs_mode = (lvds_options->dps_panel_type_bits + >> (panel_type * 2)) & MODE_MASK; +@@ -707,16 +708,16 @@ parse_panel_options(struct drm_i915_private *i915) + */ + switch (drrs_mode) { + case 0: +- i915->vbt.drrs_type = DRRS_TYPE_STATIC; ++ panel->vbt.drrs_type = DRRS_TYPE_STATIC; + drm_dbg_kms(&i915->drm, "DRRS supported mode is static\n"); + break; + case 2: +- i915->vbt.drrs_type = DRRS_TYPE_SEAMLESS; ++ panel->vbt.drrs_type = DRRS_TYPE_SEAMLESS; + drm_dbg_kms(&i915->drm, + "DRRS supported mode is seamless\n"); + break; + default: +- i915->vbt.drrs_type = DRRS_TYPE_NONE; ++ panel->vbt.drrs_type = DRRS_TYPE_NONE; + drm_dbg_kms(&i915->drm, + "DRRS not supported (VBT input)\n"); + break; +@@ -725,13 +726,14 @@ parse_panel_options(struct drm_i915_private *i915) + + static void + parse_lfp_panel_dtd(struct drm_i915_private *i915, ++ struct intel_panel *panel, + const struct bdb_lvds_lfp_data *lvds_lfp_data, + const struct bdb_lvds_lfp_data_ptrs *lvds_lfp_data_ptrs) + { + const struct lvds_dvo_timing *panel_dvo_timing; + const struct lvds_fp_timing *fp_timing; + struct drm_display_mode *panel_fixed_mode; +- int panel_type = i915->vbt.panel_type; ++ int panel_type = panel->vbt.panel_type; + + panel_dvo_timing = get_lvds_dvo_timing(lvds_lfp_data, + lvds_lfp_data_ptrs, +@@ -743,7 +745,7 @@ parse_lfp_panel_dtd(struct drm_i915_private *i915, + + fill_detail_timing_data(panel_fixed_mode, panel_dvo_timing); + +- i915->vbt.lfp_lvds_vbt_mode = panel_fixed_mode; ++ panel->vbt.lfp_lvds_vbt_mode = panel_fixed_mode; + + drm_dbg_kms(&i915->drm, + "Found panel mode in BIOS VBT legacy lfp table: " DRM_MODE_FMT "\n", +@@ -756,20 +758,21 @@ parse_lfp_panel_dtd(struct drm_i915_private *i915, + /* check the resolution, just to be sure */ + if (fp_timing->x_res == panel_fixed_mode->hdisplay && + fp_timing->y_res == panel_fixed_mode->vdisplay) { +- i915->vbt.bios_lvds_val = fp_timing->lvds_reg_val; ++ panel->vbt.bios_lvds_val = fp_timing->lvds_reg_val; + drm_dbg_kms(&i915->drm, + "VBT initial LVDS value %x\n", +- 
i915->vbt.bios_lvds_val); ++ panel->vbt.bios_lvds_val); + } + } + + static void +-parse_lfp_data(struct drm_i915_private *i915) ++parse_lfp_data(struct drm_i915_private *i915, ++ struct intel_panel *panel) + { + const struct bdb_lvds_lfp_data *data; + const struct bdb_lvds_lfp_data_tail *tail; + const struct bdb_lvds_lfp_data_ptrs *ptrs; +- int panel_type = i915->vbt.panel_type; ++ int panel_type = panel->vbt.panel_type; + + ptrs = find_section(i915, BDB_LVDS_LFP_DATA_PTRS); + if (!ptrs) +@@ -779,24 +782,25 @@ parse_lfp_data(struct drm_i915_private *i915) + if (!data) + return; + +- if (!i915->vbt.lfp_lvds_vbt_mode) +- parse_lfp_panel_dtd(i915, data, ptrs); ++ if (!panel->vbt.lfp_lvds_vbt_mode) ++ parse_lfp_panel_dtd(i915, panel, data, ptrs); + + tail = get_lfp_data_tail(data, ptrs); + if (!tail) + return; + + if (i915->vbt.version >= 188) { +- i915->vbt.seamless_drrs_min_refresh_rate = ++ panel->vbt.seamless_drrs_min_refresh_rate = + tail->seamless_drrs_min_refresh_rate[panel_type]; + drm_dbg_kms(&i915->drm, + "Seamless DRRS min refresh rate: %d Hz\n", +- i915->vbt.seamless_drrs_min_refresh_rate); ++ panel->vbt.seamless_drrs_min_refresh_rate); + } + } + + static void +-parse_generic_dtd(struct drm_i915_private *i915) ++parse_generic_dtd(struct drm_i915_private *i915, ++ struct intel_panel *panel) + { + const struct bdb_generic_dtd *generic_dtd; + const struct generic_dtd_entry *dtd; +@@ -831,14 +835,14 @@ parse_generic_dtd(struct drm_i915_private *i915) + + num_dtd = (get_blocksize(generic_dtd) - + sizeof(struct bdb_generic_dtd)) / generic_dtd->gdtd_size; +- if (i915->vbt.panel_type >= num_dtd) { ++ if (panel->vbt.panel_type >= num_dtd) { + drm_err(&i915->drm, + "Panel type %d not found in table of %d DTD's\n", +- i915->vbt.panel_type, num_dtd); ++ panel->vbt.panel_type, num_dtd); + return; + } + +- dtd = &generic_dtd->dtd[i915->vbt.panel_type]; ++ dtd = &generic_dtd->dtd[panel->vbt.panel_type]; + + panel_fixed_mode = kzalloc(sizeof(*panel_fixed_mode), GFP_KERNEL); + if (!panel_fixed_mode) +@@ -881,15 +885,16 @@ parse_generic_dtd(struct drm_i915_private *i915) + "Found panel mode in BIOS VBT generic dtd table: " DRM_MODE_FMT "\n", + DRM_MODE_ARG(panel_fixed_mode)); + +- i915->vbt.lfp_lvds_vbt_mode = panel_fixed_mode; ++ panel->vbt.lfp_lvds_vbt_mode = panel_fixed_mode; + } + + static void +-parse_lfp_backlight(struct drm_i915_private *i915) ++parse_lfp_backlight(struct drm_i915_private *i915, ++ struct intel_panel *panel) + { + const struct bdb_lfp_backlight_data *backlight_data; + const struct lfp_backlight_data_entry *entry; +- int panel_type = i915->vbt.panel_type; ++ int panel_type = panel->vbt.panel_type; + u16 level; + + backlight_data = find_section(i915, BDB_LVDS_BACKLIGHT); +@@ -905,15 +910,15 @@ parse_lfp_backlight(struct drm_i915_private *i915) + + entry = &backlight_data->data[panel_type]; + +- i915->vbt.backlight.present = entry->type == BDB_BACKLIGHT_TYPE_PWM; +- if (!i915->vbt.backlight.present) { ++ panel->vbt.backlight.present = entry->type == BDB_BACKLIGHT_TYPE_PWM; ++ if (!panel->vbt.backlight.present) { + drm_dbg_kms(&i915->drm, + "PWM backlight not present in VBT (type %u)\n", + entry->type); + return; + } + +- i915->vbt.backlight.type = INTEL_BACKLIGHT_DISPLAY_DDI; ++ panel->vbt.backlight.type = INTEL_BACKLIGHT_DISPLAY_DDI; + if (i915->vbt.version >= 191) { + size_t exp_size; + +@@ -928,13 +933,13 @@ parse_lfp_backlight(struct drm_i915_private *i915) + const struct lfp_backlight_control_method *method; + + method = &backlight_data->backlight_control[panel_type]; +- 
i915->vbt.backlight.type = method->type; +- i915->vbt.backlight.controller = method->controller; ++ panel->vbt.backlight.type = method->type; ++ panel->vbt.backlight.controller = method->controller; + } + } + +- i915->vbt.backlight.pwm_freq_hz = entry->pwm_freq_hz; +- i915->vbt.backlight.active_low_pwm = entry->active_low_pwm; ++ panel->vbt.backlight.pwm_freq_hz = entry->pwm_freq_hz; ++ panel->vbt.backlight.active_low_pwm = entry->active_low_pwm; + + if (i915->vbt.version >= 234) { + u16 min_level; +@@ -955,28 +960,29 @@ parse_lfp_backlight(struct drm_i915_private *i915) + drm_warn(&i915->drm, "Brightness min level > 255\n"); + level = 255; + } +- i915->vbt.backlight.min_brightness = min_level; ++ panel->vbt.backlight.min_brightness = min_level; + +- i915->vbt.backlight.brightness_precision_bits = ++ panel->vbt.backlight.brightness_precision_bits = + backlight_data->brightness_precision_bits[panel_type]; + } else { + level = backlight_data->level[panel_type]; +- i915->vbt.backlight.min_brightness = entry->min_brightness; ++ panel->vbt.backlight.min_brightness = entry->min_brightness; + } + + drm_dbg_kms(&i915->drm, + "VBT backlight PWM modulation frequency %u Hz, " + "active %s, min brightness %u, level %u, controller %u\n", +- i915->vbt.backlight.pwm_freq_hz, +- i915->vbt.backlight.active_low_pwm ? "low" : "high", +- i915->vbt.backlight.min_brightness, ++ panel->vbt.backlight.pwm_freq_hz, ++ panel->vbt.backlight.active_low_pwm ? "low" : "high", ++ panel->vbt.backlight.min_brightness, + level, +- i915->vbt.backlight.controller); ++ panel->vbt.backlight.controller); + } + + /* Try to find sdvo panel data */ + static void +-parse_sdvo_panel_data(struct drm_i915_private *i915) ++parse_sdvo_panel_data(struct drm_i915_private *i915, ++ struct intel_panel *panel) + { + const struct bdb_sdvo_panel_dtds *dtds; + struct drm_display_mode *panel_fixed_mode; +@@ -1009,7 +1015,7 @@ parse_sdvo_panel_data(struct drm_i915_private *i915) + + fill_detail_timing_data(panel_fixed_mode, &dtds->dtds[index]); + +- i915->vbt.sdvo_lvds_vbt_mode = panel_fixed_mode; ++ panel->vbt.sdvo_lvds_vbt_mode = panel_fixed_mode; + + drm_dbg_kms(&i915->drm, + "Found SDVO panel mode in BIOS VBT tables: " DRM_MODE_FMT "\n", +@@ -1188,6 +1194,17 @@ parse_driver_features(struct drm_i915_private *i915) + driver->lvds_config != BDB_DRIVER_FEATURE_INT_SDVO_LVDS) + i915->vbt.int_lvds_support = 0; + } ++} ++ ++static void ++parse_panel_driver_features(struct drm_i915_private *i915, ++ struct intel_panel *panel) ++{ ++ const struct bdb_driver_features *driver; ++ ++ driver = find_section(i915, BDB_DRIVER_FEATURES); ++ if (!driver) ++ return; + + if (i915->vbt.version < 228) { + drm_dbg_kms(&i915->drm, "DRRS State Enabled:%d\n", +@@ -1199,17 +1216,18 @@ parse_driver_features(struct drm_i915_private *i915) + * driver->drrs_enabled=false + */ + if (!driver->drrs_enabled) +- i915->vbt.drrs_type = DRRS_TYPE_NONE; ++ panel->vbt.drrs_type = DRRS_TYPE_NONE; + +- i915->vbt.psr.enable = driver->psr_enabled; ++ panel->vbt.psr.enable = driver->psr_enabled; + } + } + + static void +-parse_power_conservation_features(struct drm_i915_private *i915) ++parse_power_conservation_features(struct drm_i915_private *i915, ++ struct intel_panel *panel) + { + const struct bdb_lfp_power *power; +- u8 panel_type = i915->vbt.panel_type; ++ u8 panel_type = panel->vbt.panel_type; + + if (i915->vbt.version < 228) + return; +@@ -1218,7 +1236,7 @@ parse_power_conservation_features(struct drm_i915_private *i915) + if (!power) + return; + +- i915->vbt.psr.enable = 
power->psr & BIT(panel_type); ++ panel->vbt.psr.enable = power->psr & BIT(panel_type); + + /* + * If DRRS is not supported, drrs_type has to be set to 0. +@@ -1227,19 +1245,20 @@ parse_power_conservation_features(struct drm_i915_private *i915) + * power->drrs & BIT(panel_type)=false + */ + if (!(power->drrs & BIT(panel_type))) +- i915->vbt.drrs_type = DRRS_TYPE_NONE; ++ panel->vbt.drrs_type = DRRS_TYPE_NONE; + + if (i915->vbt.version >= 232) +- i915->vbt.edp.hobl = power->hobl & BIT(panel_type); ++ panel->vbt.edp.hobl = power->hobl & BIT(panel_type); + } + + static void +-parse_edp(struct drm_i915_private *i915) ++parse_edp(struct drm_i915_private *i915, ++ struct intel_panel *panel) + { + const struct bdb_edp *edp; + const struct edp_power_seq *edp_pps; + const struct edp_fast_link_params *edp_link_params; +- int panel_type = i915->vbt.panel_type; ++ int panel_type = panel->vbt.panel_type; + + edp = find_section(i915, BDB_EDP); + if (!edp) +@@ -1247,13 +1266,13 @@ parse_edp(struct drm_i915_private *i915) + + switch ((edp->color_depth >> (panel_type * 2)) & 3) { + case EDP_18BPP: +- i915->vbt.edp.bpp = 18; ++ panel->vbt.edp.bpp = 18; + break; + case EDP_24BPP: +- i915->vbt.edp.bpp = 24; ++ panel->vbt.edp.bpp = 24; + break; + case EDP_30BPP: +- i915->vbt.edp.bpp = 30; ++ panel->vbt.edp.bpp = 30; + break; + } + +@@ -1261,14 +1280,14 @@ parse_edp(struct drm_i915_private *i915) + edp_pps = &edp->power_seqs[panel_type]; + edp_link_params = &edp->fast_link_params[panel_type]; + +- i915->vbt.edp.pps = *edp_pps; ++ panel->vbt.edp.pps = *edp_pps; + + switch (edp_link_params->rate) { + case EDP_RATE_1_62: +- i915->vbt.edp.rate = DP_LINK_BW_1_62; ++ panel->vbt.edp.rate = DP_LINK_BW_1_62; + break; + case EDP_RATE_2_7: +- i915->vbt.edp.rate = DP_LINK_BW_2_7; ++ panel->vbt.edp.rate = DP_LINK_BW_2_7; + break; + default: + drm_dbg_kms(&i915->drm, +@@ -1279,13 +1298,13 @@ parse_edp(struct drm_i915_private *i915) + + switch (edp_link_params->lanes) { + case EDP_LANE_1: +- i915->vbt.edp.lanes = 1; ++ panel->vbt.edp.lanes = 1; + break; + case EDP_LANE_2: +- i915->vbt.edp.lanes = 2; ++ panel->vbt.edp.lanes = 2; + break; + case EDP_LANE_4: +- i915->vbt.edp.lanes = 4; ++ panel->vbt.edp.lanes = 4; + break; + default: + drm_dbg_kms(&i915->drm, +@@ -1296,16 +1315,16 @@ parse_edp(struct drm_i915_private *i915) + + switch (edp_link_params->preemphasis) { + case EDP_PREEMPHASIS_NONE: +- i915->vbt.edp.preemphasis = DP_TRAIN_PRE_EMPH_LEVEL_0; ++ panel->vbt.edp.preemphasis = DP_TRAIN_PRE_EMPH_LEVEL_0; + break; + case EDP_PREEMPHASIS_3_5dB: +- i915->vbt.edp.preemphasis = DP_TRAIN_PRE_EMPH_LEVEL_1; ++ panel->vbt.edp.preemphasis = DP_TRAIN_PRE_EMPH_LEVEL_1; + break; + case EDP_PREEMPHASIS_6dB: +- i915->vbt.edp.preemphasis = DP_TRAIN_PRE_EMPH_LEVEL_2; ++ panel->vbt.edp.preemphasis = DP_TRAIN_PRE_EMPH_LEVEL_2; + break; + case EDP_PREEMPHASIS_9_5dB: +- i915->vbt.edp.preemphasis = DP_TRAIN_PRE_EMPH_LEVEL_3; ++ panel->vbt.edp.preemphasis = DP_TRAIN_PRE_EMPH_LEVEL_3; + break; + default: + drm_dbg_kms(&i915->drm, +@@ -1316,16 +1335,16 @@ parse_edp(struct drm_i915_private *i915) + + switch (edp_link_params->vswing) { + case EDP_VSWING_0_4V: +- i915->vbt.edp.vswing = DP_TRAIN_VOLTAGE_SWING_LEVEL_0; ++ panel->vbt.edp.vswing = DP_TRAIN_VOLTAGE_SWING_LEVEL_0; + break; + case EDP_VSWING_0_6V: +- i915->vbt.edp.vswing = DP_TRAIN_VOLTAGE_SWING_LEVEL_1; ++ panel->vbt.edp.vswing = DP_TRAIN_VOLTAGE_SWING_LEVEL_1; + break; + case EDP_VSWING_0_8V: +- i915->vbt.edp.vswing = DP_TRAIN_VOLTAGE_SWING_LEVEL_2; ++ panel->vbt.edp.vswing = 
DP_TRAIN_VOLTAGE_SWING_LEVEL_2; + break; + case EDP_VSWING_1_2V: +- i915->vbt.edp.vswing = DP_TRAIN_VOLTAGE_SWING_LEVEL_3; ++ panel->vbt.edp.vswing = DP_TRAIN_VOLTAGE_SWING_LEVEL_3; + break; + default: + drm_dbg_kms(&i915->drm, +@@ -1339,24 +1358,25 @@ parse_edp(struct drm_i915_private *i915) + + /* Don't read from VBT if module parameter has valid value*/ + if (i915->params.edp_vswing) { +- i915->vbt.edp.low_vswing = ++ panel->vbt.edp.low_vswing = + i915->params.edp_vswing == 1; + } else { + vswing = (edp->edp_vswing_preemph >> (panel_type * 4)) & 0xF; +- i915->vbt.edp.low_vswing = vswing == 0; ++ panel->vbt.edp.low_vswing = vswing == 0; + } + } + +- i915->vbt.edp.drrs_msa_timing_delay = ++ panel->vbt.edp.drrs_msa_timing_delay = + (edp->sdrrs_msa_timing_delay >> (panel_type * 2)) & 3; + } + + static void +-parse_psr(struct drm_i915_private *i915) ++parse_psr(struct drm_i915_private *i915, ++ struct intel_panel *panel) + { + const struct bdb_psr *psr; + const struct psr_table *psr_table; +- int panel_type = i915->vbt.panel_type; ++ int panel_type = panel->vbt.panel_type; + + psr = find_section(i915, BDB_PSR); + if (!psr) { +@@ -1366,11 +1386,11 @@ parse_psr(struct drm_i915_private *i915) + + psr_table = &psr->psr_table[panel_type]; + +- i915->vbt.psr.full_link = psr_table->full_link; +- i915->vbt.psr.require_aux_wakeup = psr_table->require_aux_to_wakeup; ++ panel->vbt.psr.full_link = psr_table->full_link; ++ panel->vbt.psr.require_aux_wakeup = psr_table->require_aux_to_wakeup; + + /* Allowed VBT values goes from 0 to 15 */ +- i915->vbt.psr.idle_frames = psr_table->idle_frames < 0 ? 0 : ++ panel->vbt.psr.idle_frames = psr_table->idle_frames < 0 ? 0 : + psr_table->idle_frames > 15 ? 15 : psr_table->idle_frames; + + /* +@@ -1381,13 +1401,13 @@ parse_psr(struct drm_i915_private *i915) + (DISPLAY_VER(i915) >= 9 && !IS_BROXTON(i915))) { + switch (psr_table->tp1_wakeup_time) { + case 0: +- i915->vbt.psr.tp1_wakeup_time_us = 500; ++ panel->vbt.psr.tp1_wakeup_time_us = 500; + break; + case 1: +- i915->vbt.psr.tp1_wakeup_time_us = 100; ++ panel->vbt.psr.tp1_wakeup_time_us = 100; + break; + case 3: +- i915->vbt.psr.tp1_wakeup_time_us = 0; ++ panel->vbt.psr.tp1_wakeup_time_us = 0; + break; + default: + drm_dbg_kms(&i915->drm, +@@ -1395,19 +1415,19 @@ parse_psr(struct drm_i915_private *i915) + psr_table->tp1_wakeup_time); + fallthrough; + case 2: +- i915->vbt.psr.tp1_wakeup_time_us = 2500; ++ panel->vbt.psr.tp1_wakeup_time_us = 2500; + break; + } + + switch (psr_table->tp2_tp3_wakeup_time) { + case 0: +- i915->vbt.psr.tp2_tp3_wakeup_time_us = 500; ++ panel->vbt.psr.tp2_tp3_wakeup_time_us = 500; + break; + case 1: +- i915->vbt.psr.tp2_tp3_wakeup_time_us = 100; ++ panel->vbt.psr.tp2_tp3_wakeup_time_us = 100; + break; + case 3: +- i915->vbt.psr.tp2_tp3_wakeup_time_us = 0; ++ panel->vbt.psr.tp2_tp3_wakeup_time_us = 0; + break; + default: + drm_dbg_kms(&i915->drm, +@@ -1415,12 +1435,12 @@ parse_psr(struct drm_i915_private *i915) + psr_table->tp2_tp3_wakeup_time); + fallthrough; + case 2: +- i915->vbt.psr.tp2_tp3_wakeup_time_us = 2500; ++ panel->vbt.psr.tp2_tp3_wakeup_time_us = 2500; + break; + } + } else { +- i915->vbt.psr.tp1_wakeup_time_us = psr_table->tp1_wakeup_time * 100; +- i915->vbt.psr.tp2_tp3_wakeup_time_us = psr_table->tp2_tp3_wakeup_time * 100; ++ panel->vbt.psr.tp1_wakeup_time_us = psr_table->tp1_wakeup_time * 100; ++ panel->vbt.psr.tp2_tp3_wakeup_time_us = psr_table->tp2_tp3_wakeup_time * 100; + } + + if (i915->vbt.version >= 226) { +@@ -1442,62 +1462,66 @@ parse_psr(struct drm_i915_private 
*i915) + wakeup_time = 2500; + break; + } +- i915->vbt.psr.psr2_tp2_tp3_wakeup_time_us = wakeup_time; ++ panel->vbt.psr.psr2_tp2_tp3_wakeup_time_us = wakeup_time; + } else { + /* Reusing PSR1 wakeup time for PSR2 in older VBTs */ +- i915->vbt.psr.psr2_tp2_tp3_wakeup_time_us = i915->vbt.psr.tp2_tp3_wakeup_time_us; ++ panel->vbt.psr.psr2_tp2_tp3_wakeup_time_us = panel->vbt.psr.tp2_tp3_wakeup_time_us; + } + } + + static void parse_dsi_backlight_ports(struct drm_i915_private *i915, +- u16 version, enum port port) ++ struct intel_panel *panel, ++ enum port port) + { +- if (!i915->vbt.dsi.config->dual_link || version < 197) { +- i915->vbt.dsi.bl_ports = BIT(port); +- if (i915->vbt.dsi.config->cabc_supported) +- i915->vbt.dsi.cabc_ports = BIT(port); ++ enum port port_bc = DISPLAY_VER(i915) >= 11 ? PORT_B : PORT_C; ++ ++ if (!panel->vbt.dsi.config->dual_link || i915->vbt.version < 197) { ++ panel->vbt.dsi.bl_ports = BIT(port); ++ if (panel->vbt.dsi.config->cabc_supported) ++ panel->vbt.dsi.cabc_ports = BIT(port); + + return; + } + +- switch (i915->vbt.dsi.config->dl_dcs_backlight_ports) { ++ switch (panel->vbt.dsi.config->dl_dcs_backlight_ports) { + case DL_DCS_PORT_A: +- i915->vbt.dsi.bl_ports = BIT(PORT_A); ++ panel->vbt.dsi.bl_ports = BIT(PORT_A); + break; + case DL_DCS_PORT_C: +- i915->vbt.dsi.bl_ports = BIT(PORT_C); ++ panel->vbt.dsi.bl_ports = BIT(port_bc); + break; + default: + case DL_DCS_PORT_A_AND_C: +- i915->vbt.dsi.bl_ports = BIT(PORT_A) | BIT(PORT_C); ++ panel->vbt.dsi.bl_ports = BIT(PORT_A) | BIT(port_bc); + break; + } + +- if (!i915->vbt.dsi.config->cabc_supported) ++ if (!panel->vbt.dsi.config->cabc_supported) + return; + +- switch (i915->vbt.dsi.config->dl_dcs_cabc_ports) { ++ switch (panel->vbt.dsi.config->dl_dcs_cabc_ports) { + case DL_DCS_PORT_A: +- i915->vbt.dsi.cabc_ports = BIT(PORT_A); ++ panel->vbt.dsi.cabc_ports = BIT(PORT_A); + break; + case DL_DCS_PORT_C: +- i915->vbt.dsi.cabc_ports = BIT(PORT_C); ++ panel->vbt.dsi.cabc_ports = BIT(port_bc); + break; + default: + case DL_DCS_PORT_A_AND_C: +- i915->vbt.dsi.cabc_ports = +- BIT(PORT_A) | BIT(PORT_C); ++ panel->vbt.dsi.cabc_ports = ++ BIT(PORT_A) | BIT(port_bc); + break; + } + } + + static void +-parse_mipi_config(struct drm_i915_private *i915) ++parse_mipi_config(struct drm_i915_private *i915, ++ struct intel_panel *panel) + { + const struct bdb_mipi_config *start; + const struct mipi_config *config; + const struct mipi_pps_data *pps; +- int panel_type = i915->vbt.panel_type; ++ int panel_type = panel->vbt.panel_type; + enum port port; + + /* parse MIPI blocks only if LFP type is MIPI */ +@@ -1505,7 +1529,7 @@ parse_mipi_config(struct drm_i915_private *i915) + return; + + /* Initialize this to undefined indicating no generic MIPI support */ +- i915->vbt.dsi.panel_id = MIPI_DSI_UNDEFINED_PANEL_ID; ++ panel->vbt.dsi.panel_id = MIPI_DSI_UNDEFINED_PANEL_ID; + + /* Block #40 is already parsed and panel_fixed_mode is + * stored in i915->lfp_lvds_vbt_mode +@@ -1532,17 +1556,17 @@ parse_mipi_config(struct drm_i915_private *i915) + pps = &start->pps[panel_type]; + + /* store as of now full data. 
Trim when we realise all is not needed */ +- i915->vbt.dsi.config = kmemdup(config, sizeof(struct mipi_config), GFP_KERNEL); +- if (!i915->vbt.dsi.config) ++ panel->vbt.dsi.config = kmemdup(config, sizeof(struct mipi_config), GFP_KERNEL); ++ if (!panel->vbt.dsi.config) + return; + +- i915->vbt.dsi.pps = kmemdup(pps, sizeof(struct mipi_pps_data), GFP_KERNEL); +- if (!i915->vbt.dsi.pps) { +- kfree(i915->vbt.dsi.config); ++ panel->vbt.dsi.pps = kmemdup(pps, sizeof(struct mipi_pps_data), GFP_KERNEL); ++ if (!panel->vbt.dsi.pps) { ++ kfree(panel->vbt.dsi.config); + return; + } + +- parse_dsi_backlight_ports(i915, i915->vbt.version, port); ++ parse_dsi_backlight_ports(i915, panel, port); + + /* FIXME is the 90 vs. 270 correct? */ + switch (config->rotation) { +@@ -1551,25 +1575,25 @@ parse_mipi_config(struct drm_i915_private *i915) + * Most (all?) VBTs claim 0 degrees despite having + * an upside down panel, thus we do not trust this. + */ +- i915->vbt.dsi.orientation = ++ panel->vbt.dsi.orientation = + DRM_MODE_PANEL_ORIENTATION_UNKNOWN; + break; + case ENABLE_ROTATION_90: +- i915->vbt.dsi.orientation = ++ panel->vbt.dsi.orientation = + DRM_MODE_PANEL_ORIENTATION_RIGHT_UP; + break; + case ENABLE_ROTATION_180: +- i915->vbt.dsi.orientation = ++ panel->vbt.dsi.orientation = + DRM_MODE_PANEL_ORIENTATION_BOTTOM_UP; + break; + case ENABLE_ROTATION_270: +- i915->vbt.dsi.orientation = ++ panel->vbt.dsi.orientation = + DRM_MODE_PANEL_ORIENTATION_LEFT_UP; + break; + } + + /* We have mandatory mipi config blocks. Initialize as generic panel */ +- i915->vbt.dsi.panel_id = MIPI_DSI_GENERIC_PANEL_ID; ++ panel->vbt.dsi.panel_id = MIPI_DSI_GENERIC_PANEL_ID; + } + + /* Find the sequence block and size for the given panel. */ +@@ -1732,13 +1756,14 @@ static int goto_next_sequence_v3(const u8 *data, int index, int total) + * Get len of pre-fixed deassert fragment from a v1 init OTP sequence, + * skip all delay + gpio operands and stop at the first DSI packet op. + */ +-static int get_init_otp_deassert_fragment_len(struct drm_i915_private *i915) ++static int get_init_otp_deassert_fragment_len(struct drm_i915_private *i915, ++ struct intel_panel *panel) + { +- const u8 *data = i915->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP]; ++ const u8 *data = panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP]; + int index, len; + + if (drm_WARN_ON(&i915->drm, +- !data || i915->vbt.dsi.seq_version != 1)) ++ !data || panel->vbt.dsi.seq_version != 1)) + return 0; + + /* index = 1 to skip sequence byte */ +@@ -1766,7 +1791,8 @@ static int get_init_otp_deassert_fragment_len(struct drm_i915_private *i915) + * these devices we split the init OTP sequence into a deassert sequence and + * the actual init OTP part. 
+ */ +-static void fixup_mipi_sequences(struct drm_i915_private *i915) ++static void fixup_mipi_sequences(struct drm_i915_private *i915, ++ struct intel_panel *panel) + { + u8 *init_otp; + int len; +@@ -1776,18 +1802,18 @@ static void fixup_mipi_sequences(struct drm_i915_private *i915) + return; + + /* Limit this to v1 vid-mode sequences */ +- if (i915->vbt.dsi.config->is_cmd_mode || +- i915->vbt.dsi.seq_version != 1) ++ if (panel->vbt.dsi.config->is_cmd_mode || ++ panel->vbt.dsi.seq_version != 1) + return; + + /* Only do this if there are otp and assert seqs and no deassert seq */ +- if (!i915->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] || +- !i915->vbt.dsi.sequence[MIPI_SEQ_ASSERT_RESET] || +- i915->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET]) ++ if (!panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] || ++ !panel->vbt.dsi.sequence[MIPI_SEQ_ASSERT_RESET] || ++ panel->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET]) + return; + + /* The deassert-sequence ends at the first DSI packet */ +- len = get_init_otp_deassert_fragment_len(i915); ++ len = get_init_otp_deassert_fragment_len(i915, panel); + if (!len) + return; + +@@ -1795,25 +1821,26 @@ static void fixup_mipi_sequences(struct drm_i915_private *i915) + "Using init OTP fragment to deassert reset\n"); + + /* Copy the fragment, update seq byte and terminate it */ +- init_otp = (u8 *)i915->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP]; +- i915->vbt.dsi.deassert_seq = kmemdup(init_otp, len + 1, GFP_KERNEL); +- if (!i915->vbt.dsi.deassert_seq) ++ init_otp = (u8 *)panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP]; ++ panel->vbt.dsi.deassert_seq = kmemdup(init_otp, len + 1, GFP_KERNEL); ++ if (!panel->vbt.dsi.deassert_seq) + return; +- i915->vbt.dsi.deassert_seq[0] = MIPI_SEQ_DEASSERT_RESET; +- i915->vbt.dsi.deassert_seq[len] = MIPI_SEQ_ELEM_END; ++ panel->vbt.dsi.deassert_seq[0] = MIPI_SEQ_DEASSERT_RESET; ++ panel->vbt.dsi.deassert_seq[len] = MIPI_SEQ_ELEM_END; + /* Use the copy for deassert */ +- i915->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET] = +- i915->vbt.dsi.deassert_seq; ++ panel->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET] = ++ panel->vbt.dsi.deassert_seq; + /* Replace the last byte of the fragment with init OTP seq byte */ + init_otp[len - 1] = MIPI_SEQ_INIT_OTP; + /* And make MIPI_MIPI_SEQ_INIT_OTP point to it */ +- i915->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1; ++ panel->vbt.dsi.sequence[MIPI_SEQ_INIT_OTP] = init_otp + len - 1; + } + + static void +-parse_mipi_sequence(struct drm_i915_private *i915) ++parse_mipi_sequence(struct drm_i915_private *i915, ++ struct intel_panel *panel) + { +- int panel_type = i915->vbt.panel_type; ++ int panel_type = panel->vbt.panel_type; + const struct bdb_mipi_sequence *sequence; + const u8 *seq_data; + u32 seq_size; +@@ -1821,7 +1848,7 @@ parse_mipi_sequence(struct drm_i915_private *i915) + int index = 0; + + /* Only our generic panel driver uses the sequence block. 
*/ +- if (i915->vbt.dsi.panel_id != MIPI_DSI_GENERIC_PANEL_ID) ++ if (panel->vbt.dsi.panel_id != MIPI_DSI_GENERIC_PANEL_ID) + return; + + sequence = find_section(i915, BDB_MIPI_SEQUENCE); +@@ -1867,7 +1894,7 @@ parse_mipi_sequence(struct drm_i915_private *i915) + drm_dbg_kms(&i915->drm, + "Unsupported sequence %u\n", seq_id); + +- i915->vbt.dsi.sequence[seq_id] = data + index; ++ panel->vbt.dsi.sequence[seq_id] = data + index; + + if (sequence->version >= 3) + index = goto_next_sequence_v3(data, index, seq_size); +@@ -1880,18 +1907,18 @@ parse_mipi_sequence(struct drm_i915_private *i915) + } + } + +- i915->vbt.dsi.data = data; +- i915->vbt.dsi.size = seq_size; +- i915->vbt.dsi.seq_version = sequence->version; ++ panel->vbt.dsi.data = data; ++ panel->vbt.dsi.size = seq_size; ++ panel->vbt.dsi.seq_version = sequence->version; + +- fixup_mipi_sequences(i915); ++ fixup_mipi_sequences(i915, panel); + + drm_dbg(&i915->drm, "MIPI related VBT parsing complete\n"); + return; + + err: + kfree(data); +- memset(i915->vbt.dsi.sequence, 0, sizeof(i915->vbt.dsi.sequence)); ++ memset(panel->vbt.dsi.sequence, 0, sizeof(panel->vbt.dsi.sequence)); + } + + static void +@@ -2645,15 +2672,6 @@ init_vbt_defaults(struct drm_i915_private *i915) + { + i915->vbt.crt_ddc_pin = GMBUS_PIN_VGADDC; + +- /* Default to having backlight */ +- i915->vbt.backlight.present = true; +- +- /* LFP panel data */ +- i915->vbt.lvds_dither = 1; +- +- /* SDVO panel data */ +- i915->vbt.sdvo_lvds_vbt_mode = NULL; +- + /* general features */ + i915->vbt.int_tv_support = 1; + i915->vbt.int_crt_support = 1; +@@ -2673,6 +2691,17 @@ init_vbt_defaults(struct drm_i915_private *i915) + i915->vbt.lvds_ssc_freq); + } + ++/* Common defaults which may be overridden by VBT. */ ++static void ++init_vbt_panel_defaults(struct intel_panel *panel) ++{ ++ /* Default to having backlight */ ++ panel->vbt.backlight.present = true; ++ ++ /* LFP panel data */ ++ panel->vbt.lvds_dither = true; ++} ++ + /* Defaults to initialize only if there is no VBT. 
*/ + static void + init_vbt_missing_defaults(struct drm_i915_private *i915) +@@ -2959,17 +2988,7 @@ void intel_bios_init(struct drm_i915_private *i915) + /* Grab useful general definitions */ + parse_general_features(i915); + parse_general_definitions(i915); +- parse_panel_options(i915); +- parse_generic_dtd(i915); +- parse_lfp_data(i915); +- parse_lfp_backlight(i915); +- parse_sdvo_panel_data(i915); + parse_driver_features(i915); +- parse_power_conservation_features(i915); +- parse_edp(i915); +- parse_psr(i915); +- parse_mipi_config(i915); +- parse_mipi_sequence(i915); + + /* Depends on child device list */ + parse_compression_parameters(i915); +@@ -2988,6 +3007,24 @@ out: + kfree(oprom_vbt); + } + ++void intel_bios_init_panel(struct drm_i915_private *i915, ++ struct intel_panel *panel) ++{ ++ init_vbt_panel_defaults(panel); ++ ++ parse_panel_options(i915, panel); ++ parse_generic_dtd(i915, panel); ++ parse_lfp_data(i915, panel); ++ parse_lfp_backlight(i915, panel); ++ parse_sdvo_panel_data(i915, panel); ++ parse_panel_driver_features(i915, panel); ++ parse_power_conservation_features(i915, panel); ++ parse_edp(i915, panel); ++ parse_psr(i915, panel); ++ parse_mipi_config(i915, panel); ++ parse_mipi_sequence(i915, panel); ++} ++ + /** + * intel_bios_driver_remove - Free any resources allocated by intel_bios_init() + * @i915: i915 device instance +@@ -3007,19 +3044,22 @@ void intel_bios_driver_remove(struct drm_i915_private *i915) + list_del(&entry->node); + kfree(entry); + } ++} + +- kfree(i915->vbt.sdvo_lvds_vbt_mode); +- i915->vbt.sdvo_lvds_vbt_mode = NULL; +- kfree(i915->vbt.lfp_lvds_vbt_mode); +- i915->vbt.lfp_lvds_vbt_mode = NULL; +- kfree(i915->vbt.dsi.data); +- i915->vbt.dsi.data = NULL; +- kfree(i915->vbt.dsi.pps); +- i915->vbt.dsi.pps = NULL; +- kfree(i915->vbt.dsi.config); +- i915->vbt.dsi.config = NULL; +- kfree(i915->vbt.dsi.deassert_seq); +- i915->vbt.dsi.deassert_seq = NULL; ++void intel_bios_fini_panel(struct intel_panel *panel) ++{ ++ kfree(panel->vbt.sdvo_lvds_vbt_mode); ++ panel->vbt.sdvo_lvds_vbt_mode = NULL; ++ kfree(panel->vbt.lfp_lvds_vbt_mode); ++ panel->vbt.lfp_lvds_vbt_mode = NULL; ++ kfree(panel->vbt.dsi.data); ++ panel->vbt.dsi.data = NULL; ++ kfree(panel->vbt.dsi.pps); ++ panel->vbt.dsi.pps = NULL; ++ kfree(panel->vbt.dsi.config); ++ panel->vbt.dsi.config = NULL; ++ kfree(panel->vbt.dsi.deassert_seq); ++ panel->vbt.dsi.deassert_seq = NULL; + } + + /** +diff --git a/drivers/gpu/drm/i915/display/intel_bios.h b/drivers/gpu/drm/i915/display/intel_bios.h +index 4709c4d298059..86129f015718d 100644 +--- a/drivers/gpu/drm/i915/display/intel_bios.h ++++ b/drivers/gpu/drm/i915/display/intel_bios.h +@@ -36,6 +36,7 @@ struct drm_i915_private; + struct intel_bios_encoder_data; + struct intel_crtc_state; + struct intel_encoder; ++struct intel_panel; + enum port; + + enum intel_backlight_type { +@@ -230,6 +231,9 @@ struct mipi_pps_data { + } __packed; + + void intel_bios_init(struct drm_i915_private *dev_priv); ++void intel_bios_init_panel(struct drm_i915_private *dev_priv, ++ struct intel_panel *panel); ++void intel_bios_fini_panel(struct intel_panel *panel); + void intel_bios_driver_remove(struct drm_i915_private *dev_priv); + bool intel_bios_is_valid_vbt(const void *buf, size_t size); + bool intel_bios_is_tv_present(struct drm_i915_private *dev_priv); +diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c +index 9e6fa59eabba7..333871cf3a2c5 100644 +--- a/drivers/gpu/drm/i915/display/intel_ddi.c ++++ 
b/drivers/gpu/drm/i915/display/intel_ddi.c +@@ -3433,26 +3433,8 @@ static void intel_ddi_get_config(struct intel_encoder *encoder, + pipe_config->has_audio = + intel_ddi_is_audio_enabled(dev_priv, cpu_transcoder); + +- if (encoder->type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.bpp && +- pipe_config->pipe_bpp > dev_priv->vbt.edp.bpp) { +- /* +- * This is a big fat ugly hack. +- * +- * Some machines in UEFI boot mode provide us a VBT that has 18 +- * bpp and 1.62 GHz link bandwidth for eDP, which for reasons +- * unknown we fail to light up. Yet the same BIOS boots up with +- * 24 bpp and 2.7 GHz link. Use the same bpp as the BIOS uses as +- * max, not what it tells us to use. +- * +- * Note: This will still be broken if the eDP panel is not lit +- * up by the BIOS, and thus we can't get the mode at module +- * load. +- */ +- drm_dbg_kms(&dev_priv->drm, +- "pipe has %d bpp for eDP panel, overriding BIOS-provided max %d bpp\n", +- pipe_config->pipe_bpp, dev_priv->vbt.edp.bpp); +- dev_priv->vbt.edp.bpp = pipe_config->pipe_bpp; +- } ++ if (encoder->type == INTEL_OUTPUT_EDP) ++ intel_edp_fixup_vbt_bpp(encoder, pipe_config->pipe_bpp); + + ddi_dotclock_get(pipe_config); + +diff --git a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c +index 85f58dd3df722..b490acd0ab691 100644 +--- a/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c ++++ b/drivers/gpu/drm/i915/display/intel_ddi_buf_trans.c +@@ -1062,17 +1062,18 @@ bool is_hobl_buf_trans(const struct intel_ddi_buf_trans *table) + + static bool use_edp_hobl(struct intel_encoder *encoder) + { +- struct drm_i915_private *i915 = to_i915(encoder->base.dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); ++ struct intel_connector *connector = intel_dp->attached_connector; + +- return i915->vbt.edp.hobl && !intel_dp->hobl_failed; ++ return connector->panel.vbt.edp.hobl && !intel_dp->hobl_failed; + } + + static bool use_edp_low_vswing(struct intel_encoder *encoder) + { +- struct drm_i915_private *i915 = to_i915(encoder->base.dev); ++ struct intel_dp *intel_dp = enc_to_intel_dp(encoder); ++ struct intel_connector *connector = intel_dp->attached_connector; + +- return i915->vbt.edp.low_vswing; ++ return connector->panel.vbt.edp.low_vswing; + } + + static const struct intel_ddi_buf_trans * +diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h +index 408152f9f46a4..e2561c5d4953c 100644 +--- a/drivers/gpu/drm/i915/display/intel_display_types.h ++++ b/drivers/gpu/drm/i915/display/intel_display_types.h +@@ -279,6 +279,73 @@ struct intel_panel_bl_funcs { + u32 (*hz_to_pwm)(struct intel_connector *connector, u32 hz); + }; + ++enum drrs_type { ++ DRRS_TYPE_NONE, ++ DRRS_TYPE_STATIC, ++ DRRS_TYPE_SEAMLESS, ++}; ++ ++struct intel_vbt_panel_data { ++ struct drm_display_mode *lfp_lvds_vbt_mode; /* if any */ ++ struct drm_display_mode *sdvo_lvds_vbt_mode; /* if any */ ++ ++ /* Feature bits */ ++ unsigned int panel_type:4; ++ unsigned int lvds_dither:1; ++ unsigned int bios_lvds_val; /* initial [PCH_]LVDS reg val in VBIOS */ ++ ++ u8 seamless_drrs_min_refresh_rate; ++ enum drrs_type drrs_type; ++ ++ struct { ++ int rate; ++ int lanes; ++ int preemphasis; ++ int vswing; ++ int bpp; ++ struct edp_power_seq pps; ++ u8 drrs_msa_timing_delay; ++ bool low_vswing; ++ bool initialized; ++ bool hobl; ++ } edp; ++ ++ struct { ++ bool enable; ++ bool full_link; ++ bool require_aux_wakeup; ++ int idle_frames; ++ int tp1_wakeup_time_us; ++ int 
tp2_tp3_wakeup_time_us; ++ int psr2_tp2_tp3_wakeup_time_us; ++ } psr; ++ ++ struct { ++ u16 pwm_freq_hz; ++ u16 brightness_precision_bits; ++ bool present; ++ bool active_low_pwm; ++ u8 min_brightness; /* min_brightness/255 of max */ ++ u8 controller; /* brightness controller number */ ++ enum intel_backlight_type type; ++ } backlight; ++ ++ /* MIPI DSI */ ++ struct { ++ u16 panel_id; ++ struct mipi_config *config; ++ struct mipi_pps_data *pps; ++ u16 bl_ports; ++ u16 cabc_ports; ++ u8 seq_version; ++ u32 size; ++ u8 *data; ++ const u8 *sequence[MIPI_SEQ_MAX]; ++ u8 *deassert_seq; /* Used by fixup_mipi_sequences() */ ++ enum drm_panel_orientation orientation; ++ } dsi; ++}; ++ + struct intel_panel { + struct list_head fixed_modes; + +@@ -318,6 +385,8 @@ struct intel_panel { + const struct intel_panel_bl_funcs *pwm_funcs; + void (*power)(struct intel_connector *, bool enable); + } backlight; ++ ++ struct intel_vbt_panel_data vbt; + }; + + struct intel_digital_port; +diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c +index fe8b6b72970a2..0efec6023fbe8 100644 +--- a/drivers/gpu/drm/i915/display/intel_dp.c ++++ b/drivers/gpu/drm/i915/display/intel_dp.c +@@ -1246,11 +1246,12 @@ static int intel_dp_max_bpp(struct intel_dp *intel_dp, + if (intel_dp_is_edp(intel_dp)) { + /* Get bpp from vbt only for panels that dont have bpp in edid */ + if (intel_connector->base.display_info.bpc == 0 && +- dev_priv->vbt.edp.bpp && dev_priv->vbt.edp.bpp < bpp) { ++ intel_connector->panel.vbt.edp.bpp && ++ intel_connector->panel.vbt.edp.bpp < bpp) { + drm_dbg_kms(&dev_priv->drm, + "clamping bpp for eDP panel to BIOS-provided %i\n", +- dev_priv->vbt.edp.bpp); +- bpp = dev_priv->vbt.edp.bpp; ++ intel_connector->panel.vbt.edp.bpp); ++ bpp = intel_connector->panel.vbt.edp.bpp; + } + } + +@@ -1907,7 +1908,7 @@ intel_dp_drrs_compute_config(struct intel_connector *connector, + } + + if (IS_IRONLAKE(i915) || IS_SANDYBRIDGE(i915) || IS_IVYBRIDGE(i915)) +- pipe_config->msa_timing_delay = i915->vbt.edp.drrs_msa_timing_delay; ++ pipe_config->msa_timing_delay = connector->panel.vbt.edp.drrs_msa_timing_delay; + + pipe_config->has_drrs = true; + +@@ -2737,6 +2738,33 @@ static void intel_edp_mso_mode_fixup(struct intel_connector *connector, + DRM_MODE_ARG(mode)); + } + ++void intel_edp_fixup_vbt_bpp(struct intel_encoder *encoder, int pipe_bpp) ++{ ++ struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); ++ struct intel_dp *intel_dp = enc_to_intel_dp(encoder); ++ struct intel_connector *connector = intel_dp->attached_connector; ++ ++ if (connector->panel.vbt.edp.bpp && pipe_bpp > connector->panel.vbt.edp.bpp) { ++ /* ++ * This is a big fat ugly hack. ++ * ++ * Some machines in UEFI boot mode provide us a VBT that has 18 ++ * bpp and 1.62 GHz link bandwidth for eDP, which for reasons ++ * unknown we fail to light up. Yet the same BIOS boots up with ++ * 24 bpp and 2.7 GHz link. Use the same bpp as the BIOS uses as ++ * max, not what it tells us to use. ++ * ++ * Note: This will still be broken if the eDP panel is not lit ++ * up by the BIOS, and thus we can't get the mode at module ++ * load. 
++ */ ++ drm_dbg_kms(&dev_priv->drm, ++ "pipe has %d bpp for eDP panel, overriding BIOS-provided max %d bpp\n", ++ pipe_bpp, connector->panel.vbt.edp.bpp); ++ connector->panel.vbt.edp.bpp = pipe_bpp; ++ } ++} ++ + static void intel_edp_mso_init(struct intel_dp *intel_dp) + { + struct drm_i915_private *i915 = dp_to_i915(intel_dp); +@@ -5212,8 +5240,10 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, + } + intel_connector->edid = edid; + ++ intel_bios_init_panel(dev_priv, &intel_connector->panel); ++ + intel_panel_add_edid_fixed_modes(intel_connector, +- dev_priv->vbt.drrs_type != DRRS_TYPE_NONE); ++ intel_connector->panel.vbt.drrs_type != DRRS_TYPE_NONE); + + /* MSO requires information from the EDID */ + intel_edp_mso_init(intel_dp); +diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h +index d457e17bdc57e..a54902c713a34 100644 +--- a/drivers/gpu/drm/i915/display/intel_dp.h ++++ b/drivers/gpu/drm/i915/display/intel_dp.h +@@ -29,6 +29,7 @@ struct link_config_limits { + int min_bpp, max_bpp; + }; + ++void intel_edp_fixup_vbt_bpp(struct intel_encoder *encoder, int pipe_bpp); + void intel_dp_adjust_compliance_config(struct intel_dp *intel_dp, + struct intel_crtc_state *pipe_config, + struct link_config_limits *limits); +@@ -63,6 +64,7 @@ enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *dig_port, + void intel_edp_backlight_on(const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); + void intel_edp_backlight_off(const struct drm_connector_state *conn_state); ++void intel_edp_fixup_vbt_bpp(struct intel_encoder *encoder, int pipe_bpp); + void intel_dp_mst_suspend(struct drm_i915_private *dev_priv); + void intel_dp_mst_resume(struct drm_i915_private *dev_priv); + int intel_dp_max_link_rate(struct intel_dp *intel_dp); +diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +index fb6cf30ee6281..c92d5bb2326a3 100644 +--- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c ++++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c +@@ -370,7 +370,7 @@ static int intel_dp_aux_vesa_setup_backlight(struct intel_connector *connector, + int ret; + + ret = drm_edp_backlight_init(&intel_dp->aux, &panel->backlight.edp.vesa.info, +- i915->vbt.backlight.pwm_freq_hz, intel_dp->edp_dpcd, ++ panel->vbt.backlight.pwm_freq_hz, intel_dp->edp_dpcd, + ¤t_level, ¤t_mode); + if (ret < 0) + return ret; +@@ -454,7 +454,7 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *connector) + case INTEL_DP_AUX_BACKLIGHT_OFF: + return -ENODEV; + case INTEL_DP_AUX_BACKLIGHT_AUTO: +- switch (i915->vbt.backlight.type) { ++ switch (panel->vbt.backlight.type) { + case INTEL_BACKLIGHT_VESA_EDP_AUX_INTERFACE: + try_vesa_interface = true; + break; +@@ -466,7 +466,7 @@ int intel_dp_aux_init_backlight_funcs(struct intel_connector *connector) + } + break; + case INTEL_DP_AUX_BACKLIGHT_ON: +- if (i915->vbt.backlight.type != INTEL_BACKLIGHT_VESA_EDP_AUX_INTERFACE) ++ if (panel->vbt.backlight.type != INTEL_BACKLIGHT_VESA_EDP_AUX_INTERFACE) + try_intel_interface = true; + + try_vesa_interface = true; +diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c +index 166caf293f7bc..7da4a9cbe4ba4 100644 +--- a/drivers/gpu/drm/i915/display/intel_drrs.c ++++ b/drivers/gpu/drm/i915/display/intel_drrs.c +@@ -217,9 +217,6 @@ static void intel_drrs_frontbuffer_update(struct drm_i915_private *dev_priv, + { + struct 
intel_crtc *crtc; + +- if (dev_priv->vbt.drrs_type != DRRS_TYPE_SEAMLESS) +- return; +- + for_each_intel_crtc(&dev_priv->drm, crtc) { + unsigned int frontbuffer_bits; + +diff --git a/drivers/gpu/drm/i915/display/intel_dsi.c b/drivers/gpu/drm/i915/display/intel_dsi.c +index 389a8c24cdc1e..35e121cd226c5 100644 +--- a/drivers/gpu/drm/i915/display/intel_dsi.c ++++ b/drivers/gpu/drm/i915/display/intel_dsi.c +@@ -102,7 +102,7 @@ intel_dsi_get_panel_orientation(struct intel_connector *connector) + struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + enum drm_panel_orientation orientation; + +- orientation = dev_priv->vbt.dsi.orientation; ++ orientation = connector->panel.vbt.dsi.orientation; + if (orientation != DRM_MODE_PANEL_ORIENTATION_UNKNOWN) + return orientation; + +diff --git a/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c b/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c +index 7d234429e71ef..1bc7118c56a2a 100644 +--- a/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c ++++ b/drivers/gpu/drm/i915/display/intel_dsi_dcs_backlight.c +@@ -160,12 +160,10 @@ static void dcs_enable_backlight(const struct intel_crtc_state *crtc_state, + static int dcs_setup_backlight(struct intel_connector *connector, + enum pipe unused) + { +- struct drm_device *dev = connector->base.dev; +- struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_panel *panel = &connector->panel; + +- if (dev_priv->vbt.backlight.brightness_precision_bits > 8) +- panel->backlight.max = (1 << dev_priv->vbt.backlight.brightness_precision_bits) - 1; ++ if (panel->vbt.backlight.brightness_precision_bits > 8) ++ panel->backlight.max = (1 << panel->vbt.backlight.brightness_precision_bits) - 1; + else + panel->backlight.max = PANEL_PWM_MAX_VALUE; + +@@ -185,11 +183,10 @@ static const struct intel_panel_bl_funcs dcs_bl_funcs = { + int intel_dsi_dcs_init_backlight_funcs(struct intel_connector *intel_connector) + { + struct drm_device *dev = intel_connector->base.dev; +- struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_encoder *encoder = intel_attached_encoder(intel_connector); + struct intel_panel *panel = &intel_connector->panel; + +- if (dev_priv->vbt.backlight.type != INTEL_BACKLIGHT_DSI_DCS) ++ if (panel->vbt.backlight.type != INTEL_BACKLIGHT_DSI_DCS) + return -ENODEV; + + if (drm_WARN_ON(dev, encoder->type != INTEL_OUTPUT_DSI)) +diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +index dd24aef925f2e..75e8cc4337c93 100644 +--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c ++++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +@@ -240,9 +240,10 @@ static const u8 *mipi_exec_delay(struct intel_dsi *intel_dsi, const u8 *data) + return data; + } + +-static void vlv_exec_gpio(struct drm_i915_private *dev_priv, ++static void vlv_exec_gpio(struct intel_connector *connector, + u8 gpio_source, u8 gpio_index, bool value) + { ++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct gpio_map *map; + u16 pconf0, padval; + u32 tmp; +@@ -256,7 +257,7 @@ static void vlv_exec_gpio(struct drm_i915_private *dev_priv, + + map = &vlv_gpio_table[gpio_index]; + +- if (dev_priv->vbt.dsi.seq_version >= 3) { ++ if (connector->panel.vbt.dsi.seq_version >= 3) { + /* XXX: this assumes vlv_gpio_table only has NC GPIOs. 
*/ + port = IOSF_PORT_GPIO_NC; + } else { +@@ -287,14 +288,15 @@ static void vlv_exec_gpio(struct drm_i915_private *dev_priv, + vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO)); + } + +-static void chv_exec_gpio(struct drm_i915_private *dev_priv, ++static void chv_exec_gpio(struct intel_connector *connector, + u8 gpio_source, u8 gpio_index, bool value) + { ++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + u16 cfg0, cfg1; + u16 family_num; + u8 port; + +- if (dev_priv->vbt.dsi.seq_version >= 3) { ++ if (connector->panel.vbt.dsi.seq_version >= 3) { + if (gpio_index >= CHV_GPIO_IDX_START_SE) { + /* XXX: it's unclear whether 255->57 is part of SE. */ + gpio_index -= CHV_GPIO_IDX_START_SE; +@@ -340,9 +342,10 @@ static void chv_exec_gpio(struct drm_i915_private *dev_priv, + vlv_iosf_sb_put(dev_priv, BIT(VLV_IOSF_SB_GPIO)); + } + +-static void bxt_exec_gpio(struct drm_i915_private *dev_priv, ++static void bxt_exec_gpio(struct intel_connector *connector, + u8 gpio_source, u8 gpio_index, bool value) + { ++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + /* XXX: this table is a quick ugly hack. */ + static struct gpio_desc *bxt_gpio_table[U8_MAX + 1]; + struct gpio_desc *gpio_desc = bxt_gpio_table[gpio_index]; +@@ -366,9 +369,11 @@ static void bxt_exec_gpio(struct drm_i915_private *dev_priv, + gpiod_set_value(gpio_desc, value); + } + +-static void icl_exec_gpio(struct drm_i915_private *dev_priv, ++static void icl_exec_gpio(struct intel_connector *connector, + u8 gpio_source, u8 gpio_index, bool value) + { ++ struct drm_i915_private *dev_priv = to_i915(connector->base.dev); ++ + drm_dbg_kms(&dev_priv->drm, "Skipping ICL GPIO element execution\n"); + } + +@@ -376,18 +381,19 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) + { + struct drm_device *dev = intel_dsi->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); ++ struct intel_connector *connector = intel_dsi->attached_connector; + u8 gpio_source, gpio_index = 0, gpio_number; + bool value; + + drm_dbg_kms(&dev_priv->drm, "\n"); + +- if (dev_priv->vbt.dsi.seq_version >= 3) ++ if (connector->panel.vbt.dsi.seq_version >= 3) + gpio_index = *data++; + + gpio_number = *data++; + + /* gpio source in sequence v2 only */ +- if (dev_priv->vbt.dsi.seq_version == 2) ++ if (connector->panel.vbt.dsi.seq_version == 2) + gpio_source = (*data >> 1) & 3; + else + gpio_source = 0; +@@ -396,13 +402,13 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) + value = *data++ & 1; + + if (DISPLAY_VER(dev_priv) >= 11) +- icl_exec_gpio(dev_priv, gpio_source, gpio_index, value); ++ icl_exec_gpio(connector, gpio_source, gpio_index, value); + else if (IS_VALLEYVIEW(dev_priv)) +- vlv_exec_gpio(dev_priv, gpio_source, gpio_number, value); ++ vlv_exec_gpio(connector, gpio_source, gpio_number, value); + else if (IS_CHERRYVIEW(dev_priv)) +- chv_exec_gpio(dev_priv, gpio_source, gpio_number, value); ++ chv_exec_gpio(connector, gpio_source, gpio_number, value); + else +- bxt_exec_gpio(dev_priv, gpio_source, gpio_index, value); ++ bxt_exec_gpio(connector, gpio_source, gpio_index, value); + + return data; + } +@@ -585,14 +591,15 @@ static void intel_dsi_vbt_exec(struct intel_dsi *intel_dsi, + enum mipi_seq seq_id) + { + struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev); ++ struct intel_connector *connector = intel_dsi->attached_connector; + const u8 *data; + fn_mipi_elem_exec mipi_elem_exec; + + if (drm_WARN_ON(&dev_priv->drm, +- seq_id >= 
ARRAY_SIZE(dev_priv->vbt.dsi.sequence))) ++ seq_id >= ARRAY_SIZE(connector->panel.vbt.dsi.sequence))) + return; + +- data = dev_priv->vbt.dsi.sequence[seq_id]; ++ data = connector->panel.vbt.dsi.sequence[seq_id]; + if (!data) + return; + +@@ -605,7 +612,7 @@ static void intel_dsi_vbt_exec(struct intel_dsi *intel_dsi, + data++; + + /* Skip Size of Sequence. */ +- if (dev_priv->vbt.dsi.seq_version >= 3) ++ if (connector->panel.vbt.dsi.seq_version >= 3) + data += 4; + + while (1) { +@@ -621,7 +628,7 @@ static void intel_dsi_vbt_exec(struct intel_dsi *intel_dsi, + mipi_elem_exec = NULL; + + /* Size of Operation. */ +- if (dev_priv->vbt.dsi.seq_version >= 3) ++ if (connector->panel.vbt.dsi.seq_version >= 3) + operation_size = *data++; + + if (mipi_elem_exec) { +@@ -669,10 +676,10 @@ void intel_dsi_vbt_exec_sequence(struct intel_dsi *intel_dsi, + + void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec) + { +- struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev); ++ struct intel_connector *connector = intel_dsi->attached_connector; + + /* For v3 VBTs in vid-mode the delays are part of the VBT sequences */ +- if (is_vid_mode(intel_dsi) && dev_priv->vbt.dsi.seq_version >= 3) ++ if (is_vid_mode(intel_dsi) && connector->panel.vbt.dsi.seq_version >= 3) + return; + + msleep(msec); +@@ -734,9 +741,10 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) + { + struct drm_device *dev = intel_dsi->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); +- struct mipi_config *mipi_config = dev_priv->vbt.dsi.config; +- struct mipi_pps_data *pps = dev_priv->vbt.dsi.pps; +- struct drm_display_mode *mode = dev_priv->vbt.lfp_lvds_vbt_mode; ++ struct intel_connector *connector = intel_dsi->attached_connector; ++ struct mipi_config *mipi_config = connector->panel.vbt.dsi.config; ++ struct mipi_pps_data *pps = connector->panel.vbt.dsi.pps; ++ struct drm_display_mode *mode = connector->panel.vbt.lfp_lvds_vbt_mode; + u16 burst_mode_ratio; + enum port port; + +@@ -872,7 +880,8 @@ void intel_dsi_vbt_gpio_init(struct intel_dsi *intel_dsi, bool panel_is_on) + { + struct drm_device *dev = intel_dsi->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); +- struct mipi_config *mipi_config = dev_priv->vbt.dsi.config; ++ struct intel_connector *connector = intel_dsi->attached_connector; ++ struct mipi_config *mipi_config = connector->panel.vbt.dsi.config; + enum gpiod_flags flags = panel_is_on ? 
GPIOD_OUT_HIGH : GPIOD_OUT_LOW; + bool want_backlight_gpio = false; + bool want_panel_gpio = false; +@@ -927,7 +936,8 @@ void intel_dsi_vbt_gpio_cleanup(struct intel_dsi *intel_dsi) + { + struct drm_device *dev = intel_dsi->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); +- struct mipi_config *mipi_config = dev_priv->vbt.dsi.config; ++ struct intel_connector *connector = intel_dsi->attached_connector; ++ struct mipi_config *mipi_config = connector->panel.vbt.dsi.config; + + if (intel_dsi->gpio_panel) { + gpiod_put(intel_dsi->gpio_panel); +diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c +index e8478161f8b9b..9f250a70519aa 100644 +--- a/drivers/gpu/drm/i915/display/intel_lvds.c ++++ b/drivers/gpu/drm/i915/display/intel_lvds.c +@@ -809,7 +809,7 @@ static bool compute_is_dual_link_lvds(struct intel_lvds_encoder *lvds_encoder) + else + val &= ~(LVDS_DETECTED | LVDS_PIPE_SEL_MASK); + if (val == 0) +- val = dev_priv->vbt.bios_lvds_val; ++ val = connector->panel.vbt.bios_lvds_val; + + return (val & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP; + } +@@ -967,9 +967,11 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) + } + intel_connector->edid = edid; + ++ intel_bios_init_panel(dev_priv, &intel_connector->panel); ++ + /* Try EDID first */ + intel_panel_add_edid_fixed_modes(intel_connector, +- dev_priv->vbt.drrs_type != DRRS_TYPE_NONE); ++ intel_connector->panel.vbt.drrs_type != DRRS_TYPE_NONE); + + /* Failed to get EDID, what about VBT? */ + if (!intel_panel_preferred_fixed_mode(intel_connector)) +diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c +index d1d1b59102d69..d055e41185582 100644 +--- a/drivers/gpu/drm/i915/display/intel_panel.c ++++ b/drivers/gpu/drm/i915/display/intel_panel.c +@@ -75,9 +75,8 @@ const struct drm_display_mode * + intel_panel_downclock_mode(struct intel_connector *connector, + const struct drm_display_mode *adjusted_mode) + { +- struct drm_i915_private *i915 = to_i915(connector->base.dev); + const struct drm_display_mode *fixed_mode, *best_mode = NULL; +- int min_vrefresh = i915->vbt.seamless_drrs_min_refresh_rate; ++ int min_vrefresh = connector->panel.vbt.seamless_drrs_min_refresh_rate; + int max_vrefresh = drm_mode_vrefresh(adjusted_mode); + + /* pick the fixed_mode with the lowest refresh rate */ +@@ -113,13 +112,11 @@ int intel_panel_get_modes(struct intel_connector *connector) + + enum drrs_type intel_panel_drrs_type(struct intel_connector *connector) + { +- struct drm_i915_private *i915 = to_i915(connector->base.dev); +- + if (list_empty(&connector->panel.fixed_modes) || + list_is_singular(&connector->panel.fixed_modes)) + return DRRS_TYPE_NONE; + +- return i915->vbt.drrs_type; ++ return connector->panel.vbt.drrs_type; + } + + int intel_panel_compute_config(struct intel_connector *connector, +@@ -260,7 +257,7 @@ void intel_panel_add_vbt_lfp_fixed_mode(struct intel_connector *connector) + struct drm_i915_private *i915 = to_i915(connector->base.dev); + const struct drm_display_mode *mode; + +- mode = i915->vbt.lfp_lvds_vbt_mode; ++ mode = connector->panel.vbt.lfp_lvds_vbt_mode; + if (!mode) + return; + +@@ -274,7 +271,7 @@ void intel_panel_add_vbt_sdvo_fixed_mode(struct intel_connector *connector) + struct drm_i915_private *i915 = to_i915(connector->base.dev); + const struct drm_display_mode *mode; + +- mode = i915->vbt.sdvo_lvds_vbt_mode; ++ mode = connector->panel.vbt.sdvo_lvds_vbt_mode; + if (!mode) + return; + +@@ -639,6 +636,8 @@ void 
intel_panel_fini(struct intel_connector *connector) + + intel_backlight_destroy(panel); + ++ intel_bios_fini_panel(panel); ++ + list_for_each_entry_safe(fixed_mode, next, &panel->fixed_modes, head) { + list_del(&fixed_mode->head); + drm_mode_destroy(connector->base.dev, fixed_mode); +diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c +index 5a598dd060391..a226e4e5c5698 100644 +--- a/drivers/gpu/drm/i915/display/intel_pps.c ++++ b/drivers/gpu/drm/i915/display/intel_pps.c +@@ -209,7 +209,8 @@ static int + bxt_power_sequencer_idx(struct intel_dp *intel_dp) + { + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); +- int backlight_controller = dev_priv->vbt.backlight.controller; ++ struct intel_connector *connector = intel_dp->attached_connector; ++ int backlight_controller = connector->panel.vbt.backlight.controller; + + lockdep_assert_held(&dev_priv->pps_mutex); + +@@ -1159,53 +1160,84 @@ intel_pps_verify_state(struct intel_dp *intel_dp) + } + } + +-static void pps_init_delays(struct intel_dp *intel_dp) ++static void pps_init_delays_cur(struct intel_dp *intel_dp, ++ struct edp_power_seq *cur) + { + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); +- struct edp_power_seq cur, vbt, spec, +- *final = &intel_dp->pps.pps_delays; + + lockdep_assert_held(&dev_priv->pps_mutex); + +- /* already initialized? */ +- if (final->t11_t12 != 0) +- return; ++ intel_pps_readout_hw_state(intel_dp, cur); ++ ++ intel_pps_dump_state(intel_dp, "cur", cur); ++} + +- intel_pps_readout_hw_state(intel_dp, &cur); ++static void pps_init_delays_vbt(struct intel_dp *intel_dp, ++ struct edp_power_seq *vbt) ++{ ++ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); ++ struct intel_connector *connector = intel_dp->attached_connector; + +- intel_pps_dump_state(intel_dp, "cur", &cur); ++ *vbt = connector->panel.vbt.edp.pps; + +- vbt = dev_priv->vbt.edp.pps; + /* On Toshiba Satellite P50-C-18C system the VBT T12 delay + * of 500ms appears to be too short. Ocassionally the panel + * just fails to power back on. Increasing the delay to 800ms + * seems sufficient to avoid this problem. + */ + if (dev_priv->quirks & QUIRK_INCREASE_T12_DELAY) { +- vbt.t11_t12 = max_t(u16, vbt.t11_t12, 1300 * 10); ++ vbt->t11_t12 = max_t(u16, vbt->t11_t12, 1300 * 10); + drm_dbg_kms(&dev_priv->drm, + "Increasing T12 panel delay as per the quirk to %d\n", +- vbt.t11_t12); ++ vbt->t11_t12); + } ++ + /* T11_T12 delay is special and actually in units of 100ms, but zero + * based in the hw (so we need to add 100 ms). But the sw vbt + * table multiplies it with 1000 to make it in units of 100usec, + * too. */ +- vbt.t11_t12 += 100 * 10; ++ vbt->t11_t12 += 100 * 10; ++ ++ intel_pps_dump_state(intel_dp, "vbt", vbt); ++} ++ ++static void pps_init_delays_spec(struct intel_dp *intel_dp, ++ struct edp_power_seq *spec) ++{ ++ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); ++ ++ lockdep_assert_held(&dev_priv->pps_mutex); + + /* Upper limits from eDP 1.3 spec. Note that we use the clunky units of + * our hw here, which are all in 100usec. 
*/ +- spec.t1_t3 = 210 * 10; +- spec.t8 = 50 * 10; /* no limit for t8, use t7 instead */ +- spec.t9 = 50 * 10; /* no limit for t9, make it symmetric with t8 */ +- spec.t10 = 500 * 10; ++ spec->t1_t3 = 210 * 10; ++ spec->t8 = 50 * 10; /* no limit for t8, use t7 instead */ ++ spec->t9 = 50 * 10; /* no limit for t9, make it symmetric with t8 */ ++ spec->t10 = 500 * 10; + /* This one is special and actually in units of 100ms, but zero + * based in the hw (so we need to add 100 ms). But the sw vbt + * table multiplies it with 1000 to make it in units of 100usec, + * too. */ +- spec.t11_t12 = (510 + 100) * 10; ++ spec->t11_t12 = (510 + 100) * 10; ++ ++ intel_pps_dump_state(intel_dp, "spec", spec); ++} ++ ++static void pps_init_delays(struct intel_dp *intel_dp) ++{ ++ struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); ++ struct edp_power_seq cur, vbt, spec, ++ *final = &intel_dp->pps.pps_delays; ++ ++ lockdep_assert_held(&dev_priv->pps_mutex); ++ ++ /* already initialized? */ ++ if (final->t11_t12 != 0) ++ return; + +- intel_pps_dump_state(intel_dp, "vbt", &vbt); ++ pps_init_delays_cur(intel_dp, &cur); ++ pps_init_delays_vbt(intel_dp, &vbt); ++ pps_init_delays_spec(intel_dp, &spec); + + /* Use the max of the register settings and vbt. If both are + * unset, fall back to the spec limits. */ +diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c +index 06db407e2749f..8f09203e0cf03 100644 +--- a/drivers/gpu/drm/i915/display/intel_psr.c ++++ b/drivers/gpu/drm/i915/display/intel_psr.c +@@ -86,10 +86,13 @@ + + static bool psr_global_enabled(struct intel_dp *intel_dp) + { ++ struct intel_connector *connector = intel_dp->attached_connector; + struct drm_i915_private *i915 = dp_to_i915(intel_dp); + + switch (intel_dp->psr.debug & I915_PSR_DEBUG_MODE_MASK) { + case I915_PSR_DEBUG_DEFAULT: ++ if (i915->params.enable_psr == -1) ++ return connector->panel.vbt.psr.enable; + return i915->params.enable_psr; + case I915_PSR_DEBUG_DISABLE: + return false; +@@ -399,6 +402,7 @@ static void intel_psr_enable_sink(struct intel_dp *intel_dp) + + static u32 intel_psr1_get_tp_time(struct intel_dp *intel_dp) + { ++ struct intel_connector *connector = intel_dp->attached_connector; + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + u32 val = 0; + +@@ -411,20 +415,20 @@ static u32 intel_psr1_get_tp_time(struct intel_dp *intel_dp) + goto check_tp3_sel; + } + +- if (dev_priv->vbt.psr.tp1_wakeup_time_us == 0) ++ if (connector->panel.vbt.psr.tp1_wakeup_time_us == 0) + val |= EDP_PSR_TP1_TIME_0us; +- else if (dev_priv->vbt.psr.tp1_wakeup_time_us <= 100) ++ else if (connector->panel.vbt.psr.tp1_wakeup_time_us <= 100) + val |= EDP_PSR_TP1_TIME_100us; +- else if (dev_priv->vbt.psr.tp1_wakeup_time_us <= 500) ++ else if (connector->panel.vbt.psr.tp1_wakeup_time_us <= 500) + val |= EDP_PSR_TP1_TIME_500us; + else + val |= EDP_PSR_TP1_TIME_2500us; + +- if (dev_priv->vbt.psr.tp2_tp3_wakeup_time_us == 0) ++ if (connector->panel.vbt.psr.tp2_tp3_wakeup_time_us == 0) + val |= EDP_PSR_TP2_TP3_TIME_0us; +- else if (dev_priv->vbt.psr.tp2_tp3_wakeup_time_us <= 100) ++ else if (connector->panel.vbt.psr.tp2_tp3_wakeup_time_us <= 100) + val |= EDP_PSR_TP2_TP3_TIME_100us; +- else if (dev_priv->vbt.psr.tp2_tp3_wakeup_time_us <= 500) ++ else if (connector->panel.vbt.psr.tp2_tp3_wakeup_time_us <= 500) + val |= EDP_PSR_TP2_TP3_TIME_500us; + else + val |= EDP_PSR_TP2_TP3_TIME_2500us; +@@ -441,13 +445,14 @@ check_tp3_sel: + + static u8 psr_compute_idle_frames(struct intel_dp *intel_dp) + { ++ 
struct intel_connector *connector = intel_dp->attached_connector; + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + int idle_frames; + + /* Let's use 6 as the minimum to cover all known cases including the + * off-by-one issue that HW has in some cases. + */ +- idle_frames = max(6, dev_priv->vbt.psr.idle_frames); ++ idle_frames = max(6, connector->panel.vbt.psr.idle_frames); + idle_frames = max(idle_frames, intel_dp->psr.sink_sync_latency + 1); + + if (drm_WARN_ON(&dev_priv->drm, idle_frames > 0xf)) +@@ -483,18 +488,19 @@ static void hsw_activate_psr1(struct intel_dp *intel_dp) + + static u32 intel_psr2_get_tp_time(struct intel_dp *intel_dp) + { ++ struct intel_connector *connector = intel_dp->attached_connector; + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + u32 val = 0; + + if (dev_priv->params.psr_safest_params) + return EDP_PSR2_TP2_TIME_2500us; + +- if (dev_priv->vbt.psr.psr2_tp2_tp3_wakeup_time_us >= 0 && +- dev_priv->vbt.psr.psr2_tp2_tp3_wakeup_time_us <= 50) ++ if (connector->panel.vbt.psr.psr2_tp2_tp3_wakeup_time_us >= 0 && ++ connector->panel.vbt.psr.psr2_tp2_tp3_wakeup_time_us <= 50) + val |= EDP_PSR2_TP2_TIME_50us; +- else if (dev_priv->vbt.psr.psr2_tp2_tp3_wakeup_time_us <= 100) ++ else if (connector->panel.vbt.psr.psr2_tp2_tp3_wakeup_time_us <= 100) + val |= EDP_PSR2_TP2_TIME_100us; +- else if (dev_priv->vbt.psr.psr2_tp2_tp3_wakeup_time_us <= 500) ++ else if (connector->panel.vbt.psr.psr2_tp2_tp3_wakeup_time_us <= 500) + val |= EDP_PSR2_TP2_TIME_500us; + else + val |= EDP_PSR2_TP2_TIME_2500us; +@@ -2344,6 +2350,7 @@ unlock: + */ + void intel_psr_init(struct intel_dp *intel_dp) + { ++ struct intel_connector *connector = intel_dp->attached_connector; + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + +@@ -2367,14 +2374,10 @@ void intel_psr_init(struct intel_dp *intel_dp) + + intel_dp->psr.source_support = true; + +- if (dev_priv->params.enable_psr == -1) +- if (!dev_priv->vbt.psr.enable) +- dev_priv->params.enable_psr = 0; +- + /* Set link_standby x link_off defaults */ + if (DISPLAY_VER(dev_priv) < 12) + /* For new platforms up to TGL let's respect VBT back again */ +- intel_dp->psr.link_standby = dev_priv->vbt.psr.full_link; ++ intel_dp->psr.link_standby = connector->panel.vbt.psr.full_link; + + INIT_WORK(&intel_dp->psr.work, intel_psr_work); + INIT_DELAYED_WORK(&intel_dp->psr.dc3co_work, tgl_dc3co_disable_work); +diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c +index d81855d57cdc9..14a64bd61176d 100644 +--- a/drivers/gpu/drm/i915/display/intel_sdvo.c ++++ b/drivers/gpu/drm/i915/display/intel_sdvo.c +@@ -2869,6 +2869,7 @@ static bool + intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, int device) + { + struct drm_encoder *encoder = &intel_sdvo->base.base; ++ struct drm_i915_private *i915 = to_i915(encoder->dev); + struct drm_connector *connector; + struct intel_connector *intel_connector; + struct intel_sdvo_connector *intel_sdvo_connector; +@@ -2900,6 +2901,8 @@ intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, int device) + if (!intel_sdvo_create_enhance_property(intel_sdvo, intel_sdvo_connector)) + goto err; + ++ intel_bios_init_panel(i915, &intel_connector->panel); ++ + /* + * Fetch modes from VBT. For SDVO prefer the VBT mode since some + * SDVO->LVDS transcoders can't cope with the EDID mode. 
+diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c +index 1954f07f0d3ec..02f75e95b2ec1 100644 +--- a/drivers/gpu/drm/i915/display/vlv_dsi.c ++++ b/drivers/gpu/drm/i915/display/vlv_dsi.c +@@ -782,6 +782,7 @@ static void intel_dsi_pre_enable(struct intel_atomic_state *state, + { + struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); + struct intel_crtc *crtc = to_intel_crtc(pipe_config->uapi.crtc); ++ struct intel_connector *connector = to_intel_connector(conn_state->connector); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + enum port port; +@@ -838,7 +839,7 @@ static void intel_dsi_pre_enable(struct intel_atomic_state *state, + * the delay in that case. If there is no deassert-seq, then an + * unconditional msleep is used to give the panel time to power-on. + */ +- if (dev_priv->vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET]) { ++ if (connector->panel.vbt.dsi.sequence[MIPI_SEQ_DEASSERT_RESET]) { + intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay); + intel_dsi_vbt_exec_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + } else { +@@ -1690,7 +1691,8 @@ static void vlv_dphy_param_init(struct intel_dsi *intel_dsi) + { + struct drm_device *dev = intel_dsi->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); +- struct mipi_config *mipi_config = dev_priv->vbt.dsi.config; ++ struct intel_connector *connector = intel_dsi->attached_connector; ++ struct mipi_config *mipi_config = connector->panel.vbt.dsi.config; + u32 tlpx_ns, extra_byte_count, tlpx_ui; + u32 ui_num, ui_den; + u32 prepare_cnt, exit_zero_cnt, clk_zero_cnt, trail_cnt; +@@ -1924,13 +1926,22 @@ void vlv_dsi_init(struct drm_i915_private *dev_priv) + + intel_dsi->panel_power_off_time = ktime_get_boottime(); + +- if (dev_priv->vbt.dsi.config->dual_link) ++ intel_bios_init_panel(dev_priv, &intel_connector->panel); ++ ++ if (intel_connector->panel.vbt.dsi.config->dual_link) + intel_dsi->ports = BIT(PORT_A) | BIT(PORT_C); + else + intel_dsi->ports = BIT(port); + +- intel_dsi->dcs_backlight_ports = dev_priv->vbt.dsi.bl_ports; +- intel_dsi->dcs_cabc_ports = dev_priv->vbt.dsi.cabc_ports; ++ if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.bl_ports & ~intel_dsi->ports)) ++ intel_connector->panel.vbt.dsi.bl_ports &= intel_dsi->ports; ++ ++ intel_dsi->dcs_backlight_ports = intel_connector->panel.vbt.dsi.bl_ports; ++ ++ if (drm_WARN_ON(&dev_priv->drm, intel_connector->panel.vbt.dsi.cabc_ports & ~intel_dsi->ports)) ++ intel_connector->panel.vbt.dsi.cabc_ports &= intel_dsi->ports; ++ ++ intel_dsi->dcs_cabc_ports = intel_connector->panel.vbt.dsi.cabc_ports; + + /* Create a DSI host (and a device) for each port. 
*/ + for_each_dsi_port(port, intel_dsi->ports) { +diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c +index 321af109d484f..8da42af0256ab 100644 +--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c ++++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c +@@ -1269,6 +1269,10 @@ static void i915_gem_context_release_work(struct work_struct *work) + trace_i915_context_free(ctx); + GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); + ++ spin_lock(&ctx->i915->gem.contexts.lock); ++ list_del(&ctx->link); ++ spin_unlock(&ctx->i915->gem.contexts.lock); ++ + if (ctx->syncobj) + drm_syncobj_put(ctx->syncobj); + +@@ -1514,10 +1518,6 @@ static void context_close(struct i915_gem_context *ctx) + + ctx->file_priv = ERR_PTR(-EBADF); + +- spin_lock(&ctx->i915->gem.contexts.lock); +- list_del(&ctx->link); +- spin_unlock(&ctx->i915->gem.contexts.lock); +- + client = ctx->client; + if (client) { + spin_lock(&client->ctx_lock); +diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h +index 5184d70d48382..554d79bc0312d 100644 +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -194,12 +194,6 @@ struct drm_i915_display_funcs { + + #define I915_COLOR_UNEVICTABLE (-1) /* a non-vma sharing the address space */ + +-enum drrs_type { +- DRRS_TYPE_NONE, +- DRRS_TYPE_STATIC, +- DRRS_TYPE_SEAMLESS, +-}; +- + #define QUIRK_LVDS_SSC_DISABLE (1<<1) + #define QUIRK_INVERT_BRIGHTNESS (1<<2) + #define QUIRK_BACKLIGHT_PRESENT (1<<3) +@@ -308,76 +302,19 @@ struct intel_vbt_data { + /* bdb version */ + u16 version; + +- struct drm_display_mode *lfp_lvds_vbt_mode; /* if any */ +- struct drm_display_mode *sdvo_lvds_vbt_mode; /* if any */ +- + /* Feature bits */ + unsigned int int_tv_support:1; +- unsigned int lvds_dither:1; + unsigned int int_crt_support:1; + unsigned int lvds_use_ssc:1; + unsigned int int_lvds_support:1; + unsigned int display_clock_mode:1; + unsigned int fdi_rx_polarity_inverted:1; +- unsigned int panel_type:4; + int lvds_ssc_freq; +- unsigned int bios_lvds_val; /* initial [PCH_]LVDS reg val in VBIOS */ + enum drm_panel_orientation orientation; + + bool override_afc_startup; + u8 override_afc_startup_val; + +- u8 seamless_drrs_min_refresh_rate; +- enum drrs_type drrs_type; +- +- struct { +- int rate; +- int lanes; +- int preemphasis; +- int vswing; +- int bpp; +- struct edp_power_seq pps; +- u8 drrs_msa_timing_delay; +- bool low_vswing; +- bool initialized; +- bool hobl; +- } edp; +- +- struct { +- bool enable; +- bool full_link; +- bool require_aux_wakeup; +- int idle_frames; +- int tp1_wakeup_time_us; +- int tp2_tp3_wakeup_time_us; +- int psr2_tp2_tp3_wakeup_time_us; +- } psr; +- +- struct { +- u16 pwm_freq_hz; +- u16 brightness_precision_bits; +- bool present; +- bool active_low_pwm; +- u8 min_brightness; /* min_brightness/255 of max */ +- u8 controller; /* brightness controller number */ +- enum intel_backlight_type type; +- } backlight; +- +- /* MIPI DSI */ +- struct { +- u16 panel_id; +- struct mipi_config *config; +- struct mipi_pps_data *pps; +- u16 bl_ports; +- u16 cabc_ports; +- u8 seq_version; +- u32 size; +- u8 *data; +- const u8 *sequence[MIPI_SEQ_MAX]; +- u8 *deassert_seq; /* Used by fixup_mipi_sequences() */ +- enum drm_panel_orientation orientation; +- } dsi; +- + int crt_ddc_pin; + + struct list_head display_devices; +diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c +index 702e5b89be226..b605d0ceaefad 100644 +--- a/drivers/gpu/drm/i915/i915_gem.c ++++ 
b/drivers/gpu/drm/i915/i915_gem.c +@@ -1191,7 +1191,8 @@ void i915_gem_driver_release(struct drm_i915_private *dev_priv) + + intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc); + +- i915_gem_drain_freed_objects(dev_priv); ++ /* Flush any outstanding work, including i915_gem_context.release_work. */ ++ i915_gem_drain_workqueue(dev_priv); + + drm_WARN_ON(&dev_priv->drm, !list_empty(&dev_priv->gem.contexts.list)); + } +diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +index 5d7504a72b11c..e244aa408d9d4 100644 +--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c ++++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +@@ -151,7 +151,7 @@ static void mtk_dither_config(struct device *dev, unsigned int w, + { + struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); + +- mtk_ddp_write(cmdq_pkt, h << 16 | w, &priv->cmdq_reg, priv->regs, DISP_REG_DITHER_SIZE); ++ mtk_ddp_write(cmdq_pkt, w << 16 | h, &priv->cmdq_reg, priv->regs, DISP_REG_DITHER_SIZE); + mtk_ddp_write(cmdq_pkt, DITHER_RELAY_MODE, &priv->cmdq_reg, priv->regs, + DISP_REG_DITHER_CFG); + mtk_dither_set_common(priv->regs, &priv->cmdq_reg, bpc, DISP_REG_DITHER_CFG, +diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c +index af2f123e9a9a9..9a3b86c29b503 100644 +--- a/drivers/gpu/drm/mediatek/mtk_dsi.c ++++ b/drivers/gpu/drm/mediatek/mtk_dsi.c +@@ -685,6 +685,16 @@ static void mtk_dsi_poweroff(struct mtk_dsi *dsi) + if (--dsi->refcount != 0) + return; + ++ /* ++ * mtk_dsi_stop() and mtk_dsi_start() is asymmetric, since ++ * mtk_dsi_stop() should be called after mtk_drm_crtc_atomic_disable(), ++ * which needs irq for vblank, and mtk_dsi_stop() will disable irq. ++ * mtk_dsi_start() needs to be called in mtk_output_dsi_enable(), ++ * after dsi is fully set. ++ */ ++ mtk_dsi_stop(dsi); ++ ++ mtk_dsi_switch_to_cmd_mode(dsi, VM_DONE_INT_FLAG, 500); + mtk_dsi_reset_engine(dsi); + mtk_dsi_lane0_ulp_mode_enter(dsi); + mtk_dsi_clk_ulp_mode_enter(dsi); +@@ -735,17 +745,6 @@ static void mtk_output_dsi_disable(struct mtk_dsi *dsi) + if (!dsi->enabled) + return; + +- /* +- * mtk_dsi_stop() and mtk_dsi_start() is asymmetric, since +- * mtk_dsi_stop() should be called after mtk_drm_crtc_atomic_disable(), +- * which needs irq for vblank, and mtk_dsi_stop() will disable irq. +- * mtk_dsi_start() needs to be called in mtk_output_dsi_enable(), +- * after dsi is fully set. 
+- */ +- mtk_dsi_stop(dsi); +- +- mtk_dsi_switch_to_cmd_mode(dsi, VM_DONE_INT_FLAG, 500); +- + dsi->enabled = false; + } + +@@ -808,10 +807,13 @@ static void mtk_dsi_bridge_atomic_post_disable(struct drm_bridge *bridge, + + static const struct drm_bridge_funcs mtk_dsi_bridge_funcs = { + .attach = mtk_dsi_bridge_attach, ++ .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, + .atomic_disable = mtk_dsi_bridge_atomic_disable, ++ .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, + .atomic_enable = mtk_dsi_bridge_atomic_enable, + .atomic_pre_enable = mtk_dsi_bridge_atomic_pre_enable, + .atomic_post_disable = mtk_dsi_bridge_atomic_post_disable, ++ .atomic_reset = drm_atomic_helper_bridge_reset, + .mode_set = mtk_dsi_bridge_mode_set, + }; + +diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c +index 4a2e580a2f7b7..0e001ce8a40fd 100644 +--- a/drivers/gpu/drm/panel/panel-simple.c ++++ b/drivers/gpu/drm/panel/panel-simple.c +@@ -2136,7 +2136,7 @@ static const struct panel_desc innolux_g121i1_l01 = { + .enable = 200, + .disable = 20, + }, +- .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, ++ .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, + .connector_type = DRM_MODE_CONNECTOR_LVDS, + }; + +diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c +index c204e9b95c1f7..518ee13b1d6f4 100644 +--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c ++++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c +@@ -283,8 +283,9 @@ static int cdn_dp_connector_get_modes(struct drm_connector *connector) + return ret; + } + +-static int cdn_dp_connector_mode_valid(struct drm_connector *connector, +- struct drm_display_mode *mode) ++static enum drm_mode_status ++cdn_dp_connector_mode_valid(struct drm_connector *connector, ++ struct drm_display_mode *mode) + { + struct cdn_dp_device *dp = connector_to_dp(connector); + struct drm_display_info *display_info = &dp->connector.display_info; +diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c +index 547ae334e5cd8..027029efb0088 100644 +--- a/drivers/hv/vmbus_drv.c ++++ b/drivers/hv/vmbus_drv.c +@@ -2309,7 +2309,7 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, + bool fb_overlap_ok) + { + struct resource *iter, *shadow; +- resource_size_t range_min, range_max, start; ++ resource_size_t range_min, range_max, start, end; + const char *dev_n = dev_name(&device_obj->device); + int retval; + +@@ -2344,6 +2344,14 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, + range_max = iter->end; + start = (range_min + align - 1) & ~(align - 1); + for (; start + size - 1 <= range_max; start += align) { ++ end = start + size - 1; ++ ++ /* Skip the whole fb_mmio region if not fb_overlap_ok */ ++ if (!fb_overlap_ok && fb_mmio && ++ (((start >= fb_mmio->start) && (start <= fb_mmio->end)) || ++ ((end >= fb_mmio->start) && (end <= fb_mmio->end)))) ++ continue; ++ + shadow = __request_region(iter, start, size, NULL, + IORESOURCE_BUSY); + if (!shadow) +diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c +index e47fa34656717..3082183bd66a4 100644 +--- a/drivers/i2c/busses/i2c-imx.c ++++ b/drivers/i2c/busses/i2c-imx.c +@@ -1583,7 +1583,7 @@ static int i2c_imx_remove(struct platform_device *pdev) + if (i2c_imx->dma) + i2c_imx_dma_free(i2c_imx); + +- if (ret == 0) { ++ if (ret >= 0) { + /* setup chip registers to defaults */ + imx_i2c_write_reg(0, i2c_imx, IMX_I2C_IADR); + imx_i2c_write_reg(0, i2c_imx, IMX_I2C_IFDR); +diff --git 
a/drivers/i2c/busses/i2c-mlxbf.c b/drivers/i2c/busses/i2c-mlxbf.c +index 8716032f030a0..ad5efd7497d1c 100644 +--- a/drivers/i2c/busses/i2c-mlxbf.c ++++ b/drivers/i2c/busses/i2c-mlxbf.c +@@ -6,6 +6,7 @@ + */ + + #include ++#include + #include + #include + #include +@@ -63,13 +64,14 @@ + */ + #define MLXBF_I2C_TYU_PLL_OUT_FREQ (400 * 1000 * 1000) + /* Reference clock for Bluefield - 156 MHz. */ +-#define MLXBF_I2C_PLL_IN_FREQ (156 * 1000 * 1000) ++#define MLXBF_I2C_PLL_IN_FREQ 156250000ULL + + /* Constant used to determine the PLL frequency. */ +-#define MLNXBF_I2C_COREPLL_CONST 16384 ++#define MLNXBF_I2C_COREPLL_CONST 16384ULL ++ ++#define MLXBF_I2C_FREQUENCY_1GHZ 1000000000ULL + + /* PLL registers. */ +-#define MLXBF_I2C_CORE_PLL_REG0 0x0 + #define MLXBF_I2C_CORE_PLL_REG1 0x4 + #define MLXBF_I2C_CORE_PLL_REG2 0x8 + +@@ -181,22 +183,15 @@ + #define MLXBF_I2C_COREPLL_FREQ MLXBF_I2C_TYU_PLL_OUT_FREQ + + /* Core PLL TYU configuration. */ +-#define MLXBF_I2C_COREPLL_CORE_F_TYU_MASK GENMASK(12, 0) +-#define MLXBF_I2C_COREPLL_CORE_OD_TYU_MASK GENMASK(3, 0) +-#define MLXBF_I2C_COREPLL_CORE_R_TYU_MASK GENMASK(5, 0) +- +-#define MLXBF_I2C_COREPLL_CORE_F_TYU_SHIFT 3 +-#define MLXBF_I2C_COREPLL_CORE_OD_TYU_SHIFT 16 +-#define MLXBF_I2C_COREPLL_CORE_R_TYU_SHIFT 20 ++#define MLXBF_I2C_COREPLL_CORE_F_TYU_MASK GENMASK(15, 3) ++#define MLXBF_I2C_COREPLL_CORE_OD_TYU_MASK GENMASK(19, 16) ++#define MLXBF_I2C_COREPLL_CORE_R_TYU_MASK GENMASK(25, 20) + + /* Core PLL YU configuration. */ + #define MLXBF_I2C_COREPLL_CORE_F_YU_MASK GENMASK(25, 0) + #define MLXBF_I2C_COREPLL_CORE_OD_YU_MASK GENMASK(3, 0) +-#define MLXBF_I2C_COREPLL_CORE_R_YU_MASK GENMASK(5, 0) ++#define MLXBF_I2C_COREPLL_CORE_R_YU_MASK GENMASK(31, 26) + +-#define MLXBF_I2C_COREPLL_CORE_F_YU_SHIFT 0 +-#define MLXBF_I2C_COREPLL_CORE_OD_YU_SHIFT 1 +-#define MLXBF_I2C_COREPLL_CORE_R_YU_SHIFT 26 + + /* Core PLL frequency. */ + static u64 mlxbf_i2c_corepll_frequency; +@@ -479,8 +474,6 @@ static struct mutex mlxbf_i2c_bus_lock; + #define MLXBF_I2C_MASK_8 GENMASK(7, 0) + #define MLXBF_I2C_MASK_16 GENMASK(15, 0) + +-#define MLXBF_I2C_FREQUENCY_1GHZ 1000000000 +- + /* + * Function to poll a set of bits at a specific address; it checks whether + * the bits are equal to zero when eq_zero is set to 'true', and not equal +@@ -669,7 +662,7 @@ static int mlxbf_i2c_smbus_enable(struct mlxbf_i2c_priv *priv, u8 slave, + /* Clear status bits. */ + writel(0x0, priv->smbus->io + MLXBF_I2C_SMBUS_MASTER_STATUS); + /* Set the cause data. */ +- writel(~0x0, priv->smbus->io + MLXBF_I2C_CAUSE_OR_CLEAR); ++ writel(~0x0, priv->mst_cause->io + MLXBF_I2C_CAUSE_OR_CLEAR); + /* Zero PEC byte. */ + writel(0x0, priv->smbus->io + MLXBF_I2C_SMBUS_MASTER_PEC); + /* Zero byte count. 
*/ +@@ -738,6 +731,9 @@ mlxbf_i2c_smbus_start_transaction(struct mlxbf_i2c_priv *priv, + if (flags & MLXBF_I2C_F_WRITE) { + write_en = 1; + write_len += operation->length; ++ if (data_idx + operation->length > ++ MLXBF_I2C_MASTER_DATA_DESC_SIZE) ++ return -ENOBUFS; + memcpy(data_desc + data_idx, + operation->buffer, operation->length); + data_idx += operation->length; +@@ -1407,24 +1403,19 @@ static int mlxbf_i2c_init_master(struct platform_device *pdev, + return 0; + } + +-static u64 mlxbf_calculate_freq_from_tyu(struct mlxbf_i2c_resource *corepll_res) ++static u64 mlxbf_i2c_calculate_freq_from_tyu(struct mlxbf_i2c_resource *corepll_res) + { +- u64 core_frequency, pad_frequency; ++ u64 core_frequency; + u8 core_od, core_r; + u32 corepll_val; + u16 core_f; + +- pad_frequency = MLXBF_I2C_PLL_IN_FREQ; +- + corepll_val = readl(corepll_res->io + MLXBF_I2C_CORE_PLL_REG1); + + /* Get Core PLL configuration bits. */ +- core_f = rol32(corepll_val, MLXBF_I2C_COREPLL_CORE_F_TYU_SHIFT) & +- MLXBF_I2C_COREPLL_CORE_F_TYU_MASK; +- core_od = rol32(corepll_val, MLXBF_I2C_COREPLL_CORE_OD_TYU_SHIFT) & +- MLXBF_I2C_COREPLL_CORE_OD_TYU_MASK; +- core_r = rol32(corepll_val, MLXBF_I2C_COREPLL_CORE_R_TYU_SHIFT) & +- MLXBF_I2C_COREPLL_CORE_R_TYU_MASK; ++ core_f = FIELD_GET(MLXBF_I2C_COREPLL_CORE_F_TYU_MASK, corepll_val); ++ core_od = FIELD_GET(MLXBF_I2C_COREPLL_CORE_OD_TYU_MASK, corepll_val); ++ core_r = FIELD_GET(MLXBF_I2C_COREPLL_CORE_R_TYU_MASK, corepll_val); + + /* + * Compute PLL output frequency as follow: +@@ -1436,31 +1427,26 @@ static u64 mlxbf_calculate_freq_from_tyu(struct mlxbf_i2c_resource *corepll_res) + * Where PLL_OUT_FREQ and PLL_IN_FREQ refer to CoreFrequency + * and PadFrequency, respectively. + */ +- core_frequency = pad_frequency * (++core_f); ++ core_frequency = MLXBF_I2C_PLL_IN_FREQ * (++core_f); + core_frequency /= (++core_r) * (++core_od); + + return core_frequency; + } + +-static u64 mlxbf_calculate_freq_from_yu(struct mlxbf_i2c_resource *corepll_res) ++static u64 mlxbf_i2c_calculate_freq_from_yu(struct mlxbf_i2c_resource *corepll_res) + { + u32 corepll_reg1_val, corepll_reg2_val; +- u64 corepll_frequency, pad_frequency; ++ u64 corepll_frequency; + u8 core_od, core_r; + u32 core_f; + +- pad_frequency = MLXBF_I2C_PLL_IN_FREQ; +- + corepll_reg1_val = readl(corepll_res->io + MLXBF_I2C_CORE_PLL_REG1); + corepll_reg2_val = readl(corepll_res->io + MLXBF_I2C_CORE_PLL_REG2); + + /* Get Core PLL configuration bits */ +- core_f = rol32(corepll_reg1_val, MLXBF_I2C_COREPLL_CORE_F_YU_SHIFT) & +- MLXBF_I2C_COREPLL_CORE_F_YU_MASK; +- core_r = rol32(corepll_reg1_val, MLXBF_I2C_COREPLL_CORE_R_YU_SHIFT) & +- MLXBF_I2C_COREPLL_CORE_R_YU_MASK; +- core_od = rol32(corepll_reg2_val, MLXBF_I2C_COREPLL_CORE_OD_YU_SHIFT) & +- MLXBF_I2C_COREPLL_CORE_OD_YU_MASK; ++ core_f = FIELD_GET(MLXBF_I2C_COREPLL_CORE_F_YU_MASK, corepll_reg1_val); ++ core_r = FIELD_GET(MLXBF_I2C_COREPLL_CORE_R_YU_MASK, corepll_reg1_val); ++ core_od = FIELD_GET(MLXBF_I2C_COREPLL_CORE_OD_YU_MASK, corepll_reg2_val); + + /* + * Compute PLL output frequency as follow: +@@ -1472,7 +1458,7 @@ static u64 mlxbf_calculate_freq_from_yu(struct mlxbf_i2c_resource *corepll_res) + * Where PLL_OUT_FREQ and PLL_IN_FREQ refer to CoreFrequency + * and PadFrequency, respectively. 
+ */ +- corepll_frequency = (pad_frequency * core_f) / MLNXBF_I2C_COREPLL_CONST; ++ corepll_frequency = (MLXBF_I2C_PLL_IN_FREQ * core_f) / MLNXBF_I2C_COREPLL_CONST; + corepll_frequency /= (++core_r) * (++core_od); + + return corepll_frequency; +@@ -2180,14 +2166,14 @@ static struct mlxbf_i2c_chip_info mlxbf_i2c_chip[] = { + [1] = &mlxbf_i2c_corepll_res[MLXBF_I2C_CHIP_TYPE_1], + [2] = &mlxbf_i2c_gpio_res[MLXBF_I2C_CHIP_TYPE_1] + }, +- .calculate_freq = mlxbf_calculate_freq_from_tyu ++ .calculate_freq = mlxbf_i2c_calculate_freq_from_tyu + }, + [MLXBF_I2C_CHIP_TYPE_2] = { + .type = MLXBF_I2C_CHIP_TYPE_2, + .shared_res = { + [0] = &mlxbf_i2c_corepll_res[MLXBF_I2C_CHIP_TYPE_2] + }, +- .calculate_freq = mlxbf_calculate_freq_from_yu ++ .calculate_freq = mlxbf_i2c_calculate_freq_from_yu + } + }; + +diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c +index 774507b54b57b..313904be5f3bd 100644 +--- a/drivers/i2c/i2c-mux.c ++++ b/drivers/i2c/i2c-mux.c +@@ -243,9 +243,10 @@ struct i2c_mux_core *i2c_mux_alloc(struct i2c_adapter *parent, + int (*deselect)(struct i2c_mux_core *, u32)) + { + struct i2c_mux_core *muxc; ++ size_t mux_size; + +- muxc = devm_kzalloc(dev, struct_size(muxc, adapter, max_adapters) +- + sizeof_priv, GFP_KERNEL); ++ mux_size = struct_size(muxc, adapter, max_adapters); ++ muxc = devm_kzalloc(dev, size_add(mux_size, sizeof_priv), GFP_KERNEL); + if (!muxc) + return NULL; + if (sizeof_priv) +diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c +index 861a239d905a4..3ed15e8ca6775 100644 +--- a/drivers/iommu/intel/iommu.c ++++ b/drivers/iommu/intel/iommu.c +@@ -419,7 +419,7 @@ static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu) + { + unsigned long fl_sagaw, sl_sagaw; + +- fl_sagaw = BIT(2) | (cap_fl1gp_support(iommu->cap) ? BIT(3) : 0); ++ fl_sagaw = BIT(2) | (cap_5lp_support(iommu->cap) ? BIT(3) : 0); + sl_sagaw = cap_sagaw(iommu->cap); + + /* Second level only. 
*/ +diff --git a/drivers/media/usb/b2c2/flexcop-usb.c b/drivers/media/usb/b2c2/flexcop-usb.c +index 7835bb0f32fc3..e012b21c4fd7a 100644 +--- a/drivers/media/usb/b2c2/flexcop-usb.c ++++ b/drivers/media/usb/b2c2/flexcop-usb.c +@@ -511,7 +511,7 @@ static int flexcop_usb_init(struct flexcop_usb *fc_usb) + + if (fc_usb->uintf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; +- if (!usb_endpoint_is_isoc_in(&fc_usb->uintf->cur_altsetting->endpoint[1].desc)) ++ if (!usb_endpoint_is_isoc_in(&fc_usb->uintf->cur_altsetting->endpoint[0].desc)) + return -ENODEV; + + switch (fc_usb->udev->speed) { +diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c +index f8fdf88fb240c..ecbc46714e681 100644 +--- a/drivers/memstick/core/ms_block.c ++++ b/drivers/memstick/core/ms_block.c +@@ -2188,7 +2188,6 @@ static void msb_remove(struct memstick_dev *card) + + /* Remove the disk */ + del_gendisk(msb->disk); +- blk_cleanup_queue(msb->queue); + blk_mq_free_tag_set(&msb->tag_set); + msb->queue = NULL; + +diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c +index 725ba74ded308..72e91c06c618b 100644 +--- a/drivers/memstick/core/mspro_block.c ++++ b/drivers/memstick/core/mspro_block.c +@@ -1294,7 +1294,6 @@ static void mspro_block_remove(struct memstick_dev *card) + del_gendisk(msb->disk); + dev_dbg(&card->dev, "mspro block remove\n"); + +- blk_cleanup_queue(msb->queue); + blk_mq_free_tag_set(&msb->tag_set); + msb->queue = NULL; + +diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c +index 912a398a9a764..2f89ae55c1773 100644 +--- a/drivers/mmc/core/block.c ++++ b/drivers/mmc/core/block.c +@@ -2509,7 +2509,6 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, + return md; + + err_cleanup_queue: +- blk_cleanup_queue(md->disk->queue); + blk_mq_free_tag_set(&md->queue.tag_set); + err_kfree: + kfree(md); +diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c +index fa5324ceeebe4..f824cfdab75ac 100644 +--- a/drivers/mmc/core/queue.c ++++ b/drivers/mmc/core/queue.c +@@ -494,7 +494,6 @@ void mmc_cleanup_queue(struct mmc_queue *mq) + if (blk_queue_quiesced(q)) + blk_mq_unquiesce_queue(q); + +- blk_cleanup_queue(q); + blk_mq_free_tag_set(&mq->tag_set); + + /* +diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c +index 1f0120cbe9e80..8ad095c19f271 100644 +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -87,8 +87,9 @@ static const u8 null_mac_addr[ETH_ALEN + 2] __long_aligned = { + static u16 ad_ticks_per_sec; + static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000; + +-static const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned = +- MULTICAST_LACPDU_ADDR; ++const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned = { ++ 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 ++}; + + /* ================= main 802.3ad protocol functions ================== */ + static int ad_lacpdu_send(struct port *port); +diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c +index bff0bfd10e235..ab7cb48f8dfdd 100644 +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -865,12 +865,8 @@ static void bond_hw_addr_flush(struct net_device *bond_dev, + dev_uc_unsync(slave_dev, bond_dev); + dev_mc_unsync(slave_dev, bond_dev); + +- if (BOND_MODE(bond) == BOND_MODE_8023AD) { +- /* del lacpdu mc addr from mc list */ +- u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; +- +- dev_mc_del(slave_dev, lacpdu_multicast); +- } ++ if 
(BOND_MODE(bond) == BOND_MODE_8023AD) ++ dev_mc_del(slave_dev, lacpdu_mcast_addr); + } + + /*--------------------------- Active slave change ---------------------------*/ +@@ -890,7 +886,8 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active, + if (bond->dev->flags & IFF_ALLMULTI) + dev_set_allmulti(old_active->dev, -1); + +- bond_hw_addr_flush(bond->dev, old_active->dev); ++ if (bond->dev->flags & IFF_UP) ++ bond_hw_addr_flush(bond->dev, old_active->dev); + } + + if (new_active) { +@@ -901,10 +898,12 @@ static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active, + if (bond->dev->flags & IFF_ALLMULTI) + dev_set_allmulti(new_active->dev, 1); + +- netif_addr_lock_bh(bond->dev); +- dev_uc_sync(new_active->dev, bond->dev); +- dev_mc_sync(new_active->dev, bond->dev); +- netif_addr_unlock_bh(bond->dev); ++ if (bond->dev->flags & IFF_UP) { ++ netif_addr_lock_bh(bond->dev); ++ dev_uc_sync(new_active->dev, bond->dev); ++ dev_mc_sync(new_active->dev, bond->dev); ++ netif_addr_unlock_bh(bond->dev); ++ } + } + } + +@@ -2139,16 +2138,14 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, + } + } + +- netif_addr_lock_bh(bond_dev); +- dev_mc_sync_multiple(slave_dev, bond_dev); +- dev_uc_sync_multiple(slave_dev, bond_dev); +- netif_addr_unlock_bh(bond_dev); +- +- if (BOND_MODE(bond) == BOND_MODE_8023AD) { +- /* add lacpdu mc addr to mc list */ +- u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR; ++ if (bond_dev->flags & IFF_UP) { ++ netif_addr_lock_bh(bond_dev); ++ dev_mc_sync_multiple(slave_dev, bond_dev); ++ dev_uc_sync_multiple(slave_dev, bond_dev); ++ netif_addr_unlock_bh(bond_dev); + +- dev_mc_add(slave_dev, lacpdu_multicast); ++ if (BOND_MODE(bond) == BOND_MODE_8023AD) ++ dev_mc_add(slave_dev, lacpdu_mcast_addr); + } + } + +@@ -2420,7 +2417,8 @@ static int __bond_release_one(struct net_device *bond_dev, + if (old_flags & IFF_ALLMULTI) + dev_set_allmulti(slave_dev, -1); + +- bond_hw_addr_flush(bond_dev, slave_dev); ++ if (old_flags & IFF_UP) ++ bond_hw_addr_flush(bond_dev, slave_dev); + } + + slave_disable_netpoll(slave); +@@ -4157,6 +4155,12 @@ static int bond_open(struct net_device *bond_dev) + struct list_head *iter; + struct slave *slave; + ++ if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN && !bond->rr_tx_counter) { ++ bond->rr_tx_counter = alloc_percpu(u32); ++ if (!bond->rr_tx_counter) ++ return -ENOMEM; ++ } ++ + /* reset slave->backup and slave->inactive */ + if (bond_has_slaves(bond)) { + bond_for_each_slave(bond, slave, iter) { +@@ -4194,6 +4198,9 @@ static int bond_open(struct net_device *bond_dev) + /* register to receive LACPDUs */ + bond->recv_probe = bond_3ad_lacpdu_recv; + bond_3ad_initiate_agg_selection(bond, 1); ++ ++ bond_for_each_slave(bond, slave, iter) ++ dev_mc_add(slave->dev, lacpdu_mcast_addr); + } + + if (bond_mode_can_use_xmit_hash(bond)) +@@ -4205,6 +4212,7 @@ static int bond_open(struct net_device *bond_dev) + static int bond_close(struct net_device *bond_dev) + { + struct bonding *bond = netdev_priv(bond_dev); ++ struct slave *slave; + + bond_work_cancel_all(bond); + bond->send_peer_notif = 0; +@@ -4212,6 +4220,19 @@ static int bond_close(struct net_device *bond_dev) + bond_alb_deinitialize(bond); + bond->recv_probe = NULL; + ++ if (bond_uses_primary(bond)) { ++ rcu_read_lock(); ++ slave = rcu_dereference(bond->curr_active_slave); ++ if (slave) ++ bond_hw_addr_flush(bond_dev, slave->dev); ++ rcu_read_unlock(); ++ } else { ++ struct list_head *iter; ++ ++ bond_for_each_slave(bond, slave, iter) ++ 
bond_hw_addr_flush(bond_dev, slave->dev); ++ } ++ + return 0; + } + +@@ -6195,15 +6216,6 @@ static int bond_init(struct net_device *bond_dev) + if (!bond->wq) + return -ENOMEM; + +- if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN) { +- bond->rr_tx_counter = alloc_percpu(u32); +- if (!bond->rr_tx_counter) { +- destroy_workqueue(bond->wq); +- bond->wq = NULL; +- return -ENOMEM; +- } +- } +- + spin_lock_init(&bond->stats_lock); + netdev_lockdep_set_classes(bond_dev); + +diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c +index d060088047f16..131467d37a45b 100644 +--- a/drivers/net/can/flexcan/flexcan-core.c ++++ b/drivers/net/can/flexcan/flexcan-core.c +@@ -941,11 +941,6 @@ static struct sk_buff *flexcan_mailbox_read(struct can_rx_offload *offload, + u32 reg_ctrl, reg_id, reg_iflag1; + int i; + +- if (unlikely(drop)) { +- skb = ERR_PTR(-ENOBUFS); +- goto mark_as_read; +- } +- + mb = flexcan_get_mb(priv, n); + + if (priv->devtype_data.quirks & FLEXCAN_QUIRK_USE_RX_MAILBOX) { +@@ -974,6 +969,11 @@ static struct sk_buff *flexcan_mailbox_read(struct can_rx_offload *offload, + reg_ctrl = priv->read(&mb->can_ctrl); + } + ++ if (unlikely(drop)) { ++ skb = ERR_PTR(-ENOBUFS); ++ goto mark_as_read; ++ } ++ + if (reg_ctrl & FLEXCAN_MB_CNT_EDL) + skb = alloc_canfd_skb(offload->dev, &cfd); + else +diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c +index d3a658b444b5f..092cd51b3926e 100644 +--- a/drivers/net/can/usb/gs_usb.c ++++ b/drivers/net/can/usb/gs_usb.c +@@ -824,6 +824,7 @@ static int gs_can_open(struct net_device *netdev) + flags |= GS_CAN_MODE_TRIPLE_SAMPLE; + + /* finally start device */ ++ dev->can.state = CAN_STATE_ERROR_ACTIVE; + dm->mode = cpu_to_le32(GS_CAN_MODE_START); + dm->flags = cpu_to_le32(flags); + rc = usb_control_msg(interface_to_usbdev(dev->iface), +@@ -835,13 +836,12 @@ static int gs_can_open(struct net_device *netdev) + if (rc < 0) { + netdev_err(netdev, "Couldn't start device (err=%d)\n", rc); + kfree(dm); ++ dev->can.state = CAN_STATE_STOPPED; + return rc; + } + + kfree(dm); + +- dev->can.state = CAN_STATE_ERROR_ACTIVE; +- + parent->active_channels++; + if (!(dev->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)) + netif_start_queue(netdev); +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index 964354536f9ce..111a952f880ee 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -662,7 +662,6 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) + + for (i = 0; i < nr_pkts; i++) { + struct bnxt_sw_tx_bd *tx_buf; +- bool compl_deferred = false; + struct sk_buff *skb; + int j, last; + +@@ -671,6 +670,8 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) + skb = tx_buf->skb; + tx_buf->skb = NULL; + ++ tx_bytes += skb->len; ++ + if (tx_buf->is_push) { + tx_buf->is_push = 0; + goto next_tx_int; +@@ -691,8 +692,9 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) + } + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (bp->flags & BNXT_FLAG_CHIP_P5) { ++ /* PTP worker takes ownership of the skb */ + if (!bnxt_get_tx_ts_p5(bp, skb)) +- compl_deferred = true; ++ skb = NULL; + else + atomic_inc(&bp->ptp_cfg->tx_avail); + } +@@ -701,9 +703,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) + next_tx_int: + cons = NEXT_TX(cons); + +- tx_bytes += skb->len; +- if (!compl_deferred) +- 
dev_kfree_skb_any(skb); ++ dev_kfree_skb_any(skb); + } + + netdev_tx_completed_queue(txq, nr_pkts, tx_bytes); +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +index 7f3c0875b6f58..8e316367f6ced 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +@@ -317,9 +317,9 @@ void bnxt_ptp_cfg_tstamp_filters(struct bnxt *bp) + + if (!(bp->fw_cap & BNXT_FW_CAP_RX_ALL_PKT_TS) && (ptp->tstamp_filters & + (PORT_MAC_CFG_REQ_FLAGS_ALL_RX_TS_CAPTURE_ENABLE | +- PORT_MAC_CFG_REQ_FLAGS_PTP_RX_TS_CAPTURE_DISABLE))) { ++ PORT_MAC_CFG_REQ_FLAGS_ALL_RX_TS_CAPTURE_DISABLE))) { + ptp->tstamp_filters &= ~(PORT_MAC_CFG_REQ_FLAGS_ALL_RX_TS_CAPTURE_ENABLE | +- PORT_MAC_CFG_REQ_FLAGS_PTP_RX_TS_CAPTURE_DISABLE); ++ PORT_MAC_CFG_REQ_FLAGS_ALL_RX_TS_CAPTURE_DISABLE); + netdev_warn(bp->dev, "Unsupported FW for all RX pkts timestamp filter\n"); + } + +diff --git a/drivers/net/ethernet/freescale/enetc/Makefile b/drivers/net/ethernet/freescale/enetc/Makefile +index a139f2e9d59f0..e0e8dfd137930 100644 +--- a/drivers/net/ethernet/freescale/enetc/Makefile ++++ b/drivers/net/ethernet/freescale/enetc/Makefile +@@ -9,7 +9,6 @@ fsl-enetc-$(CONFIG_FSL_ENETC_QOS) += enetc_qos.o + + obj-$(CONFIG_FSL_ENETC_VF) += fsl-enetc-vf.o + fsl-enetc-vf-y := enetc_vf.o $(common-objs) +-fsl-enetc-vf-$(CONFIG_FSL_ENETC_QOS) += enetc_qos.o + + obj-$(CONFIG_FSL_ENETC_IERB) += fsl-enetc-ierb.o + fsl-enetc-ierb-y := enetc_ierb.o +diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c +index 4470a4a3e4c3e..9f5b921039bd4 100644 +--- a/drivers/net/ethernet/freescale/enetc/enetc.c ++++ b/drivers/net/ethernet/freescale/enetc/enetc.c +@@ -2432,7 +2432,7 @@ int enetc_close(struct net_device *ndev) + return 0; + } + +-static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) ++int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) + { + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct tc_mqprio_qopt *mqprio = type_data; +@@ -2486,25 +2486,6 @@ static int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) + return 0; + } + +-int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type, +- void *type_data) +-{ +- switch (type) { +- case TC_SETUP_QDISC_MQPRIO: +- return enetc_setup_tc_mqprio(ndev, type_data); +- case TC_SETUP_QDISC_TAPRIO: +- return enetc_setup_tc_taprio(ndev, type_data); +- case TC_SETUP_QDISC_CBS: +- return enetc_setup_tc_cbs(ndev, type_data); +- case TC_SETUP_QDISC_ETF: +- return enetc_setup_tc_txtime(ndev, type_data); +- case TC_SETUP_BLOCK: +- return enetc_setup_tc_psfp(ndev, type_data); +- default: +- return -EOPNOTSUPP; +- } +-} +- + static int enetc_setup_xdp_prog(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) + { +@@ -2600,29 +2581,6 @@ static int enetc_set_rss(struct net_device *ndev, int en) + return 0; + } + +-static int enetc_set_psfp(struct net_device *ndev, int en) +-{ +- struct enetc_ndev_priv *priv = netdev_priv(ndev); +- int err; +- +- if (en) { +- err = enetc_psfp_enable(priv); +- if (err) +- return err; +- +- priv->active_offloads |= ENETC_F_QCI; +- return 0; +- } +- +- err = enetc_psfp_disable(priv); +- if (err) +- return err; +- +- priv->active_offloads &= ~ENETC_F_QCI; +- +- return 0; +-} +- + static void enetc_enable_rxvlan(struct net_device *ndev, bool en) + { + struct enetc_ndev_priv *priv = netdev_priv(ndev); +@@ -2641,11 +2599,9 @@ static void 
enetc_enable_txvlan(struct net_device *ndev, bool en) + enetc_bdr_enable_txvlan(&priv->si->hw, i, en); + } + +-int enetc_set_features(struct net_device *ndev, +- netdev_features_t features) ++void enetc_set_features(struct net_device *ndev, netdev_features_t features) + { + netdev_features_t changed = ndev->features ^ features; +- int err = 0; + + if (changed & NETIF_F_RXHASH) + enetc_set_rss(ndev, !!(features & NETIF_F_RXHASH)); +@@ -2657,11 +2613,6 @@ int enetc_set_features(struct net_device *ndev, + if (changed & NETIF_F_HW_VLAN_CTAG_TX) + enetc_enable_txvlan(ndev, + !!(features & NETIF_F_HW_VLAN_CTAG_TX)); +- +- if (changed & NETIF_F_HW_TC) +- err = enetc_set_psfp(ndev, !!(features & NETIF_F_HW_TC)); +- +- return err; + } + + #ifdef CONFIG_FSL_ENETC_PTP_CLOCK +diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h +index 29922c20531f0..2cfe6944ebd32 100644 +--- a/drivers/net/ethernet/freescale/enetc/enetc.h ++++ b/drivers/net/ethernet/freescale/enetc/enetc.h +@@ -393,11 +393,9 @@ void enetc_start(struct net_device *ndev); + void enetc_stop(struct net_device *ndev); + netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev); + struct net_device_stats *enetc_get_stats(struct net_device *ndev); +-int enetc_set_features(struct net_device *ndev, +- netdev_features_t features); ++void enetc_set_features(struct net_device *ndev, netdev_features_t features); + int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd); +-int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type, +- void *type_data); ++int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data); + int enetc_setup_bpf(struct net_device *dev, struct netdev_bpf *xdp); + int enetc_xdp_xmit(struct net_device *ndev, int num_frames, + struct xdp_frame **frames, u32 flags); +@@ -465,6 +463,7 @@ int enetc_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + int enetc_setup_tc_psfp(struct net_device *ndev, void *type_data); + int enetc_psfp_init(struct enetc_ndev_priv *priv); + int enetc_psfp_clean(struct enetc_ndev_priv *priv); ++int enetc_set_psfp(struct net_device *ndev, bool en); + + static inline void enetc_get_max_cap(struct enetc_ndev_priv *priv) + { +@@ -540,4 +539,9 @@ static inline int enetc_psfp_disable(struct enetc_ndev_priv *priv) + { + return 0; + } ++ ++static inline int enetc_set_psfp(struct net_device *ndev, bool en) ++{ ++ return 0; ++} + #endif +diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c +index c4a0e836d4f09..bb7750222691d 100644 +--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c ++++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c +@@ -709,6 +709,13 @@ static int enetc_pf_set_features(struct net_device *ndev, + { + netdev_features_t changed = ndev->features ^ features; + struct enetc_ndev_priv *priv = netdev_priv(ndev); ++ int err; ++ ++ if (changed & NETIF_F_HW_TC) { ++ err = enetc_set_psfp(ndev, !!(features & NETIF_F_HW_TC)); ++ if (err) ++ return err; ++ } + + if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) { + struct enetc_pf *pf = enetc_si_priv(priv->si); +@@ -722,7 +729,28 @@ static int enetc_pf_set_features(struct net_device *ndev, + if (changed & NETIF_F_LOOPBACK) + enetc_set_loopback(ndev, !!(features & NETIF_F_LOOPBACK)); + +- return enetc_set_features(ndev, features); ++ enetc_set_features(ndev, features); ++ ++ return 0; ++} ++ ++static int enetc_pf_setup_tc(struct net_device *ndev, enum tc_setup_type type, ++ void *type_data) ++{ ++ switch 
(type) { ++ case TC_SETUP_QDISC_MQPRIO: ++ return enetc_setup_tc_mqprio(ndev, type_data); ++ case TC_SETUP_QDISC_TAPRIO: ++ return enetc_setup_tc_taprio(ndev, type_data); ++ case TC_SETUP_QDISC_CBS: ++ return enetc_setup_tc_cbs(ndev, type_data); ++ case TC_SETUP_QDISC_ETF: ++ return enetc_setup_tc_txtime(ndev, type_data); ++ case TC_SETUP_BLOCK: ++ return enetc_setup_tc_psfp(ndev, type_data); ++ default: ++ return -EOPNOTSUPP; ++ } + } + + static const struct net_device_ops enetc_ndev_ops = { +@@ -739,7 +767,7 @@ static const struct net_device_ops enetc_ndev_ops = { + .ndo_set_vf_spoofchk = enetc_pf_set_vf_spoofchk, + .ndo_set_features = enetc_pf_set_features, + .ndo_eth_ioctl = enetc_ioctl, +- .ndo_setup_tc = enetc_setup_tc, ++ .ndo_setup_tc = enetc_pf_setup_tc, + .ndo_bpf = enetc_setup_bpf, + .ndo_xdp_xmit = enetc_xdp_xmit, + }; +diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c +index 582a663ed0ba4..f8a2f02ce22de 100644 +--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c ++++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c +@@ -1517,6 +1517,29 @@ int enetc_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + } + } + ++int enetc_set_psfp(struct net_device *ndev, bool en) ++{ ++ struct enetc_ndev_priv *priv = netdev_priv(ndev); ++ int err; ++ ++ if (en) { ++ err = enetc_psfp_enable(priv); ++ if (err) ++ return err; ++ ++ priv->active_offloads |= ENETC_F_QCI; ++ return 0; ++ } ++ ++ err = enetc_psfp_disable(priv); ++ if (err) ++ return err; ++ ++ priv->active_offloads &= ~ENETC_F_QCI; ++ ++ return 0; ++} ++ + int enetc_psfp_init(struct enetc_ndev_priv *priv) + { + if (epsfp.psfp_sfi_bitmap) +diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c +index 17924305afa2f..dfcaac302e245 100644 +--- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c ++++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c +@@ -88,7 +88,20 @@ static int enetc_vf_set_mac_addr(struct net_device *ndev, void *addr) + static int enetc_vf_set_features(struct net_device *ndev, + netdev_features_t features) + { +- return enetc_set_features(ndev, features); ++ enetc_set_features(ndev, features); ++ ++ return 0; ++} ++ ++static int enetc_vf_setup_tc(struct net_device *ndev, enum tc_setup_type type, ++ void *type_data) ++{ ++ switch (type) { ++ case TC_SETUP_QDISC_MQPRIO: ++ return enetc_setup_tc_mqprio(ndev, type_data); ++ default: ++ return -EOPNOTSUPP; ++ } + } + + /* Probing/ Init */ +@@ -100,7 +113,7 @@ static const struct net_device_ops enetc_ndev_ops = { + .ndo_set_mac_address = enetc_vf_set_mac_addr, + .ndo_set_features = enetc_vf_set_features, + .ndo_eth_ioctl = enetc_ioctl, +- .ndo_setup_tc = enetc_setup_tc, ++ .ndo_setup_tc = enetc_vf_setup_tc, + }; + + static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device *ndev, +diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c +index 8c939628e2d85..2e6461b0ea8bc 100644 +--- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c ++++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c +@@ -157,7 +157,7 @@ static int gve_alloc_page_dqo(struct gve_priv *priv, + int err; + + err = gve_alloc_page(priv, &priv->pdev->dev, &buf_state->page_info.page, +- &buf_state->addr, DMA_FROM_DEVICE, GFP_KERNEL); ++ &buf_state->addr, DMA_FROM_DEVICE, GFP_ATOMIC); + if (err) + return err; + +diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c +index 
1aaf0c5ddf6cf..57e27f2024d38 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c +@@ -5785,6 +5785,26 @@ static int i40e_get_link_speed(struct i40e_vsi *vsi) + } + } + ++/** ++ * i40e_bw_bytes_to_mbits - Convert max_tx_rate from bytes to mbits ++ * @vsi: Pointer to vsi structure ++ * @max_tx_rate: max TX rate in bytes to be converted into Mbits ++ * ++ * Helper function to convert units before send to set BW limit ++ **/ ++static u64 i40e_bw_bytes_to_mbits(struct i40e_vsi *vsi, u64 max_tx_rate) ++{ ++ if (max_tx_rate < I40E_BW_MBPS_DIVISOR) { ++ dev_warn(&vsi->back->pdev->dev, ++ "Setting max tx rate to minimum usable value of 50Mbps.\n"); ++ max_tx_rate = I40E_BW_CREDIT_DIVISOR; ++ } else { ++ do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR); ++ } ++ ++ return max_tx_rate; ++} ++ + /** + * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate + * @vsi: VSI to be configured +@@ -5807,10 +5827,10 @@ int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate) + max_tx_rate, seid); + return -EINVAL; + } +- if (max_tx_rate && max_tx_rate < 50) { ++ if (max_tx_rate && max_tx_rate < I40E_BW_CREDIT_DIVISOR) { + dev_warn(&pf->pdev->dev, + "Setting max tx rate to minimum usable value of 50Mbps.\n"); +- max_tx_rate = 50; ++ max_tx_rate = I40E_BW_CREDIT_DIVISOR; + } + + /* Tx rate credits are in values of 50Mbps, 0 is disabled */ +@@ -8101,9 +8121,9 @@ config_tc: + + if (i40e_is_tc_mqprio_enabled(pf)) { + if (vsi->mqprio_qopt.max_rate[0]) { +- u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; ++ u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi, ++ vsi->mqprio_qopt.max_rate[0]); + +- do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR); + ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); + if (!ret) { + u64 credits = max_tx_rate; +@@ -10848,10 +10868,10 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) + } + + if (vsi->mqprio_qopt.max_rate[0]) { +- u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; ++ u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi, ++ vsi->mqprio_qopt.max_rate[0]); + u64 credits = 0; + +- do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR); + ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); + if (ret) + goto end_unlock; +diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +index 86b0f21287dc8..67fbaaad39859 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +@@ -2038,6 +2038,25 @@ static void i40e_del_qch(struct i40e_vf *vf) + } + } + ++/** ++ * i40e_vc_get_max_frame_size ++ * @vf: pointer to the VF ++ * ++ * Max frame size is determined based on the current port's max frame size and ++ * whether a port VLAN is configured on this VF. The VF is not aware whether ++ * it's in a port VLAN so the PF needs to account for this in max frame size ++ * checks and sending the max frame size to the VF. 
++ **/ ++static u16 i40e_vc_get_max_frame_size(struct i40e_vf *vf) ++{ ++ u16 max_frame_size = vf->pf->hw.phy.link_info.max_frame_size; ++ ++ if (vf->port_vlan_id) ++ max_frame_size -= VLAN_HLEN; ++ ++ return max_frame_size; ++} ++ + /** + * i40e_vc_get_vf_resources_msg + * @vf: pointer to the VF info +@@ -2139,6 +2158,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) + vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf; + vfres->rss_key_size = I40E_HKEY_ARRAY_SIZE; + vfres->rss_lut_size = I40E_VF_HLUT_ARRAY_SIZE; ++ vfres->max_mtu = i40e_vc_get_max_frame_size(vf); + + if (vf->lan_vsi_idx) { + vfres->vsi_res[0].vsi_id = vf->lan_vsi_id; +diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c +index 06d18797d25a2..18b6a702a1d6d 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c +@@ -114,8 +114,11 @@ u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw) + { + u32 head, tail; + ++ /* underlying hardware might not allow access and/or always return ++ * 0 for the head/tail registers so just use the cached values ++ */ + head = ring->next_to_clean; +- tail = readl(ring->tail); ++ tail = ring->next_to_use; + + if (head != tail) + return (head < tail) ? +@@ -1390,7 +1393,7 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, + #endif + struct sk_buff *skb; + +- if (!rx_buffer) ++ if (!rx_buffer || !size) + return NULL; + /* prefetch first cache line of first page */ + va = page_address(rx_buffer->page) + rx_buffer->page_offset; +@@ -1548,7 +1551,7 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) + /* exit if we failed to retrieve a buffer */ + if (!skb) { + rx_ring->rx_stats.alloc_buff_failed++; +- if (rx_buffer) ++ if (rx_buffer && size) + rx_buffer->pagecnt_bias++; + break; + } +diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +index 1603e99bae4af..498797a0a0a95 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +@@ -273,11 +273,14 @@ int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter) + void iavf_configure_queues(struct iavf_adapter *adapter) + { + struct virtchnl_vsi_queue_config_info *vqci; +- struct virtchnl_queue_pair_info *vqpi; ++ int i, max_frame = adapter->vf_res->max_mtu; + int pairs = adapter->num_active_queues; +- int i, max_frame = IAVF_MAX_RXBUFFER; ++ struct virtchnl_queue_pair_info *vqpi; + size_t len; + ++ if (max_frame > IAVF_MAX_RXBUFFER || !max_frame) ++ max_frame = IAVF_MAX_RXBUFFER; ++ + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot configure queues, command %d pending\n", +diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c +index 6c4e1d45235ef..1169fd7811b09 100644 +--- a/drivers/net/ethernet/intel/ice/ice_lib.c ++++ b/drivers/net/ethernet/intel/ice/ice_lib.c +@@ -911,7 +911,7 @@ static void ice_set_dflt_vsi_ctx(struct ice_hw *hw, struct ice_vsi_ctx *ctxt) + */ + static int ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) + { +- u16 offset = 0, qmap = 0, tx_count = 0, pow = 0; ++ u16 offset = 0, qmap = 0, tx_count = 0, rx_count = 0, pow = 0; + u16 num_txq_per_tc, num_rxq_per_tc; + u16 qcount_tx = vsi->alloc_txq; + u16 qcount_rx = vsi->alloc_rxq; +@@ -978,23 +978,25 @@ static int ice_vsi_setup_q_map(struct 
ice_vsi *vsi, struct ice_vsi_ctx *ctxt) + * at least 1) + */ + if (offset) +- vsi->num_rxq = offset; ++ rx_count = offset; + else +- vsi->num_rxq = num_rxq_per_tc; ++ rx_count = num_rxq_per_tc; + +- if (vsi->num_rxq > vsi->alloc_rxq) { ++ if (rx_count > vsi->alloc_rxq) { + dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Rx queues (%u), than were allocated (%u)!\n", +- vsi->num_rxq, vsi->alloc_rxq); ++ rx_count, vsi->alloc_rxq); + return -EINVAL; + } + +- vsi->num_txq = tx_count; +- if (vsi->num_txq > vsi->alloc_txq) { ++ if (tx_count > vsi->alloc_txq) { + dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Tx queues (%u), than were allocated (%u)!\n", +- vsi->num_txq, vsi->alloc_txq); ++ tx_count, vsi->alloc_txq); + return -EINVAL; + } + ++ vsi->num_txq = tx_count; ++ vsi->num_rxq = rx_count; ++ + if (vsi->type == ICE_VSI_VF && vsi->num_txq != vsi->num_rxq) { + dev_dbg(ice_pf_to_dev(vsi->back), "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n"); + /* since there is a chance that num_rxq could have been changed +@@ -3487,6 +3489,7 @@ ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt, + u16 pow, offset = 0, qcount_tx = 0, qcount_rx = 0, qmap; + u16 tc0_offset = vsi->mqprio_qopt.qopt.offset[0]; + int tc0_qcount = vsi->mqprio_qopt.qopt.count[0]; ++ u16 new_txq, new_rxq; + u8 netdev_tc = 0; + int i; + +@@ -3527,21 +3530,24 @@ ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt, + } + } + +- /* Set actual Tx/Rx queue pairs */ +- vsi->num_txq = offset + qcount_tx; +- if (vsi->num_txq > vsi->alloc_txq) { ++ new_txq = offset + qcount_tx; ++ if (new_txq > vsi->alloc_txq) { + dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Tx queues (%u), than were allocated (%u)!\n", +- vsi->num_txq, vsi->alloc_txq); ++ new_txq, vsi->alloc_txq); + return -EINVAL; + } + +- vsi->num_rxq = offset + qcount_rx; +- if (vsi->num_rxq > vsi->alloc_rxq) { ++ new_rxq = offset + qcount_rx; ++ if (new_rxq > vsi->alloc_rxq) { + dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Rx queues (%u), than were allocated (%u)!\n", +- vsi->num_rxq, vsi->alloc_rxq); ++ new_rxq, vsi->alloc_rxq); + return -EINVAL; + } + ++ /* Set actual Tx/Rx queue pairs */ ++ vsi->num_txq = new_txq; ++ vsi->num_rxq = new_rxq; ++ + /* Setup queue TC[0].qmap for given VSI context */ + ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); + ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]); +@@ -3573,6 +3579,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) + { + u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; + struct ice_pf *pf = vsi->back; ++ struct ice_tc_cfg old_tc_cfg; + struct ice_vsi_ctx *ctx; + struct device *dev; + int i, ret = 0; +@@ -3597,6 +3604,7 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) + max_txqs[i] = vsi->num_txq; + } + ++ memcpy(&old_tc_cfg, &vsi->tc_cfg, sizeof(old_tc_cfg)); + vsi->tc_cfg.ena_tc = ena_tc; + vsi->tc_cfg.numtc = num_tc; + +@@ -3613,8 +3621,10 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) + else + ret = ice_vsi_setup_q_map(vsi, ctx); + +- if (ret) ++ if (ret) { ++ memcpy(&vsi->tc_cfg, &old_tc_cfg, sizeof(vsi->tc_cfg)); + goto out; ++ } + + /* must to indicate which section of VSI context are being modified */ + ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); +diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c +index 4c6bb7482b362..48befe1e2872c 100644 +--- a/drivers/net/ethernet/intel/ice/ice_main.c ++++ b/drivers/net/ethernet/intel/ice/ice_main.c 
+@@ -2399,8 +2399,6 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset) + return -EBUSY; + } + +- ice_unplug_aux_dev(pf); +- + switch (reset) { + case ICE_RESET_PFR: + set_bit(ICE_PFR_REQ, pf->state); +@@ -6629,7 +6627,7 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) + */ + int ice_down(struct ice_vsi *vsi) + { +- int i, tx_err, rx_err, link_err = 0, vlan_err = 0; ++ int i, tx_err, rx_err, vlan_err = 0; + + WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state)); + +@@ -6663,20 +6661,13 @@ int ice_down(struct ice_vsi *vsi) + + ice_napi_disable_all(vsi); + +- if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) { +- link_err = ice_force_phys_link_state(vsi, false); +- if (link_err) +- netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n", +- vsi->vsi_num, link_err); +- } +- + ice_for_each_txq(vsi, i) + ice_clean_tx_ring(vsi->tx_rings[i]); + + ice_for_each_rxq(vsi, i) + ice_clean_rx_ring(vsi->rx_rings[i]); + +- if (tx_err || rx_err || link_err || vlan_err) { ++ if (tx_err || rx_err || vlan_err) { + netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n", + vsi->vsi_num, vsi->vsw->sw_id); + return -EIO; +@@ -6838,6 +6829,8 @@ int ice_vsi_open(struct ice_vsi *vsi) + if (err) + goto err_setup_rx; + ++ ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc); ++ + if (vsi->type == ICE_VSI_PF) { + /* Notify the stack of the actual queue counts. */ + err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq); +@@ -8876,6 +8869,16 @@ int ice_stop(struct net_device *netdev) + return -EBUSY; + } + ++ if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) { ++ int link_err = ice_force_phys_link_state(vsi, false); ++ ++ if (link_err) { ++ netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n", ++ vsi->vsi_num, link_err); ++ return -EIO; ++ } ++ } ++ + ice_vsi_close(vsi); + + return 0; +diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c +index 836dce8407124..97453d1dfafed 100644 +--- a/drivers/net/ethernet/intel/ice/ice_txrx.c ++++ b/drivers/net/ethernet/intel/ice/ice_txrx.c +@@ -610,7 +610,7 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + if (test_bit(ICE_VSI_DOWN, vsi->state)) + return -ENETDOWN; + +- if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq) ++ if (!ice_is_xdp_ena_vsi(vsi)) + return -ENXIO; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) +@@ -621,6 +621,9 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + xdp_ring = vsi->xdp_rings[queue_index]; + spin_lock(&xdp_ring->tx_lock); + } else { ++ /* Generally, should not happen */ ++ if (unlikely(queue_index >= vsi->num_xdp_txq)) ++ return -ENXIO; + xdp_ring = vsi->xdp_rings[queue_index]; + } + +diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c +index 85155cd9405c5..4aeb927c37153 100644 +--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c ++++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_mdio.c +@@ -179,6 +179,9 @@ static int mlxbf_gige_mdio_read(struct mii_bus *bus, int phy_add, int phy_reg) + /* Only return ad bits of the gw register */ + ret &= MLXBF_GIGE_MDIO_GW_AD_MASK; + ++ /* The MDIO lock is set on read. 
To release it, clear gw register */ ++ writel(0, priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET); ++ + return ret; + } + +@@ -203,6 +206,9 @@ static int mlxbf_gige_mdio_write(struct mii_bus *bus, int phy_add, + temp, !(temp & MLXBF_GIGE_MDIO_GW_BUSY_MASK), + 5, 1000000); + ++ /* The MDIO lock is set on read. To release it, clear gw register */ ++ writel(0, priv->mdio_io + MLXBF_GIGE_MDIO_GW_OFFSET); ++ + return ret; + } + +diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c +index 49b85ca578b01..9820efce72ffe 100644 +--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c ++++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c +@@ -370,6 +370,11 @@ static void mana_gd_process_eq_events(void *arg) + break; + } + ++ /* Per GDMA spec, rmb is necessary after checking owner_bits, before ++ * reading eqe. ++ */ ++ rmb(); ++ + mana_gd_process_eqe(eq); + + eq->head++; +@@ -1107,6 +1112,11 @@ static int mana_gd_read_cqe(struct gdma_queue *cq, struct gdma_comp *comp) + if (WARN_ON_ONCE(owner_bits != new_bits)) + return -1; + ++ /* Per GDMA spec, rmb is necessary after checking owner_bits, before ++ * reading completion info ++ */ ++ rmb(); ++ + comp->wq_num = cqe->cqe_info.wq_num; + comp->is_sq = cqe->cqe_info.is_sq; + memcpy(comp->cqe_data, cqe->cqe_data, GDMA_COMP_DATA_SIZE); +diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c +index b357ac4c56c59..7e32b04eb0c75 100644 +--- a/drivers/net/ethernet/renesas/ravb_main.c ++++ b/drivers/net/ethernet/renesas/ravb_main.c +@@ -1449,6 +1449,8 @@ static int ravb_phy_init(struct net_device *ndev) + phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT); + } + ++ /* Indicate that the MAC is responsible for managing PHY PM */ ++ phydev->mac_managed_pm = true; + phy_attached_info(phydev); + + return 0; +diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c +index 67ade78fb7671..7fd8828d3a846 100644 +--- a/drivers/net/ethernet/renesas/sh_eth.c ++++ b/drivers/net/ethernet/renesas/sh_eth.c +@@ -2029,6 +2029,8 @@ static int sh_eth_phy_init(struct net_device *ndev) + if (mdp->cd->register_type != SH_ETH_REG_GIGABIT) + phy_set_max_speed(phydev, SPEED_100); + ++ /* Indicate that the MAC is responsible for managing PHY PM */ ++ phydev->mac_managed_pm = true; + phy_attached_info(phydev); + + return 0; +diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c +index 032b8c0bd7889..5b4d661ab9867 100644 +--- a/drivers/net/ethernet/sfc/efx_channels.c ++++ b/drivers/net/ethernet/sfc/efx_channels.c +@@ -319,7 +319,7 @@ int efx_probe_interrupts(struct efx_nic *efx) + efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0); + efx->n_rx_channels = 1; + efx->n_tx_channels = 1; +- efx->tx_channel_offset = 1; ++ efx->tx_channel_offset = efx_separate_tx_channels ? 1 : 0; + efx->n_xdp_channels = 0; + efx->xdp_channel_offset = efx->n_channels; + efx->legacy_irq = efx->pci_dev->irq; +diff --git a/drivers/net/ethernet/sfc/siena/efx_channels.c b/drivers/net/ethernet/sfc/siena/efx_channels.c +index 017212a40df38..f54ebd0072868 100644 +--- a/drivers/net/ethernet/sfc/siena/efx_channels.c ++++ b/drivers/net/ethernet/sfc/siena/efx_channels.c +@@ -320,7 +320,7 @@ int efx_siena_probe_interrupts(struct efx_nic *efx) + efx->n_channels = 1 + (efx_siena_separate_tx_channels ? 
1 : 0); + efx->n_rx_channels = 1; + efx->n_tx_channels = 1; +- efx->tx_channel_offset = 1; ++ efx->tx_channel_offset = efx_siena_separate_tx_channels ? 1 : 0; + efx->n_xdp_channels = 0; + efx->xdp_channel_offset = efx->n_channels; + efx->legacy_irq = efx->pci_dev->irq; +diff --git a/drivers/net/ethernet/sfc/siena/tx.c b/drivers/net/ethernet/sfc/siena/tx.c +index e166dcb9b99ce..91e87594ed1ea 100644 +--- a/drivers/net/ethernet/sfc/siena/tx.c ++++ b/drivers/net/ethernet/sfc/siena/tx.c +@@ -336,7 +336,7 @@ netdev_tx_t efx_siena_hard_start_xmit(struct sk_buff *skb, + * previous packets out. + */ + if (!netdev_xmit_more()) +- efx_tx_send_pending(tx_queue->channel); ++ efx_tx_send_pending(efx_get_tx_channel(efx, index)); + return NETDEV_TX_OK; + } + +diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c +index 138bca6113415..80ed7f760bd30 100644 +--- a/drivers/net/ethernet/sfc/tx.c ++++ b/drivers/net/ethernet/sfc/tx.c +@@ -549,7 +549,7 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, + * previous packets out. + */ + if (!netdev_xmit_more()) +- efx_tx_send_pending(tx_queue->channel); ++ efx_tx_send_pending(efx_get_tx_channel(efx, index)); + return NETDEV_TX_OK; + } + +diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c +index 8594ee839628b..88aa0d310aeef 100644 +--- a/drivers/net/ethernet/sun/sunhme.c ++++ b/drivers/net/ethernet/sun/sunhme.c +@@ -2020,9 +2020,9 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev) + + skb_reserve(copy_skb, 2); + skb_put(copy_skb, len); +- dma_sync_single_for_cpu(hp->dma_dev, dma_addr, len, DMA_FROM_DEVICE); ++ dma_sync_single_for_cpu(hp->dma_dev, dma_addr, len + 2, DMA_FROM_DEVICE); + skb_copy_from_linear_data(skb, copy_skb->data, len); +- dma_sync_single_for_device(hp->dma_dev, dma_addr, len, DMA_FROM_DEVICE); ++ dma_sync_single_for_device(hp->dma_dev, dma_addr, len + 2, DMA_FROM_DEVICE); + /* Reuse original ring buffer. 
*/ + hme_write_rxd(hp, this, + (RXFLAG_OWN|((RX_BUF_ALLOC_SIZE-RX_OFFSET)<<16)), +diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c +index ec010cf2e816a..6f874f99b910c 100644 +--- a/drivers/net/ipa/ipa_qmi.c ++++ b/drivers/net/ipa/ipa_qmi.c +@@ -308,12 +308,12 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi) + mem = ipa_mem_find(ipa, IPA_MEM_V4_ROUTE); + req.v4_route_tbl_info_valid = 1; + req.v4_route_tbl_info.start = ipa->mem_offset + mem->offset; +- req.v4_route_tbl_info.count = mem->size / sizeof(__le64); ++ req.v4_route_tbl_info.end = IPA_ROUTE_MODEM_COUNT - 1; + + mem = ipa_mem_find(ipa, IPA_MEM_V6_ROUTE); + req.v6_route_tbl_info_valid = 1; + req.v6_route_tbl_info.start = ipa->mem_offset + mem->offset; +- req.v6_route_tbl_info.count = mem->size / sizeof(__le64); ++ req.v6_route_tbl_info.end = IPA_ROUTE_MODEM_COUNT - 1; + + mem = ipa_mem_find(ipa, IPA_MEM_V4_FILTER); + req.v4_filter_tbl_start_valid = 1; +@@ -352,7 +352,7 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi) + req.v4_hash_route_tbl_info_valid = 1; + req.v4_hash_route_tbl_info.start = + ipa->mem_offset + mem->offset; +- req.v4_hash_route_tbl_info.count = mem->size / sizeof(__le64); ++ req.v4_hash_route_tbl_info.end = IPA_ROUTE_MODEM_COUNT - 1; + } + + mem = ipa_mem_find(ipa, IPA_MEM_V6_ROUTE_HASHED); +@@ -360,7 +360,7 @@ init_modem_driver_req(struct ipa_qmi *ipa_qmi) + req.v6_hash_route_tbl_info_valid = 1; + req.v6_hash_route_tbl_info.start = + ipa->mem_offset + mem->offset; +- req.v6_hash_route_tbl_info.count = mem->size / sizeof(__le64); ++ req.v6_hash_route_tbl_info.end = IPA_ROUTE_MODEM_COUNT - 1; + } + + mem = ipa_mem_find(ipa, IPA_MEM_V4_FILTER_HASHED); +diff --git a/drivers/net/ipa/ipa_qmi_msg.c b/drivers/net/ipa/ipa_qmi_msg.c +index 6838e8065072b..75d3fc0092e92 100644 +--- a/drivers/net/ipa/ipa_qmi_msg.c ++++ b/drivers/net/ipa/ipa_qmi_msg.c +@@ -311,7 +311,7 @@ struct qmi_elem_info ipa_init_modem_driver_req_ei[] = { + .tlv_type = 0x12, + .offset = offsetof(struct ipa_init_modem_driver_req, + v4_route_tbl_info), +- .ei_array = ipa_mem_array_ei, ++ .ei_array = ipa_mem_bounds_ei, + }, + { + .data_type = QMI_OPT_FLAG, +@@ -332,7 +332,7 @@ struct qmi_elem_info ipa_init_modem_driver_req_ei[] = { + .tlv_type = 0x13, + .offset = offsetof(struct ipa_init_modem_driver_req, + v6_route_tbl_info), +- .ei_array = ipa_mem_array_ei, ++ .ei_array = ipa_mem_bounds_ei, + }, + { + .data_type = QMI_OPT_FLAG, +@@ -496,7 +496,7 @@ struct qmi_elem_info ipa_init_modem_driver_req_ei[] = { + .tlv_type = 0x1b, + .offset = offsetof(struct ipa_init_modem_driver_req, + v4_hash_route_tbl_info), +- .ei_array = ipa_mem_array_ei, ++ .ei_array = ipa_mem_bounds_ei, + }, + { + .data_type = QMI_OPT_FLAG, +@@ -517,7 +517,7 @@ struct qmi_elem_info ipa_init_modem_driver_req_ei[] = { + .tlv_type = 0x1c, + .offset = offsetof(struct ipa_init_modem_driver_req, + v6_hash_route_tbl_info), +- .ei_array = ipa_mem_array_ei, ++ .ei_array = ipa_mem_bounds_ei, + }, + { + .data_type = QMI_OPT_FLAG, +diff --git a/drivers/net/ipa/ipa_qmi_msg.h b/drivers/net/ipa/ipa_qmi_msg.h +index 495e85abe50bd..9651aa59b5968 100644 +--- a/drivers/net/ipa/ipa_qmi_msg.h ++++ b/drivers/net/ipa/ipa_qmi_msg.h +@@ -86,9 +86,11 @@ enum ipa_platform_type { + IPA_QMI_PLATFORM_TYPE_MSM_QNX_V01 = 0x5, /* QNX MSM */ + }; + +-/* This defines the start and end offset of a range of memory. Both +- * fields are offsets relative to the start of IPA shared memory. +- * The end value is the last addressable byte *within* the range. 
++/* This defines the start and end offset of a range of memory. The start ++ * value is a byte offset relative to the start of IPA shared memory. The ++ * end value is the last addressable unit *within* the range. Typically ++ * the end value is in units of bytes, however it can also be a maximum ++ * array index value. + */ + struct ipa_mem_bounds { + u32 start; +@@ -129,18 +131,19 @@ struct ipa_init_modem_driver_req { + u8 hdr_tbl_info_valid; + struct ipa_mem_bounds hdr_tbl_info; + +- /* Routing table information. These define the location and size of +- * non-hashable IPv4 and IPv6 filter tables. The start values are +- * offsets relative to the start of IPA shared memory. ++ /* Routing table information. These define the location and maximum ++ * *index* (not byte) for the modem portion of non-hashable IPv4 and ++ * IPv6 routing tables. The start values are byte offsets relative ++ * to the start of IPA shared memory. + */ + u8 v4_route_tbl_info_valid; +- struct ipa_mem_array v4_route_tbl_info; ++ struct ipa_mem_bounds v4_route_tbl_info; + u8 v6_route_tbl_info_valid; +- struct ipa_mem_array v6_route_tbl_info; ++ struct ipa_mem_bounds v6_route_tbl_info; + + /* Filter table information. These define the location of the + * non-hashable IPv4 and IPv6 filter tables. The start values are +- * offsets relative to the start of IPA shared memory. ++ * byte offsets relative to the start of IPA shared memory. + */ + u8 v4_filter_tbl_start_valid; + u32 v4_filter_tbl_start; +@@ -181,18 +184,20 @@ struct ipa_init_modem_driver_req { + u8 zip_tbl_info_valid; + struct ipa_mem_bounds zip_tbl_info; + +- /* Routing table information. These define the location and size +- * of hashable IPv4 and IPv6 filter tables. The start values are +- * offsets relative to the start of IPA shared memory. ++ /* Routing table information. These define the location and maximum ++ * *index* (not byte) for the modem portion of hashable IPv4 and IPv6 ++ * routing tables (if supported by hardware). The start values are ++ * byte offsets relative to the start of IPA shared memory. + */ + u8 v4_hash_route_tbl_info_valid; +- struct ipa_mem_array v4_hash_route_tbl_info; ++ struct ipa_mem_bounds v4_hash_route_tbl_info; + u8 v6_hash_route_tbl_info_valid; +- struct ipa_mem_array v6_hash_route_tbl_info; ++ struct ipa_mem_bounds v6_hash_route_tbl_info; + + /* Filter table information. These define the location and size +- * of hashable IPv4 and IPv6 filter tables. The start values are +- * offsets relative to the start of IPA shared memory. ++ * of hashable IPv4 and IPv6 filter tables (if supported by hardware). ++ * The start values are byte offsets relative to the start of IPA ++ * shared memory. 
+ */ + u8 v4_hash_filter_tbl_start_valid; + u32 v4_hash_filter_tbl_start; +diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c +index 2f5a58bfc529a..69efe672ca528 100644 +--- a/drivers/net/ipa/ipa_table.c ++++ b/drivers/net/ipa/ipa_table.c +@@ -108,8 +108,6 @@ + + /* Assignment of route table entries to the modem and AP */ + #define IPA_ROUTE_MODEM_MIN 0 +-#define IPA_ROUTE_MODEM_COUNT 8 +- + #define IPA_ROUTE_AP_MIN IPA_ROUTE_MODEM_COUNT + #define IPA_ROUTE_AP_COUNT \ + (IPA_ROUTE_COUNT_MAX - IPA_ROUTE_MODEM_COUNT) +diff --git a/drivers/net/ipa/ipa_table.h b/drivers/net/ipa/ipa_table.h +index b6a9a0d79d68e..1538e2e1732fe 100644 +--- a/drivers/net/ipa/ipa_table.h ++++ b/drivers/net/ipa/ipa_table.h +@@ -13,6 +13,9 @@ struct ipa; + /* The maximum number of filter table entries (IPv4, IPv6; hashed or not) */ + #define IPA_FILTER_COUNT_MAX 14 + ++/* The number of route table entries allotted to the modem */ ++#define IPA_ROUTE_MODEM_COUNT 8 ++ + /* The maximum number of route table entries (IPv4, IPv6; hashed or not) */ + #define IPA_ROUTE_COUNT_MAX 15 + +diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c +index 6ffb27419e64b..c58123e136896 100644 +--- a/drivers/net/ipvlan/ipvlan_core.c ++++ b/drivers/net/ipvlan/ipvlan_core.c +@@ -495,7 +495,6 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb) + + static int ipvlan_process_outbound(struct sk_buff *skb) + { +- struct ethhdr *ethh = eth_hdr(skb); + int ret = NET_XMIT_DROP; + + /* The ipvlan is a pseudo-L2 device, so the packets that we receive +@@ -505,6 +504,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb) + if (skb_mac_header_was_set(skb)) { + /* In this mode we dont care about + * multicast and broadcast traffic */ ++ struct ethhdr *ethh = eth_hdr(skb); ++ + if (is_multicast_ether_addr(ethh->h_dest)) { + pr_debug_ratelimited( + "Dropped {multi|broad}cast of type=[%x]\n", +@@ -589,7 +590,7 @@ out: + static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) + { + const struct ipvl_dev *ipvlan = netdev_priv(dev); +- struct ethhdr *eth = eth_hdr(skb); ++ struct ethhdr *eth = skb_eth_hdr(skb); + struct ipvl_addr *addr; + void *lyr3h; + int addr_type; +@@ -619,6 +620,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev) + return dev_forward_skb(ipvlan->phy_dev, skb); + + } else if (is_multicast_ether_addr(eth->h_dest)) { ++ skb_reset_mac_header(skb); + ipvlan_skb_crossing_ns(skb, NULL); + ipvlan_multicast_enqueue(ipvlan->port, skb, true); + return NET_XMIT_SUCCESS; +diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c +index 9e3c815a070f1..796e9c7857d09 100644 +--- a/drivers/net/mdio/of_mdio.c ++++ b/drivers/net/mdio/of_mdio.c +@@ -231,6 +231,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) + return 0; + + unregister: ++ of_node_put(child); + mdiobus_unregister(mdio); + return rc; + } +diff --git a/drivers/net/netdevsim/hwstats.c b/drivers/net/netdevsim/hwstats.c +index 605a38e16db05..0e58aa7f0374e 100644 +--- a/drivers/net/netdevsim/hwstats.c ++++ b/drivers/net/netdevsim/hwstats.c +@@ -433,11 +433,11 @@ int nsim_dev_hwstats_init(struct nsim_dev *nsim_dev) + goto err_remove_hwstats_recursive; + } + +- debugfs_create_file("enable_ifindex", 0600, hwstats->l3_ddir, hwstats, ++ debugfs_create_file("enable_ifindex", 0200, hwstats->l3_ddir, hwstats, + &nsim_dev_hwstats_l3_enable_fops.fops); +- debugfs_create_file("disable_ifindex", 0600, hwstats->l3_ddir, hwstats, ++ 
debugfs_create_file("disable_ifindex", 0200, hwstats->l3_ddir, hwstats, + &nsim_dev_hwstats_l3_disable_fops.fops); +- debugfs_create_file("fail_next_enable", 0600, hwstats->l3_ddir, hwstats, ++ debugfs_create_file("fail_next_enable", 0200, hwstats->l3_ddir, hwstats, + &nsim_dev_hwstats_l3_fail_fops.fops); + + INIT_DELAYED_WORK(&hwstats->traffic_dw, +diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c +index c7047f5d7a9b0..8bc0957a0f6d3 100644 +--- a/drivers/net/phy/aquantia_main.c ++++ b/drivers/net/phy/aquantia_main.c +@@ -90,6 +90,9 @@ + #define VEND1_GLOBAL_FW_ID_MAJOR GENMASK(15, 8) + #define VEND1_GLOBAL_FW_ID_MINOR GENMASK(7, 0) + ++#define VEND1_GLOBAL_GEN_STAT2 0xc831 ++#define VEND1_GLOBAL_GEN_STAT2_OP_IN_PROG BIT(15) ++ + #define VEND1_GLOBAL_RSVD_STAT1 0xc885 + #define VEND1_GLOBAL_RSVD_STAT1_FW_BUILD_ID GENMASK(7, 4) + #define VEND1_GLOBAL_RSVD_STAT1_PROV_ID GENMASK(3, 0) +@@ -124,6 +127,12 @@ + #define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL2 BIT(1) + #define VEND1_GLOBAL_INT_VEND_MASK_GLOBAL3 BIT(0) + ++/* Sleep and timeout for checking if the Processor-Intensive ++ * MDIO operation is finished ++ */ ++#define AQR107_OP_IN_PROG_SLEEP 1000 ++#define AQR107_OP_IN_PROG_TIMEOUT 100000 ++ + struct aqr107_hw_stat { + const char *name; + int reg; +@@ -596,16 +605,52 @@ static void aqr107_link_change_notify(struct phy_device *phydev) + phydev_info(phydev, "Aquantia 1000Base-T2 mode active\n"); + } + ++static int aqr107_wait_processor_intensive_op(struct phy_device *phydev) ++{ ++ int val, err; ++ ++ /* The datasheet notes to wait at least 1ms after issuing a ++ * processor intensive operation before checking. ++ * We cannot use the 'sleep_before_read' parameter of read_poll_timeout ++ * because that just determines the maximum time slept, not the minimum. 
++ */ ++ usleep_range(1000, 5000); ++ ++ err = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, ++ VEND1_GLOBAL_GEN_STAT2, val, ++ !(val & VEND1_GLOBAL_GEN_STAT2_OP_IN_PROG), ++ AQR107_OP_IN_PROG_SLEEP, ++ AQR107_OP_IN_PROG_TIMEOUT, false); ++ if (err) { ++ phydev_err(phydev, "timeout: processor-intensive MDIO operation\n"); ++ return err; ++ } ++ ++ return 0; ++} ++ + static int aqr107_suspend(struct phy_device *phydev) + { +- return phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, MDIO_CTRL1, +- MDIO_CTRL1_LPOWER); ++ int err; ++ ++ err = phy_set_bits_mmd(phydev, MDIO_MMD_VEND1, MDIO_CTRL1, ++ MDIO_CTRL1_LPOWER); ++ if (err) ++ return err; ++ ++ return aqr107_wait_processor_intensive_op(phydev); + } + + static int aqr107_resume(struct phy_device *phydev) + { +- return phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, MDIO_CTRL1, +- MDIO_CTRL1_LPOWER); ++ int err; ++ ++ err = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, MDIO_CTRL1, ++ MDIO_CTRL1_LPOWER); ++ if (err) ++ return err; ++ ++ return aqr107_wait_processor_intensive_op(phydev); + } + + static int aqr107_probe(struct phy_device *phydev) +diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c +index 34483a4bd688a..e8e1101911b2f 100644 +--- a/drivers/net/phy/micrel.c ++++ b/drivers/net/phy/micrel.c +@@ -2662,16 +2662,19 @@ static int lan8804_config_init(struct phy_device *phydev) + static irqreturn_t lan8814_handle_interrupt(struct phy_device *phydev) + { + int irq_status, tsu_irq_status; ++ int ret = IRQ_NONE; + + irq_status = phy_read(phydev, LAN8814_INTS); +- if (irq_status > 0 && (irq_status & LAN8814_INT_LINK)) +- phy_trigger_machine(phydev); +- + if (irq_status < 0) { + phy_error(phydev); + return IRQ_NONE; + } + ++ if (irq_status & LAN8814_INT_LINK) { ++ phy_trigger_machine(phydev); ++ ret = IRQ_HANDLED; ++ } ++ + while (1) { + tsu_irq_status = lanphy_read_page_reg(phydev, 4, + LAN8814_INTR_STS_REG); +@@ -2680,12 +2683,15 @@ static irqreturn_t lan8814_handle_interrupt(struct phy_device *phydev) + (tsu_irq_status & (LAN8814_INTR_STS_REG_1588_TSU0_ | + LAN8814_INTR_STS_REG_1588_TSU1_ | + LAN8814_INTR_STS_REG_1588_TSU2_ | +- LAN8814_INTR_STS_REG_1588_TSU3_))) ++ LAN8814_INTR_STS_REG_1588_TSU3_))) { + lan8814_handle_ptp_interrupt(phydev); +- else ++ ret = IRQ_HANDLED; ++ } else { + break; ++ } + } +- return IRQ_HANDLED; ++ ++ return ret; + } + + static int lan8814_ack_interrupt(struct phy_device *phydev) +diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c +index b07dde6f0abf2..b9899913d2467 100644 +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -1275,10 +1275,12 @@ static int team_port_add(struct team *team, struct net_device *port_dev, + } + } + +- netif_addr_lock_bh(dev); +- dev_uc_sync_multiple(port_dev, dev); +- dev_mc_sync_multiple(port_dev, dev); +- netif_addr_unlock_bh(dev); ++ if (dev->flags & IFF_UP) { ++ netif_addr_lock_bh(dev); ++ dev_uc_sync_multiple(port_dev, dev); ++ dev_mc_sync_multiple(port_dev, dev); ++ netif_addr_unlock_bh(dev); ++ } + + port->index = -1; + list_add_tail_rcu(&port->list, &team->port_list); +@@ -1349,8 +1351,10 @@ static int team_port_del(struct team *team, struct net_device *port_dev) + netdev_rx_handler_unregister(port_dev); + team_port_disable_netpoll(port); + vlan_vids_del_by_dev(port_dev, dev); +- dev_uc_unsync(port_dev, dev); +- dev_mc_unsync(port_dev, dev); ++ if (dev->flags & IFF_UP) { ++ dev_uc_unsync(port_dev, dev); ++ dev_mc_unsync(port_dev, dev); ++ } + dev_close(port_dev); + team_port_leave(team, port); + +@@ -1700,6 +1704,14 @@ static int team_open(struct 
net_device *dev) + + static int team_close(struct net_device *dev) + { ++ struct team *team = netdev_priv(dev); ++ struct team_port *port; ++ ++ list_for_each_entry(port, &team->port_list, list) { ++ dev_uc_unsync(port->dev, dev); ++ dev_mc_unsync(port->dev, dev); ++ } ++ + return 0; + } + +diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c +index d0f3b6d7f4089..5c804bcabfe6b 100644 +--- a/drivers/net/wireguard/netlink.c ++++ b/drivers/net/wireguard/netlink.c +@@ -436,14 +436,13 @@ static int set_peer(struct wg_device *wg, struct nlattr **attrs) + if (attrs[WGPEER_A_ENDPOINT]) { + struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]); + size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]); ++ struct endpoint endpoint = { { { 0 } } }; + +- if ((len == sizeof(struct sockaddr_in) && +- addr->sa_family == AF_INET) || +- (len == sizeof(struct sockaddr_in6) && +- addr->sa_family == AF_INET6)) { +- struct endpoint endpoint = { { { 0 } } }; +- +- memcpy(&endpoint.addr, addr, len); ++ if (len == sizeof(struct sockaddr_in) && addr->sa_family == AF_INET) { ++ endpoint.addr4 = *(struct sockaddr_in *)addr; ++ wg_socket_set_peer_endpoint(peer, &endpoint); ++ } else if (len == sizeof(struct sockaddr_in6) && addr->sa_family == AF_INET6) { ++ endpoint.addr6 = *(struct sockaddr_in6 *)addr; + wg_socket_set_peer_endpoint(peer, &endpoint); + } + } +diff --git a/drivers/net/wireguard/selftest/ratelimiter.c b/drivers/net/wireguard/selftest/ratelimiter.c +index ba87d294604fe..d4bb40a695ab6 100644 +--- a/drivers/net/wireguard/selftest/ratelimiter.c ++++ b/drivers/net/wireguard/selftest/ratelimiter.c +@@ -6,29 +6,28 @@ + #ifdef DEBUG + + #include +-#include + + static const struct { + bool result; +- u64 nsec_to_sleep_before; ++ unsigned int msec_to_sleep_before; + } expected_results[] __initconst = { + [0 ... 
PACKETS_BURSTABLE - 1] = { true, 0 }, + [PACKETS_BURSTABLE] = { false, 0 }, +- [PACKETS_BURSTABLE + 1] = { true, NSEC_PER_SEC / PACKETS_PER_SECOND }, ++ [PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND }, + [PACKETS_BURSTABLE + 2] = { false, 0 }, +- [PACKETS_BURSTABLE + 3] = { true, (NSEC_PER_SEC / PACKETS_PER_SECOND) * 2 }, ++ [PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 }, + [PACKETS_BURSTABLE + 4] = { true, 0 }, + [PACKETS_BURSTABLE + 5] = { false, 0 } + }; + + static __init unsigned int maximum_jiffies_at_index(int index) + { +- u64 total_nsecs = 2 * NSEC_PER_SEC / PACKETS_PER_SECOND / 3; ++ unsigned int total_msecs = 2 * MSEC_PER_SEC / PACKETS_PER_SECOND / 3; + int i; + + for (i = 0; i <= index; ++i) +- total_nsecs += expected_results[i].nsec_to_sleep_before; +- return nsecs_to_jiffies(total_nsecs); ++ total_msecs += expected_results[i].msec_to_sleep_before; ++ return msecs_to_jiffies(total_msecs); + } + + static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4, +@@ -43,12 +42,8 @@ static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4, + loop_start_time = jiffies; + + for (i = 0; i < ARRAY_SIZE(expected_results); ++i) { +- if (expected_results[i].nsec_to_sleep_before) { +- ktime_t timeout = ktime_add(ktime_add_ns(ktime_get_coarse_boottime(), TICK_NSEC * 4 / 3), +- ns_to_ktime(expected_results[i].nsec_to_sleep_before)); +- set_current_state(TASK_UNINTERRUPTIBLE); +- schedule_hrtimeout_range_clock(&timeout, 0, HRTIMER_MODE_ABS, CLOCK_BOOTTIME); +- } ++ if (expected_results[i].msec_to_sleep_before) ++ msleep(expected_results[i].msec_to_sleep_before); + + if (time_is_before_jiffies(loop_start_time + + maximum_jiffies_at_index(i))) +@@ -132,7 +127,7 @@ bool __init wg_ratelimiter_selftest(void) + if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN)) + return true; + +- BUILD_BUG_ON(NSEC_PER_SEC % PACKETS_PER_SECOND != 0); ++ BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0); + + if (wg_ratelimiter_init()) + goto out; +@@ -172,7 +167,7 @@ bool __init wg_ratelimiter_selftest(void) + ++test; + #endif + +- for (trials = TRIALS_BEFORE_GIVING_UP;;) { ++ for (trials = TRIALS_BEFORE_GIVING_UP; IS_ENABLED(DEBUG_RATELIMITER_TIMINGS);) { + int test_count = 0, ret; + + ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count); +diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig +index a647a406b87be..b20409f8c13ab 100644 +--- a/drivers/net/wireless/intel/iwlwifi/Kconfig ++++ b/drivers/net/wireless/intel/iwlwifi/Kconfig +@@ -140,6 +140,7 @@ config IWLMEI + depends on INTEL_MEI + depends on PM + depends on CFG80211 ++ depends on BROKEN + help + Enables the iwlmei kernel module. 
+ +diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +index 9e832b27170fe..a4eb025f504f3 100644 +--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c ++++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +@@ -1138,7 +1138,7 @@ u32 mt7615_mac_get_sta_tid_sn(struct mt7615_dev *dev, int wcid, u8 tid) + offset %= 32; + + val = mt76_rr(dev, addr); +- val >>= (tid % 32); ++ val >>= offset; + + if (offset > 20) { + addr += 4; +diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c +index 629d10fcf53b2..b9f1a8e9f88cb 100644 +--- a/drivers/nvdimm/pmem.c ++++ b/drivers/nvdimm/pmem.c +@@ -45,7 +45,7 @@ static struct nd_region *to_region(struct pmem_device *pmem) + return to_nd_region(to_dev(pmem)->parent); + } + +-static phys_addr_t to_phys(struct pmem_device *pmem, phys_addr_t offset) ++static phys_addr_t pmem_to_phys(struct pmem_device *pmem, phys_addr_t offset) + { + return pmem->phys_addr + offset; + } +@@ -63,7 +63,7 @@ static phys_addr_t to_offset(struct pmem_device *pmem, sector_t sector) + static void pmem_mkpage_present(struct pmem_device *pmem, phys_addr_t offset, + unsigned int len) + { +- phys_addr_t phys = to_phys(pmem, offset); ++ phys_addr_t phys = pmem_to_phys(pmem, offset); + unsigned long pfn_start, pfn_end, pfn; + + /* only pmem in the linear map supports HWPoison */ +@@ -97,7 +97,7 @@ static void pmem_clear_bb(struct pmem_device *pmem, sector_t sector, long blks) + static long __pmem_clear_poison(struct pmem_device *pmem, + phys_addr_t offset, unsigned int len) + { +- phys_addr_t phys = to_phys(pmem, offset); ++ phys_addr_t phys = pmem_to_phys(pmem, offset); + long cleared = nvdimm_clear_poison(to_dev(pmem), phys, len); + + if (cleared > 0) { +diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c +index d702d7d60235d..2d23b7d41f7e6 100644 +--- a/drivers/nvme/host/apple.c ++++ b/drivers/nvme/host/apple.c +@@ -1502,7 +1502,7 @@ static int apple_nvme_probe(struct platform_device *pdev) + + if (!blk_get_queue(anv->ctrl.admin_q)) { + nvme_start_admin_queue(&anv->ctrl); +- blk_cleanup_queue(anv->ctrl.admin_q); ++ blk_mq_destroy_queue(anv->ctrl.admin_q); + anv->ctrl.admin_q = NULL; + ret = -ENODEV; + goto put_dev; +diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c +index 2f965356f3453..6d76fc608b741 100644 +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -4105,7 +4105,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) + if (!nvme_ns_head_multipath(ns->head)) + nvme_cdev_del(&ns->cdev, &ns->cdev_device); + del_gendisk(ns->disk); +- blk_cleanup_queue(ns->queue); + + down_write(&ns->ctrl->namespaces_rwsem); + list_del_init(&ns->list); +diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c +index 4aff83b1b0c05..9a5ce70d7f215 100644 +--- a/drivers/nvme/host/fc.c ++++ b/drivers/nvme/host/fc.c +@@ -2392,7 +2392,7 @@ nvme_fc_ctrl_free(struct kref *ref) + unsigned long flags; + + if (ctrl->ctrl.tagset) { +- blk_cleanup_queue(ctrl->ctrl.connect_q); ++ blk_mq_destroy_queue(ctrl->ctrl.connect_q); + blk_mq_free_tag_set(&ctrl->tag_set); + } + +@@ -2402,8 +2402,8 @@ nvme_fc_ctrl_free(struct kref *ref) + spin_unlock_irqrestore(&ctrl->rport->lock, flags); + + nvme_start_admin_queue(&ctrl->ctrl); +- blk_cleanup_queue(ctrl->ctrl.admin_q); +- blk_cleanup_queue(ctrl->ctrl.fabrics_q); ++ blk_mq_destroy_queue(ctrl->ctrl.admin_q); ++ blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); + blk_mq_free_tag_set(&ctrl->admin_tag_set); + + kfree(ctrl->queues); +@@ -2953,7 +2953,7 @@ 
nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) + out_delete_hw_queues: + nvme_fc_delete_hw_io_queues(ctrl); + out_cleanup_blk_queue: +- blk_cleanup_queue(ctrl->ctrl.connect_q); ++ blk_mq_destroy_queue(ctrl->ctrl.connect_q); + out_free_tag_set: + blk_mq_free_tag_set(&ctrl->tag_set); + nvme_fc_free_io_queues(ctrl); +@@ -3642,9 +3642,9 @@ fail_ctrl: + return ERR_PTR(-EIO); + + out_cleanup_admin_q: +- blk_cleanup_queue(ctrl->ctrl.admin_q); ++ blk_mq_destroy_queue(ctrl->ctrl.admin_q); + out_cleanup_fabrics_q: +- blk_cleanup_queue(ctrl->ctrl.fabrics_q); ++ blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); + out_free_admin_tag_set: + blk_mq_free_tag_set(&ctrl->admin_tag_set); + out_free_queues: +diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c +index 9f6614f7dbeb1..3516678d37541 100644 +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -1760,7 +1760,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev) + * queue to flush these to completion. + */ + nvme_start_admin_queue(&dev->ctrl); +- blk_cleanup_queue(dev->ctrl.admin_q); ++ blk_mq_destroy_queue(dev->ctrl.admin_q); + blk_mq_free_tag_set(&dev->admin_tagset); + } + } +diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c +index 46c2dcf72f7ea..240024dd5d857 100644 +--- a/drivers/nvme/host/rdma.c ++++ b/drivers/nvme/host/rdma.c +@@ -840,8 +840,8 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, + bool remove) + { + if (remove) { +- blk_cleanup_queue(ctrl->ctrl.admin_q); +- blk_cleanup_queue(ctrl->ctrl.fabrics_q); ++ blk_mq_destroy_queue(ctrl->ctrl.admin_q); ++ blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); + blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); + } + if (ctrl->async_event_sqe.data) { +@@ -935,10 +935,10 @@ out_stop_queue: + nvme_cancel_admin_tagset(&ctrl->ctrl); + out_cleanup_queue: + if (new) +- blk_cleanup_queue(ctrl->ctrl.admin_q); ++ blk_mq_destroy_queue(ctrl->ctrl.admin_q); + out_cleanup_fabrics_q: + if (new) +- blk_cleanup_queue(ctrl->ctrl.fabrics_q); ++ blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); + out_free_tagset: + if (new) + blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); +@@ -957,7 +957,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, + bool remove) + { + if (remove) { +- blk_cleanup_queue(ctrl->ctrl.connect_q); ++ blk_mq_destroy_queue(ctrl->ctrl.connect_q); + blk_mq_free_tag_set(ctrl->ctrl.tagset); + } + nvme_rdma_free_io_queues(ctrl); +@@ -1012,7 +1012,7 @@ out_wait_freeze_timed_out: + out_cleanup_connect_q: + nvme_cancel_tagset(&ctrl->ctrl); + if (new) +- blk_cleanup_queue(ctrl->ctrl.connect_q); ++ blk_mq_destroy_queue(ctrl->ctrl.connect_q); + out_free_tag_set: + if (new) + blk_mq_free_tag_set(ctrl->ctrl.tagset); +diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c +index daa0e160e1212..d7e5bbdb9b75a 100644 +--- a/drivers/nvme/host/tcp.c ++++ b/drivers/nvme/host/tcp.c +@@ -1881,7 +1881,7 @@ static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) + { + nvme_tcp_stop_io_queues(ctrl); + if (remove) { +- blk_cleanup_queue(ctrl->connect_q); ++ blk_mq_destroy_queue(ctrl->connect_q); + blk_mq_free_tag_set(ctrl->tagset); + } + nvme_tcp_free_io_queues(ctrl); +@@ -1936,7 +1936,7 @@ out_wait_freeze_timed_out: + out_cleanup_connect_q: + nvme_cancel_tagset(ctrl); + if (new) +- blk_cleanup_queue(ctrl->connect_q); ++ blk_mq_destroy_queue(ctrl->connect_q); + out_free_tag_set: + if (new) + blk_mq_free_tag_set(ctrl->tagset); +@@ -1949,8 +1949,8 @@ static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove) 
+ { + nvme_tcp_stop_queue(ctrl, 0); + if (remove) { +- blk_cleanup_queue(ctrl->admin_q); +- blk_cleanup_queue(ctrl->fabrics_q); ++ blk_mq_destroy_queue(ctrl->admin_q); ++ blk_mq_destroy_queue(ctrl->fabrics_q); + blk_mq_free_tag_set(ctrl->admin_tagset); + } + nvme_tcp_free_admin_queue(ctrl); +@@ -2008,10 +2008,10 @@ out_stop_queue: + nvme_cancel_admin_tagset(ctrl); + out_cleanup_queue: + if (new) +- blk_cleanup_queue(ctrl->admin_q); ++ blk_mq_destroy_queue(ctrl->admin_q); + out_cleanup_fabrics_q: + if (new) +- blk_cleanup_queue(ctrl->fabrics_q); ++ blk_mq_destroy_queue(ctrl->fabrics_q); + out_free_tagset: + if (new) + blk_mq_free_tag_set(ctrl->admin_tagset); +diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c +index 59024af2da2e3..0f5c77e22a0a9 100644 +--- a/drivers/nvme/target/loop.c ++++ b/drivers/nvme/target/loop.c +@@ -266,8 +266,8 @@ static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) + if (!test_and_clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags)) + return; + nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); +- blk_cleanup_queue(ctrl->ctrl.admin_q); +- blk_cleanup_queue(ctrl->ctrl.fabrics_q); ++ blk_mq_destroy_queue(ctrl->ctrl.admin_q); ++ blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); + blk_mq_free_tag_set(&ctrl->admin_tag_set); + } + +@@ -283,7 +283,7 @@ static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) + mutex_unlock(&nvme_loop_ctrl_mutex); + + if (nctrl->tagset) { +- blk_cleanup_queue(ctrl->ctrl.connect_q); ++ blk_mq_destroy_queue(ctrl->ctrl.connect_q); + blk_mq_free_tag_set(&ctrl->tag_set); + } + kfree(ctrl->queues); +@@ -410,9 +410,9 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) + + out_cleanup_queue: + clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); +- blk_cleanup_queue(ctrl->ctrl.admin_q); ++ blk_mq_destroy_queue(ctrl->ctrl.admin_q); + out_cleanup_fabrics_q: +- blk_cleanup_queue(ctrl->ctrl.fabrics_q); ++ blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); + out_free_tagset: + blk_mq_free_tag_set(&ctrl->admin_tag_set); + out_free_sq: +@@ -554,7 +554,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) + return 0; + + out_cleanup_connect_q: +- blk_cleanup_queue(ctrl->ctrl.connect_q); ++ blk_mq_destroy_queue(ctrl->ctrl.connect_q); + out_free_tagset: + blk_mq_free_tag_set(&ctrl->tag_set); + out_destroy_queues: +diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c +index 80d8309652a4d..b80a9b74662b1 100644 +--- a/drivers/perf/arm-cmn.c ++++ b/drivers/perf/arm-cmn.c +@@ -36,7 +36,7 @@ + #define CMN_CI_CHILD_COUNT GENMASK_ULL(15, 0) + #define CMN_CI_CHILD_PTR_OFFSET GENMASK_ULL(31, 16) + +-#define CMN_CHILD_NODE_ADDR GENMASK(27, 0) ++#define CMN_CHILD_NODE_ADDR GENMASK(29, 0) + #define CMN_CHILD_NODE_EXTERNAL BIT(31) + + #define CMN_MAX_DIMENSION 12 +diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c +index a4d7d9bd100d3..67712c77d806f 100644 +--- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c ++++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c +@@ -274,7 +274,6 @@ struct mvebu_a3700_comphy_lane { + int submode; + bool invert_tx; + bool invert_rx; +- bool needs_reset; + }; + + struct gbe_phy_init_data_fix { +@@ -1097,40 +1096,12 @@ mvebu_a3700_comphy_pcie_power_off(struct mvebu_a3700_comphy_lane *lane) + 0x0, PU_PLL_BIT | PU_RX_BIT | PU_TX_BIT); + } + +-static int mvebu_a3700_comphy_reset(struct phy *phy) ++static void mvebu_a3700_comphy_usb3_power_off(struct mvebu_a3700_comphy_lane *lane) + { +- struct mvebu_a3700_comphy_lane *lane = 
phy_get_drvdata(phy); +- u16 mask, data; +- +- dev_dbg(lane->dev, "resetting lane %d\n", lane->id); +- +- /* COMPHY reset for internal logic */ +- comphy_lane_reg_set(lane, COMPHY_SFT_RESET, +- SFT_RST_NO_REG, SFT_RST_NO_REG); +- +- /* COMPHY register reset (cleared automatically) */ +- comphy_lane_reg_set(lane, COMPHY_SFT_RESET, SFT_RST, SFT_RST); +- +- /* PIPE soft and register reset */ +- data = PIPE_SOFT_RESET | PIPE_REG_RESET; +- mask = data; +- comphy_lane_reg_set(lane, COMPHY_PIPE_RST_CLK_CTRL, data, mask); +- +- /* Release PIPE register reset */ +- comphy_lane_reg_set(lane, COMPHY_PIPE_RST_CLK_CTRL, +- 0x0, PIPE_REG_RESET); +- +- /* Reset SB configuration register (only for lanes 0 and 1) */ +- if (lane->id == 0 || lane->id == 1) { +- u32 mask, data; +- +- data = PIN_RESET_CORE_BIT | PIN_RESET_COMPHY_BIT | +- PIN_PU_PLL_BIT | PIN_PU_RX_BIT | PIN_PU_TX_BIT; +- mask = data | PIN_PU_IVREF_BIT | PIN_TX_IDLE_BIT; +- comphy_periph_reg_set(lane, COMPHY_PHY_CFG1, data, mask); +- } +- +- return 0; ++ /* ++ * The USB3 MAC sets the USB3 PHY to low state, so we do not ++ * need to power off USB3 PHY again. ++ */ + } + + static bool mvebu_a3700_comphy_check_mode(int lane, +@@ -1171,10 +1142,6 @@ static int mvebu_a3700_comphy_set_mode(struct phy *phy, enum phy_mode mode, + (lane->mode != mode || lane->submode != submode)) + return -EBUSY; + +- /* If changing mode, ensure reset is called */ +- if (lane->mode != PHY_MODE_INVALID && lane->mode != mode) +- lane->needs_reset = true; +- + /* Just remember the mode, ->power_on() will do the real setup */ + lane->mode = mode; + lane->submode = submode; +@@ -1185,7 +1152,6 @@ static int mvebu_a3700_comphy_set_mode(struct phy *phy, enum phy_mode mode, + static int mvebu_a3700_comphy_power_on(struct phy *phy) + { + struct mvebu_a3700_comphy_lane *lane = phy_get_drvdata(phy); +- int ret; + + if (!mvebu_a3700_comphy_check_mode(lane->id, lane->mode, + lane->submode)) { +@@ -1193,14 +1159,6 @@ static int mvebu_a3700_comphy_power_on(struct phy *phy) + return -EINVAL; + } + +- if (lane->needs_reset) { +- ret = mvebu_a3700_comphy_reset(phy); +- if (ret) +- return ret; +- +- lane->needs_reset = false; +- } +- + switch (lane->mode) { + case PHY_MODE_USB_HOST_SS: + dev_dbg(lane->dev, "set lane %d to USB3 host mode\n", lane->id); +@@ -1224,38 +1182,28 @@ static int mvebu_a3700_comphy_power_off(struct phy *phy) + { + struct mvebu_a3700_comphy_lane *lane = phy_get_drvdata(phy); + +- switch (lane->mode) { +- case PHY_MODE_USB_HOST_SS: +- /* +- * The USB3 MAC sets the USB3 PHY to low state, so we do not +- * need to power off USB3 PHY again. 
+- */ +- break; +- +- case PHY_MODE_SATA: +- mvebu_a3700_comphy_sata_power_off(lane); +- break; +- +- case PHY_MODE_ETHERNET: ++ switch (lane->id) { ++ case 0: ++ mvebu_a3700_comphy_usb3_power_off(lane); + mvebu_a3700_comphy_ethernet_power_off(lane); +- break; +- +- case PHY_MODE_PCIE: ++ return 0; ++ case 1: + mvebu_a3700_comphy_pcie_power_off(lane); +- break; +- ++ mvebu_a3700_comphy_ethernet_power_off(lane); ++ return 0; ++ case 2: ++ mvebu_a3700_comphy_usb3_power_off(lane); ++ mvebu_a3700_comphy_sata_power_off(lane); ++ return 0; + default: + dev_err(lane->dev, "invalid COMPHY mode\n"); + return -EINVAL; + } +- +- return 0; + } + + static const struct phy_ops mvebu_a3700_comphy_ops = { + .power_on = mvebu_a3700_comphy_power_on, + .power_off = mvebu_a3700_comphy_power_off, +- .reset = mvebu_a3700_comphy_reset, + .set_mode = mvebu_a3700_comphy_set_mode, + .owner = THIS_MODULE, + }; +@@ -1393,8 +1341,7 @@ static int mvebu_a3700_comphy_probe(struct platform_device *pdev) + * To avoid relying on the bootloader/firmware configuration, + * power off all comphys. + */ +- mvebu_a3700_comphy_reset(phy); +- lane->needs_reset = false; ++ mvebu_a3700_comphy_power_off(phy); + } + + provider = devm_of_phy_provider_register(&pdev->dev, +diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c +index ba6d787896606..e8489331f12b8 100644 +--- a/drivers/s390/block/dasd.c ++++ b/drivers/s390/block/dasd.c +@@ -3280,7 +3280,7 @@ static int dasd_alloc_queue(struct dasd_block *block) + static void dasd_free_queue(struct dasd_block *block) + { + if (block->request_queue) { +- blk_cleanup_queue(block->request_queue); ++ blk_mq_destroy_queue(block->request_queue); + blk_mq_free_tag_set(&block->tag_set); + block->request_queue = NULL; + } +diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c +index dc78a523a69f2..b6b938aa66158 100644 +--- a/drivers/s390/block/dasd_alias.c ++++ b/drivers/s390/block/dasd_alias.c +@@ -675,12 +675,12 @@ int dasd_alias_remove_device(struct dasd_device *device) + struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *base_device) + { + struct dasd_eckd_private *alias_priv, *private = base_device->private; +- struct alias_pav_group *group = private->pavgroup; + struct alias_lcu *lcu = private->lcu; + struct dasd_device *alias_device; ++ struct alias_pav_group *group; + unsigned long flags; + +- if (!group || !lcu) ++ if (!lcu) + return NULL; + if (lcu->pav == NO_PAV || + lcu->flags & (NEED_UAC_UPDATE | UPDATE_PENDING)) +@@ -697,6 +697,11 @@ struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *base_device) + } + + spin_lock_irqsave(&lcu->lock, flags); ++ group = private->pavgroup; ++ if (!group) { ++ spin_unlock_irqrestore(&lcu->lock, flags); ++ return NULL; ++ } + alias_device = group->next; + if (!alias_device) { + if (list_empty(&group->aliaslist)) { +diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c +index a7a33ebf4bbe9..5a83f0a39901b 100644 +--- a/drivers/s390/block/dasd_genhd.c ++++ b/drivers/s390/block/dasd_genhd.c +@@ -41,8 +41,8 @@ int dasd_gendisk_alloc(struct dasd_block *block) + if (base->devindex >= DASD_PER_MAJOR) + return -EBUSY; + +- gdp = __alloc_disk_node(block->request_queue, NUMA_NO_NODE, +- &dasd_bio_compl_lkclass); ++ gdp = blk_mq_alloc_disk_for_queue(block->request_queue, ++ &dasd_bio_compl_lkclass); + if (!gdp) + return -ENOMEM; + +diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c +index 8352f90d997df..ae9a107c520d0 100644 +--- a/drivers/scsi/hosts.c ++++ 
b/drivers/scsi/hosts.c +@@ -182,6 +182,15 @@ void scsi_remove_host(struct Scsi_Host *shost) + mutex_unlock(&shost->scan_mutex); + scsi_proc_host_rm(shost); + ++ /* ++ * New SCSI devices cannot be attached anymore because of the SCSI host ++ * state so drop the tag set refcnt. Wait until the tag set refcnt drops ++ * to zero because .exit_cmd_priv implementations may need the host ++ * pointer. ++ */ ++ kref_put(&shost->tagset_refcnt, scsi_mq_free_tags); ++ wait_for_completion(&shost->tagset_freed); ++ + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_set_state(shost, SHOST_DEL)) + BUG_ON(scsi_host_set_state(shost, SHOST_DEL_RECOVERY)); +@@ -240,6 +249,9 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, + if (error) + goto fail; + ++ kref_init(&shost->tagset_refcnt); ++ init_completion(&shost->tagset_freed); ++ + /* + * Increase usage count temporarily here so that calling + * scsi_autopm_put_host() will trigger runtime idle if there is +@@ -312,6 +324,7 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, + pm_runtime_disable(&shost->shost_gendev); + pm_runtime_set_suspended(&shost->shost_gendev); + pm_runtime_put_noidle(&shost->shost_gendev); ++ kref_put(&shost->tagset_refcnt, scsi_mq_free_tags); + fail: + return error; + } +@@ -345,9 +358,6 @@ static void scsi_host_dev_release(struct device *dev) + kfree(dev_name(&shost->shost_dev)); + } + +- if (shost->tag_set.tags) +- scsi_mq_destroy_tags(shost); +- + kfree(shost->shost_data); + + ida_simple_remove(&host_index_ida, shost->host_no); +diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c +index 9a1ae52bb621d..a6d3471a61057 100644 +--- a/drivers/scsi/mpt3sas/mpt3sas_base.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_base.c +@@ -2993,7 +2993,7 @@ _base_config_dma_addressing(struct MPT3SAS_ADAPTER *ioc, struct pci_dev *pdev) + + if (ioc->is_mcpu_endpoint || + sizeof(dma_addr_t) == 4 || ioc->use_32bit_dma || +- dma_get_required_mask(&pdev->dev) <= 32) ++ dma_get_required_mask(&pdev->dev) <= DMA_BIT_MASK(32)) + ioc->dma_mask = 32; + /* Set 63 bit DMA mask for all SAS3 and SAS35 controllers */ + else if (ioc->hba_mpi_version_belonged > MPI2_VERSION) +diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c +index 62666df1a59eb..4acff4e84b909 100644 +--- a/drivers/scsi/qla2xxx/qla_target.c ++++ b/drivers/scsi/qla2xxx/qla_target.c +@@ -2151,8 +2151,10 @@ static int __qlt_24xx_handle_abts(struct scsi_qla_host *vha, + + abort_cmd = ha->tgt.tgt_ops->find_cmd_by_tag(sess, + le32_to_cpu(abts->exchange_addr_to_abort)); +- if (!abort_cmd) ++ if (!abort_cmd) { ++ mempool_free(mcmd, qla_tgt_mgmt_cmd_mempool); + return -EIO; ++ } + mcmd->unpacked_lun = abort_cmd->se_cmd.orig_fe_lun; + + if (abort_cmd->qpair) { +diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c +index f5c876d03c1ad..7e990f7a9f164 100644 +--- a/drivers/scsi/scsi_lib.c ++++ b/drivers/scsi/scsi_lib.c +@@ -168,7 +168,7 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy) + * Requeue this command. It will go before all other commands + * that are already in the queue. Schedule requeue work under + * lock such that the kblockd_schedule_work() call happens +- * before blk_cleanup_queue() finishes. ++ * before blk_mq_destroy_queue() finishes. + */ + cmd->result = 0; + +@@ -429,9 +429,9 @@ static void scsi_starved_list_run(struct Scsi_Host *shost) + * it and the queue. 
Mitigate by taking a reference to the + * queue and never touching the sdev again after we drop the + * host lock. Note: if __scsi_remove_device() invokes +- * blk_cleanup_queue() before the queue is run from this ++ * blk_mq_destroy_queue() before the queue is run from this + * function then blk_run_queue() will return immediately since +- * blk_cleanup_queue() marks the queue with QUEUE_FLAG_DYING. ++ * blk_mq_destroy_queue() marks the queue with QUEUE_FLAG_DYING. + */ + slq = sdev->request_queue; + if (!blk_get_queue(slq)) +@@ -1995,9 +1995,13 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost) + return blk_mq_alloc_tag_set(tag_set); + } + +-void scsi_mq_destroy_tags(struct Scsi_Host *shost) ++void scsi_mq_free_tags(struct kref *kref) + { ++ struct Scsi_Host *shost = container_of(kref, typeof(*shost), ++ tagset_refcnt); ++ + blk_mq_free_tag_set(&shost->tag_set); ++ complete(&shost->tagset_freed); + } + + /** +diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h +index 5c4786310a31d..a0ee31d55f5f1 100644 +--- a/drivers/scsi/scsi_priv.h ++++ b/drivers/scsi/scsi_priv.h +@@ -94,7 +94,7 @@ extern void scsi_run_host_queues(struct Scsi_Host *shost); + extern void scsi_requeue_run_queue(struct work_struct *work); + extern void scsi_start_queue(struct scsi_device *sdev); + extern int scsi_mq_setup_tags(struct Scsi_Host *shost); +-extern void scsi_mq_destroy_tags(struct Scsi_Host *shost); ++extern void scsi_mq_free_tags(struct kref *kref); + extern void scsi_exit_queue(void); + extern void scsi_evt_thread(struct work_struct *work); + +diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c +index 91ac901a66826..5d27f5196de6f 100644 +--- a/drivers/scsi/scsi_scan.c ++++ b/drivers/scsi/scsi_scan.c +@@ -340,6 +340,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, + kfree(sdev); + goto out; + } ++ kref_get(&sdev->host->tagset_refcnt); + sdev->request_queue = q; + q->queuedata = sdev; + __scsi_init_queue(sdev->host, q); +diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c +index 43949798a2e47..5d61f58399dca 100644 +--- a/drivers/scsi/scsi_sysfs.c ++++ b/drivers/scsi/scsi_sysfs.c +@@ -1475,7 +1475,8 @@ void __scsi_remove_device(struct scsi_device *sdev) + scsi_device_set_state(sdev, SDEV_DEL); + mutex_unlock(&sdev->state_mutex); + +- blk_cleanup_queue(sdev->request_queue); ++ blk_mq_destroy_queue(sdev->request_queue); ++ kref_put(&sdev->host->tagset_refcnt, scsi_mq_free_tags); + cancel_work_sync(&sdev->requeue_work); + + if (sdev->host->hostt->slave_destroy) +diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c +index a1a2ac09066fd..cb587e488601c 100644 +--- a/drivers/scsi/sd.c ++++ b/drivers/scsi/sd.c +@@ -3440,8 +3440,8 @@ static int sd_probe(struct device *dev) + if (!sdkp) + goto out; + +- gd = __alloc_disk_node(sdp->request_queue, NUMA_NO_NODE, +- &sd_bio_compl_lkclass); ++ gd = blk_mq_alloc_disk_for_queue(sdp->request_queue, ++ &sd_bio_compl_lkclass); + if (!gd) + goto out_free; + +diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c +index 32d3b8274f148..a278b739d0c5f 100644 +--- a/drivers/scsi/sr.c ++++ b/drivers/scsi/sr.c +@@ -624,8 +624,8 @@ static int sr_probe(struct device *dev) + if (!cd) + goto fail; + +- disk = __alloc_disk_node(sdev->request_queue, NUMA_NO_NODE, +- &sr_bio_compl_lkclass); ++ disk = blk_mq_alloc_disk_for_queue(sdev->request_queue, ++ &sr_bio_compl_lkclass); + if (!disk) + goto fail_free; + mutex_init(&cd->lock); +diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c +index fff0c740c8f33..6f088dd0ba4f3 
100644 +--- a/drivers/thunderbolt/icm.c ++++ b/drivers/thunderbolt/icm.c +@@ -2527,6 +2527,7 @@ struct tb *icm_probe(struct tb_nhi *nhi) + tb->cm_ops = &icm_icl_ops; + break; + ++ case PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_2C_NHI: + case PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_4C_NHI: + icm->is_supported = icm_tgl_is_supported; + icm->get_mode = icm_ar_get_mode; +diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h +index 69083aab2736c..5091677b3f4ba 100644 +--- a/drivers/thunderbolt/nhi.h ++++ b/drivers/thunderbolt/nhi.h +@@ -55,6 +55,7 @@ extern const struct tb_nhi_ops icl_nhi_ops; + * need for the PCI quirk anymore as we will use ICM also on Apple + * hardware. + */ ++#define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_2C_NHI 0x1134 + #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_4C_NHI 0x1137 + #define PCI_DEVICE_ID_INTEL_WIN_RIDGE_2C_NHI 0x157d + #define PCI_DEVICE_ID_INTEL_WIN_RIDGE_2C_BRIDGE 0x157e +diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c +index 2945c1b890880..cb83c66bd8a82 100644 +--- a/drivers/tty/serial/fsl_lpuart.c ++++ b/drivers/tty/serial/fsl_lpuart.c +@@ -2706,14 +2706,15 @@ static int lpuart_probe(struct platform_device *pdev) + lpuart_reg.cons = LPUART_CONSOLE; + handler = lpuart_int; + } +- ret = uart_add_one_port(&lpuart_reg, &sport->port); +- if (ret) +- goto failed_attach_port; + + ret = lpuart_global_reset(sport); + if (ret) + goto failed_reset; + ++ ret = uart_add_one_port(&lpuart_reg, &sport->port); ++ if (ret) ++ goto failed_attach_port; ++ + ret = uart_get_rs485_mode(&sport->port); + if (ret) + goto failed_get_rs485; +@@ -2736,9 +2737,9 @@ static int lpuart_probe(struct platform_device *pdev) + + failed_irq_request: + failed_get_rs485: +-failed_reset: + uart_remove_one_port(&lpuart_reg, &sport->port); + failed_attach_port: ++failed_reset: + lpuart_disable_clks(sport); + return ret; + } +diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c +index d942ab152f5a4..24aa1dcc5ef7a 100644 +--- a/drivers/tty/serial/serial-tegra.c ++++ b/drivers/tty/serial/serial-tegra.c +@@ -525,7 +525,7 @@ static void tegra_uart_tx_dma_complete(void *args) + count = tup->tx_bytes_requested - state.residue; + async_tx_ack(tup->tx_dma_desc); + spin_lock_irqsave(&tup->uport.lock, flags); +- xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1); ++ uart_xmit_advance(&tup->uport, count); + tup->tx_in_progress = 0; + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(&tup->uport); +@@ -613,7 +613,6 @@ static unsigned int tegra_uart_tx_empty(struct uart_port *u) + static void tegra_uart_stop_tx(struct uart_port *u) + { + struct tegra_uart_port *tup = to_tegra_uport(u); +- struct circ_buf *xmit = &tup->uport.state->xmit; + struct dma_tx_state state; + unsigned int count; + +@@ -624,7 +623,7 @@ static void tegra_uart_stop_tx(struct uart_port *u) + dmaengine_tx_status(tup->tx_dma_chan, tup->tx_cookie, &state); + count = tup->tx_bytes_requested - state.residue; + async_tx_ack(tup->tx_dma_desc); +- xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1); ++ uart_xmit_advance(&tup->uport, count); + tup->tx_in_progress = 0; + } + +diff --git a/drivers/tty/serial/tegra-tcu.c b/drivers/tty/serial/tegra-tcu.c +index 4877c54c613d1..889b701ba7c62 100644 +--- a/drivers/tty/serial/tegra-tcu.c ++++ b/drivers/tty/serial/tegra-tcu.c +@@ -101,7 +101,7 @@ static void tegra_tcu_uart_start_tx(struct uart_port *port) + break; + + tegra_tcu_write(tcu, &xmit->buf[xmit->tail], count); +- xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1); ++ 
uart_xmit_advance(port, count); + } + + uart_write_wakeup(port); +diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c +index 829da9cb14a86..55bb0d0422d52 100644 +--- a/drivers/ufs/core/ufshcd.c ++++ b/drivers/ufs/core/ufshcd.c +@@ -9519,7 +9519,7 @@ void ufshcd_remove(struct ufs_hba *hba) + ufs_bsg_remove(hba); + ufshpb_remove(hba); + ufs_sysfs_remove_nodes(hba->dev); +- blk_cleanup_queue(hba->tmf_queue); ++ blk_mq_destroy_queue(hba->tmf_queue); + blk_mq_free_tag_set(&hba->tmf_tag_set); + scsi_remove_host(hba->host); + /* disable interrupts */ +@@ -9815,7 +9815,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) + return 0; + + free_tmf_queue: +- blk_cleanup_queue(hba->tmf_queue); ++ blk_mq_destroy_queue(hba->tmf_queue); + free_tmf_tag_set: + blk_mq_free_tag_set(&hba->tmf_tag_set); + out_remove_scsi_host: +diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c +index dfef85a18eb55..80b29f937c605 100644 +--- a/drivers/usb/core/hub.c ++++ b/drivers/usb/core/hub.c +@@ -6049,7 +6049,7 @@ re_enumerate: + * + * Return: The same as for usb_reset_and_verify_device(). + * However, if a reset is already in progress (for instance, if a +- * driver doesn't have pre_ or post_reset() callbacks, and while ++ * driver doesn't have pre_reset() or post_reset() callbacks, and while + * being unbound or re-bound during the ongoing reset its disconnect() + * or probe() routine tries to perform a second, nested reset), the + * routine returns -EINPROGRESS. +diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c +index 1db9f51f98aef..08ca65ffe57b7 100644 +--- a/drivers/usb/dwc3/core.c ++++ b/drivers/usb/dwc3/core.c +@@ -1718,12 +1718,6 @@ static int dwc3_probe(struct platform_device *pdev) + + dwc3_get_properties(dwc); + +- if (!dwc->sysdev_is_parent) { +- ret = dma_set_mask_and_coherent(dwc->sysdev, DMA_BIT_MASK(64)); +- if (ret) +- return ret; +- } +- + dwc->reset = devm_reset_control_array_get_optional_shared(dev); + if (IS_ERR(dwc->reset)) + return PTR_ERR(dwc->reset); +@@ -1789,6 +1783,13 @@ static int dwc3_probe(struct platform_device *pdev) + platform_set_drvdata(pdev, dwc); + dwc3_cache_hwparams(dwc); + ++ if (!dwc->sysdev_is_parent && ++ DWC3_GHWPARAMS0_AWIDTH(dwc->hwparams.hwparams0) == 64) { ++ ret = dma_set_mask_and_coherent(dwc->sysdev, DMA_BIT_MASK(64)); ++ if (ret) ++ goto disable_clks; ++ } ++ + spin_lock_init(&dwc->lock); + mutex_init(&dwc->mutex); + +diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c +index a5e8374a8d710..697683e3fbffa 100644 +--- a/drivers/usb/serial/option.c ++++ b/drivers/usb/serial/option.c +@@ -256,6 +256,7 @@ static void option_instat_callback(struct urb *urb); + #define QUECTEL_PRODUCT_EM060K 0x030b + #define QUECTEL_PRODUCT_EM12 0x0512 + #define QUECTEL_PRODUCT_RM500Q 0x0800 ++#define QUECTEL_PRODUCT_RM520N 0x0801 + #define QUECTEL_PRODUCT_EC200S_CN 0x6002 + #define QUECTEL_PRODUCT_EC200T 0x6026 + #define QUECTEL_PRODUCT_RM500K 0x7001 +@@ -1138,6 +1139,8 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0xff, 0xff), + .driver_info = NUMEP2 }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0, 0) }, ++ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, 0x0203, 0xff), /* BG95-M3 */ ++ .driver_info = ZLP }, + { USB_DEVICE(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_BG96), + .driver_info = RSVD(4) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 
0xff), +@@ -1159,6 +1162,9 @@ static const struct usb_device_id option_ids[] = { + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), + .driver_info = ZLP }, ++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0xff, 0x30) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0x40) }, ++ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) }, +diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c +index d5f3f763717ea..d4b2519257962 100644 +--- a/drivers/xen/xenbus/xenbus_client.c ++++ b/drivers/xen/xenbus/xenbus_client.c +@@ -382,9 +382,10 @@ int xenbus_setup_ring(struct xenbus_device *dev, gfp_t gfp, void **vaddr, + unsigned long ring_size = nr_pages * XEN_PAGE_SIZE; + grant_ref_t gref_head; + unsigned int i; ++ void *addr; + int ret; + +- *vaddr = alloc_pages_exact(ring_size, gfp | __GFP_ZERO); ++ addr = *vaddr = alloc_pages_exact(ring_size, gfp | __GFP_ZERO); + if (!*vaddr) { + ret = -ENOMEM; + goto err; +@@ -401,13 +402,15 @@ int xenbus_setup_ring(struct xenbus_device *dev, gfp_t gfp, void **vaddr, + unsigned long gfn; + + if (is_vmalloc_addr(*vaddr)) +- gfn = pfn_to_gfn(vmalloc_to_pfn(vaddr[i])); ++ gfn = pfn_to_gfn(vmalloc_to_pfn(addr)); + else +- gfn = virt_to_gfn(vaddr[i]); ++ gfn = virt_to_gfn(addr); + + grefs[i] = gnttab_claim_grant_reference(&gref_head); + gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id, + gfn, 0); ++ ++ addr += XEN_PAGE_SIZE; + } + + return 0; +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index 781952c5a5c23..20ad619a8a973 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -4586,6 +4586,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) + + set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags); + ++ /* ++ * If we had UNFINISHED_DROPS we could still be processing them, so ++ * clear that bit and wake up relocation so it can stop. ++ * We must do this before stopping the block group reclaim task, because ++ * at btrfs_relocate_block_group() we wait for this bit, and after the ++ * wait we stop with -EINTR if btrfs_fs_closing() returns non-zero - we ++ * have just set BTRFS_FS_CLOSING_START, so btrfs_fs_closing() will ++ * return 1. ++ */ ++ btrfs_wake_unfinished_drop(fs_info); ++ + /* + * We may have the reclaim task running and relocating a data block group, + * in which case it may create delayed iputs. So stop it before we park +@@ -4604,12 +4615,6 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) + */ + kthread_park(fs_info->cleaner_kthread); + +- /* +- * If we had UNFINISHED_DROPS we could still be processing them, so +- * clear that bit and wake up relocation so it can stop. 
+- */ +- btrfs_wake_unfinished_drop(fs_info); +- + /* wait for the qgroup rescan worker to stop */ + btrfs_qgroup_wait_for_completion(fs_info, false); + +@@ -4632,6 +4637,31 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) + /* clear out the rbtree of defraggable inodes */ + btrfs_cleanup_defrag_inodes(fs_info); + ++ /* ++ * After we parked the cleaner kthread, ordered extents may have ++ * completed and created new delayed iputs. If one of the async reclaim ++ * tasks is running and in the RUN_DELAYED_IPUTS flush state, then we ++ * can hang forever trying to stop it, because if a delayed iput is ++ * added after it ran btrfs_run_delayed_iputs() and before it called ++ * btrfs_wait_on_delayed_iputs(), it will hang forever since there is ++ * no one else to run iputs. ++ * ++ * So wait for all ongoing ordered extents to complete and then run ++ * delayed iputs. This works because once we reach this point no one ++ * can either create new ordered extents nor create delayed iputs ++ * through some other means. ++ * ++ * Also note that btrfs_wait_ordered_roots() is not safe here, because ++ * it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent, ++ * but the delayed iput for the respective inode is made only when doing ++ * the final btrfs_put_ordered_extent() (which must happen at ++ * btrfs_finish_ordered_io() when we are unmounting). ++ */ ++ btrfs_flush_workqueue(fs_info->endio_write_workers); ++ /* Ordered extents for free space inodes. */ ++ btrfs_flush_workqueue(fs_info->endio_freespace_worker); ++ btrfs_run_delayed_iputs(fs_info); ++ + cancel_work_sync(&fs_info->async_reclaim_work); + cancel_work_sync(&fs_info->async_data_reclaim_work); + cancel_work_sync(&fs_info->preempt_reclaim_work); +diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c +index 1386362fad3b8..4448b7b6ea221 100644 +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -1918,10 +1918,44 @@ out_unlock: + return ret; + } + ++static void wait_eb_writebacks(struct btrfs_block_group *block_group) ++{ ++ struct btrfs_fs_info *fs_info = block_group->fs_info; ++ const u64 end = block_group->start + block_group->length; ++ struct radix_tree_iter iter; ++ struct extent_buffer *eb; ++ void __rcu **slot; ++ ++ rcu_read_lock(); ++ radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, ++ block_group->start >> fs_info->sectorsize_bits) { ++ eb = radix_tree_deref_slot(slot); ++ if (!eb) ++ continue; ++ if (radix_tree_deref_retry(eb)) { ++ slot = radix_tree_iter_retry(&iter); ++ continue; ++ } ++ ++ if (eb->start < block_group->start) ++ continue; ++ if (eb->start >= end) ++ break; ++ ++ slot = radix_tree_iter_resume(slot, &iter); ++ rcu_read_unlock(); ++ wait_on_extent_buffer_writeback(eb); ++ rcu_read_lock(); ++ } ++ rcu_read_unlock(); ++} ++ + static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written) + { + struct btrfs_fs_info *fs_info = block_group->fs_info; + struct map_lookup *map; ++ const bool is_metadata = (block_group->flags & ++ (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)); + int ret = 0; + int i; + +@@ -1932,8 +1966,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ + } + + /* Check if we have unwritten allocated space */ +- if ((block_group->flags & +- (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) && ++ if (is_metadata && + block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) { + spin_unlock(&block_group->lock); + return -EAGAIN; +@@ -1958,6 +1991,9 @@ static int do_zone_finish(struct 
btrfs_block_group *block_group, bool fully_writ + /* No need to wait for NOCOW writers. Zoned mode does not allow that */ + btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start, + block_group->length); ++ /* Wait for extent buffers to be written. */ ++ if (is_metadata) ++ wait_eb_writebacks(block_group); + + spin_lock(&block_group->lock); + +diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c +index 8f2e003e05907..97278c43f8dc0 100644 +--- a/fs/cifs/cifsfs.c ++++ b/fs/cifs/cifsfs.c +@@ -1232,6 +1232,12 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, + lock_two_nondirectories(target_inode, src_inode); + + cifs_dbg(FYI, "about to flush pages\n"); ++ ++ rc = filemap_write_and_wait_range(src_inode->i_mapping, off, ++ off + len - 1); ++ if (rc) ++ goto out; ++ + /* should we flush first and last page first */ + truncate_inode_pages(&target_inode->i_data, 0); + +diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c +index e8a8daa82ed76..cc180d37b8ce1 100644 +--- a/fs/cifs/smb2ops.c ++++ b/fs/cifs/smb2ops.c +@@ -1886,17 +1886,8 @@ smb2_copychunk_range(const unsigned int xid, + int chunks_copied = 0; + bool chunk_sizes_updated = false; + ssize_t bytes_written, total_bytes_written = 0; +- struct inode *inode; + + pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL); +- +- /* +- * We need to flush all unwritten data before we can send the +- * copychunk ioctl to the server. +- */ +- inode = d_inode(trgtfile->dentry); +- filemap_write_and_wait(inode->i_mapping); +- + if (pcchunk == NULL) + return -ENOMEM; + +@@ -3961,39 +3952,50 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, + { + int rc; + unsigned int xid; +- struct inode *inode; ++ struct inode *inode = file_inode(file); + struct cifsFileInfo *cfile = file->private_data; +- struct cifsInodeInfo *cifsi; ++ struct cifsInodeInfo *cifsi = CIFS_I(inode); + __le64 eof; ++ loff_t old_eof; + + xid = get_xid(); + +- inode = d_inode(cfile->dentry); +- cifsi = CIFS_I(inode); ++ inode_lock(inode); + +- if (off >= i_size_read(inode) || +- off + len >= i_size_read(inode)) { ++ old_eof = i_size_read(inode); ++ if ((off >= old_eof) || ++ off + len >= old_eof) { + rc = -EINVAL; + goto out; + } + ++ filemap_invalidate_lock(inode->i_mapping); ++ rc = filemap_write_and_wait_range(inode->i_mapping, off, old_eof - 1); ++ if (rc < 0) ++ goto out_2; ++ ++ truncate_pagecache_range(inode, off, old_eof); ++ + rc = smb2_copychunk_range(xid, cfile, cfile, off + len, +- i_size_read(inode) - off - len, off); ++ old_eof - off - len, off); + if (rc < 0) +- goto out; ++ goto out_2; + +- eof = cpu_to_le64(i_size_read(inode) - len); ++ eof = cpu_to_le64(old_eof - len); + rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, cfile->pid, &eof); + if (rc < 0) +- goto out; ++ goto out_2; + + rc = 0; + + cifsi->server_eof = i_size_read(inode) - len; + truncate_setsize(inode, cifsi->server_eof); + fscache_resize_cookie(cifs_inode_cookie(inode), cifsi->server_eof); ++out_2: ++ filemap_invalidate_unlock(inode->i_mapping); + out: ++ inode_unlock(inode); + free_xid(xid); + return rc; + } +@@ -4004,34 +4006,47 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, + int rc; + unsigned int xid; + struct cifsFileInfo *cfile = file->private_data; ++ struct inode *inode = file_inode(file); + __le64 eof; +- __u64 count; ++ __u64 count, old_eof; + + xid = get_xid(); + +- if (off >= i_size_read(file->f_inode)) { ++ inode_lock(inode); ++ ++ old_eof = i_size_read(inode); ++ if (off >= old_eof) { + rc = 
-EINVAL; + goto out; + } + +- count = i_size_read(file->f_inode) - off; +- eof = cpu_to_le64(i_size_read(file->f_inode) + len); ++ count = old_eof - off; ++ eof = cpu_to_le64(old_eof + len); ++ ++ filemap_invalidate_lock(inode->i_mapping); ++ rc = filemap_write_and_wait_range(inode->i_mapping, off, old_eof + len - 1); ++ if (rc < 0) ++ goto out_2; ++ truncate_pagecache_range(inode, off, old_eof); + + rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, cfile->pid, &eof); + if (rc < 0) +- goto out; ++ goto out_2; + + rc = smb2_copychunk_range(xid, cfile, cfile, off, count, off + len); + if (rc < 0) +- goto out; ++ goto out_2; + +- rc = smb3_zero_range(file, tcon, off, len, 1); ++ rc = smb3_zero_data(file, tcon, off, len, xid); + if (rc < 0) +- goto out; ++ goto out_2; + + rc = 0; ++out_2: ++ filemap_invalidate_unlock(inode->i_mapping); + out: ++ inode_unlock(inode); + free_xid(xid); + return rc; + } +diff --git a/fs/dax.c b/fs/dax.c +index 4155a6107fa10..7ab248ed21aa3 100644 +--- a/fs/dax.c ++++ b/fs/dax.c +@@ -1241,6 +1241,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, + loff_t done = 0; + int ret; + ++ if (!iomi.len) ++ return 0; ++ + if (iov_iter_rw(iter) == WRITE) { + lockdep_assert_held_write(&iomi.inode->i_rwsem); + iomi.flags |= IOMAP_WRITE; +diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c +index 9de6a6b844c9e..e541a004f8efa 100644 +--- a/fs/exfat/fatent.c ++++ b/fs/exfat/fatent.c +@@ -270,8 +270,7 @@ int exfat_zeroed_cluster(struct inode *dir, unsigned int clu) + struct super_block *sb = dir->i_sb; + struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct buffer_head *bh; +- sector_t blknr, last_blknr; +- int i; ++ sector_t blknr, last_blknr, i; + + blknr = exfat_cluster_to_sector(sbi, clu); + last_blknr = blknr + sbi->sect_per_clus; +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index adfc30ee4b7be..0d86931269bfc 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -167,8 +167,6 @@ enum SHIFT_DIRECTION { + #define EXT4_MB_CR0_OPTIMIZED 0x8000 + /* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */ + #define EXT4_MB_CR1_OPTIMIZED 0x00010000 +-/* Perform linear traversal for one group */ +-#define EXT4_MB_SEARCH_NEXT_LINEAR 0x00020000 + struct ext4_allocation_request { + /* target inode for block we're allocating */ + struct inode *inode; +@@ -1589,8 +1587,8 @@ struct ext4_sb_info { + struct list_head s_discard_list; + struct work_struct s_discard_work; + atomic_t s_retry_alloc_pending; +- struct rb_root s_mb_avg_fragment_size_root; +- rwlock_t s_mb_rb_lock; ++ struct list_head *s_mb_avg_fragment_size; ++ rwlock_t *s_mb_avg_fragment_size_locks; + struct list_head *s_mb_largest_free_orders; + rwlock_t *s_mb_largest_free_orders_locks; + +@@ -3402,6 +3400,8 @@ struct ext4_group_info { + ext4_grpblk_t bb_first_free; /* first free block */ + ext4_grpblk_t bb_free; /* total free blocks */ + ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ ++ int bb_avg_fragment_size_order; /* order of average ++ fragment in BG */ + ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */ + ext4_group_t bb_group; /* Group number */ + struct list_head bb_prealloc_list; +@@ -3409,7 +3409,7 @@ struct ext4_group_info { + void *bb_bitmap; + #endif + struct rw_semaphore alloc_sem; +- struct rb_node bb_avg_fragment_size_rb; ++ struct list_head bb_avg_fragment_size_node; + struct list_head bb_largest_free_order_node; + ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block + * regions, index is order. 
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index c148bb97b5273..5235974126bd3 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -460,6 +460,10 @@ static int __ext4_ext_check(const char *function, unsigned int line, + error_msg = "invalid eh_entries"; + goto corrupted; + } ++ if (unlikely((eh->eh_entries == 0) && (depth > 0))) { ++ error_msg = "eh_entries is 0 but eh_depth is > 0"; ++ goto corrupted; ++ } + if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) { + error_msg = "invalid extent entries"; + goto corrupted; +diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c +index f73e5eb43eae1..208b87ce88588 100644 +--- a/fs/ext4/ialloc.c ++++ b/fs/ext4/ialloc.c +@@ -510,7 +510,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, + goto fallback; + } + +- max_dirs = ndirs / ngroups + inodes_per_group / 16; ++ max_dirs = ndirs / ngroups + inodes_per_group*flex_size / 16; + min_inodes = avefreei - inodes_per_group*flex_size / 4; + if (min_inodes < 1) + min_inodes = 1; +diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c +index 38e7dc2531b17..fd29e15d1c3b5 100644 +--- a/fs/ext4/mballoc.c ++++ b/fs/ext4/mballoc.c +@@ -140,13 +140,15 @@ + * number of buddy bitmap orders possible) number of lists. Group-infos are + * placed in appropriate lists. + * +- * 2) Average fragment size rb tree (sbi->s_mb_avg_fragment_size_root) ++ * 2) Average fragment size lists (sbi->s_mb_avg_fragment_size) + * +- * Locking: sbi->s_mb_rb_lock (rwlock) ++ * Locking: sbi->s_mb_avg_fragment_size_locks(array of rw locks) + * +- * This is a red black tree consisting of group infos and the tree is sorted +- * by average fragment sizes (which is calculated as ext4_group_info->bb_free +- * / ext4_group_info->bb_fragments). ++ * This is an array of lists where in the i-th list there are groups with ++ * average fragment size >= 2^i and < 2^(i+1). The average fragment size ++ * is computed as ext4_group_info->bb_free / ext4_group_info->bb_fragments. ++ * Note that we don't bother with a special list for completely empty groups ++ * so we only have MB_NUM_ORDERS(sb) lists. + * + * When "mb_optimize_scan" mount option is set, mballoc consults the above data + * structures to decide the order in which groups are to be traversed for +@@ -160,7 +162,8 @@ + * + * At CR = 1, we only consider groups where average fragment size > request + * size. So, we lookup a group which has average fragment size just above or +- * equal to request size using our rb tree (data structure 2) in O(log N) time. ++ * equal to request size using our average fragment size group lists (data ++ * structure 2) in O(1) time. + * + * If "mb_optimize_scan" mount option is not set, mballoc traverses groups in + * linear order which requires O(N) search time for each CR 0 and CR 1 phase. 
+@@ -802,65 +805,51 @@ static void ext4_mb_mark_free_simple(struct super_block *sb, + } + } + +-static void ext4_mb_rb_insert(struct rb_root *root, struct rb_node *new, +- int (*cmp)(struct rb_node *, struct rb_node *)) ++static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len) + { +- struct rb_node **iter = &root->rb_node, *parent = NULL; ++ int order; + +- while (*iter) { +- parent = *iter; +- if (cmp(new, *iter) > 0) +- iter = &((*iter)->rb_left); +- else +- iter = &((*iter)->rb_right); +- } +- +- rb_link_node(new, parent, iter); +- rb_insert_color(new, root); +-} +- +-static int +-ext4_mb_avg_fragment_size_cmp(struct rb_node *rb1, struct rb_node *rb2) +-{ +- struct ext4_group_info *grp1 = rb_entry(rb1, +- struct ext4_group_info, +- bb_avg_fragment_size_rb); +- struct ext4_group_info *grp2 = rb_entry(rb2, +- struct ext4_group_info, +- bb_avg_fragment_size_rb); +- int num_frags_1, num_frags_2; +- +- num_frags_1 = grp1->bb_fragments ? +- grp1->bb_free / grp1->bb_fragments : 0; +- num_frags_2 = grp2->bb_fragments ? +- grp2->bb_free / grp2->bb_fragments : 0; +- +- return (num_frags_2 - num_frags_1); ++ /* ++ * We don't bother with a special lists groups with only 1 block free ++ * extents and for completely empty groups. ++ */ ++ order = fls(len) - 2; ++ if (order < 0) ++ return 0; ++ if (order == MB_NUM_ORDERS(sb)) ++ order--; ++ return order; + } + +-/* +- * Reinsert grpinfo into the avg_fragment_size tree with new average +- * fragment size. +- */ ++/* Move group to appropriate avg_fragment_size list */ + static void + mb_update_avg_fragment_size(struct super_block *sb, struct ext4_group_info *grp) + { + struct ext4_sb_info *sbi = EXT4_SB(sb); ++ int new_order; + + if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || grp->bb_free == 0) + return; + +- write_lock(&sbi->s_mb_rb_lock); +- if (!RB_EMPTY_NODE(&grp->bb_avg_fragment_size_rb)) { +- rb_erase(&grp->bb_avg_fragment_size_rb, +- &sbi->s_mb_avg_fragment_size_root); +- RB_CLEAR_NODE(&grp->bb_avg_fragment_size_rb); +- } ++ new_order = mb_avg_fragment_size_order(sb, ++ grp->bb_free / grp->bb_fragments); ++ if (new_order == grp->bb_avg_fragment_size_order) ++ return; + +- ext4_mb_rb_insert(&sbi->s_mb_avg_fragment_size_root, +- &grp->bb_avg_fragment_size_rb, +- ext4_mb_avg_fragment_size_cmp); +- write_unlock(&sbi->s_mb_rb_lock); ++ if (grp->bb_avg_fragment_size_order != -1) { ++ write_lock(&sbi->s_mb_avg_fragment_size_locks[ ++ grp->bb_avg_fragment_size_order]); ++ list_del(&grp->bb_avg_fragment_size_node); ++ write_unlock(&sbi->s_mb_avg_fragment_size_locks[ ++ grp->bb_avg_fragment_size_order]); ++ } ++ grp->bb_avg_fragment_size_order = new_order; ++ write_lock(&sbi->s_mb_avg_fragment_size_locks[ ++ grp->bb_avg_fragment_size_order]); ++ list_add_tail(&grp->bb_avg_fragment_size_node, ++ &sbi->s_mb_avg_fragment_size[grp->bb_avg_fragment_size_order]); ++ write_unlock(&sbi->s_mb_avg_fragment_size_locks[ ++ grp->bb_avg_fragment_size_order]); + } + + /* +@@ -909,86 +898,55 @@ static void ext4_mb_choose_next_group_cr0(struct ext4_allocation_context *ac, + *new_cr = 1; + } else { + *group = grp->bb_group; +- ac->ac_last_optimal_group = *group; + ac->ac_flags |= EXT4_MB_CR0_OPTIMIZED; + } + } + + /* +- * Choose next group by traversing average fragment size tree. Updates *new_cr +- * if cr lvel needs an update. Sets EXT4_MB_SEARCH_NEXT_LINEAR to indicate that +- * the linear search should continue for one iteration since there's lock +- * contention on the rb tree lock. 
++ * Choose next group by traversing average fragment size list of suitable ++ * order. Updates *new_cr if cr level needs an update. + */ + static void ext4_mb_choose_next_group_cr1(struct ext4_allocation_context *ac, + int *new_cr, ext4_group_t *group, ext4_group_t ngroups) + { + struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); +- int avg_fragment_size, best_so_far; +- struct rb_node *node, *found; +- struct ext4_group_info *grp; +- +- /* +- * If there is contention on the lock, instead of waiting for the lock +- * to become available, just continue searching lineraly. We'll resume +- * our rb tree search later starting at ac->ac_last_optimal_group. +- */ +- if (!read_trylock(&sbi->s_mb_rb_lock)) { +- ac->ac_flags |= EXT4_MB_SEARCH_NEXT_LINEAR; +- return; +- } ++ struct ext4_group_info *grp = NULL, *iter; ++ int i; + + if (unlikely(ac->ac_flags & EXT4_MB_CR1_OPTIMIZED)) { + if (sbi->s_mb_stats) + atomic_inc(&sbi->s_bal_cr1_bad_suggestions); +- /* We have found something at CR 1 in the past */ +- grp = ext4_get_group_info(ac->ac_sb, ac->ac_last_optimal_group); +- for (found = rb_next(&grp->bb_avg_fragment_size_rb); found != NULL; +- found = rb_next(found)) { +- grp = rb_entry(found, struct ext4_group_info, +- bb_avg_fragment_size_rb); ++ } ++ ++ for (i = mb_avg_fragment_size_order(ac->ac_sb, ac->ac_g_ex.fe_len); ++ i < MB_NUM_ORDERS(ac->ac_sb); i++) { ++ if (list_empty(&sbi->s_mb_avg_fragment_size[i])) ++ continue; ++ read_lock(&sbi->s_mb_avg_fragment_size_locks[i]); ++ if (list_empty(&sbi->s_mb_avg_fragment_size[i])) { ++ read_unlock(&sbi->s_mb_avg_fragment_size_locks[i]); ++ continue; ++ } ++ list_for_each_entry(iter, &sbi->s_mb_avg_fragment_size[i], ++ bb_avg_fragment_size_node) { + if (sbi->s_mb_stats) + atomic64_inc(&sbi->s_bal_cX_groups_considered[1]); +- if (likely(ext4_mb_good_group(ac, grp->bb_group, 1))) ++ if (likely(ext4_mb_good_group(ac, iter->bb_group, 1))) { ++ grp = iter; + break; +- } +- goto done; +- } +- +- node = sbi->s_mb_avg_fragment_size_root.rb_node; +- best_so_far = 0; +- found = NULL; +- +- while (node) { +- grp = rb_entry(node, struct ext4_group_info, +- bb_avg_fragment_size_rb); +- avg_fragment_size = 0; +- if (ext4_mb_good_group(ac, grp->bb_group, 1)) { +- avg_fragment_size = grp->bb_fragments ? 
+- grp->bb_free / grp->bb_fragments : 0; +- if (!best_so_far || avg_fragment_size < best_so_far) { +- best_so_far = avg_fragment_size; +- found = node; + } + } +- if (avg_fragment_size > ac->ac_g_ex.fe_len) +- node = node->rb_right; +- else +- node = node->rb_left; ++ read_unlock(&sbi->s_mb_avg_fragment_size_locks[i]); ++ if (grp) ++ break; + } + +-done: +- if (found) { +- grp = rb_entry(found, struct ext4_group_info, +- bb_avg_fragment_size_rb); ++ if (grp) { + *group = grp->bb_group; + ac->ac_flags |= EXT4_MB_CR1_OPTIMIZED; + } else { + *new_cr = 2; + } +- +- read_unlock(&sbi->s_mb_rb_lock); +- ac->ac_last_optimal_group = *group; + } + + static inline int should_optimize_scan(struct ext4_allocation_context *ac) +@@ -1017,11 +975,6 @@ next_linear_group(struct ext4_allocation_context *ac, int group, int ngroups) + goto inc_and_return; + } + +- if (ac->ac_flags & EXT4_MB_SEARCH_NEXT_LINEAR) { +- ac->ac_flags &= ~EXT4_MB_SEARCH_NEXT_LINEAR; +- goto inc_and_return; +- } +- + return group; + inc_and_return: + /* +@@ -1049,8 +1002,10 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac, + { + *new_cr = ac->ac_criteria; + +- if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) ++ if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) { ++ *group = next_linear_group(ac, *group, ngroups); + return; ++ } + + if (*new_cr == 0) { + ext4_mb_choose_next_group_cr0(ac, new_cr, group, ngroups); +@@ -1075,23 +1030,25 @@ mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp) + struct ext4_sb_info *sbi = EXT4_SB(sb); + int i; + +- if (test_opt2(sb, MB_OPTIMIZE_SCAN) && grp->bb_largest_free_order >= 0) { ++ for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--) ++ if (grp->bb_counters[i] > 0) ++ break; ++ /* No need to move between order lists? 
*/ ++ if (!test_opt2(sb, MB_OPTIMIZE_SCAN) || ++ i == grp->bb_largest_free_order) { ++ grp->bb_largest_free_order = i; ++ return; ++ } ++ ++ if (grp->bb_largest_free_order >= 0) { + write_lock(&sbi->s_mb_largest_free_orders_locks[ + grp->bb_largest_free_order]); + list_del_init(&grp->bb_largest_free_order_node); + write_unlock(&sbi->s_mb_largest_free_orders_locks[ + grp->bb_largest_free_order]); + } +- grp->bb_largest_free_order = -1; /* uninit */ +- +- for (i = MB_NUM_ORDERS(sb) - 1; i >= 0; i--) { +- if (grp->bb_counters[i] > 0) { +- grp->bb_largest_free_order = i; +- break; +- } +- } +- if (test_opt2(sb, MB_OPTIMIZE_SCAN) && +- grp->bb_largest_free_order >= 0 && grp->bb_free) { ++ grp->bb_largest_free_order = i; ++ if (grp->bb_largest_free_order >= 0 && grp->bb_free) { + write_lock(&sbi->s_mb_largest_free_orders_locks[ + grp->bb_largest_free_order]); + list_add_tail(&grp->bb_largest_free_order_node, +@@ -1148,13 +1105,13 @@ void ext4_mb_generate_buddy(struct super_block *sb, + EXT4_GROUP_INFO_BBITMAP_CORRUPT); + } + mb_set_largest_free_order(sb, grp); ++ mb_update_avg_fragment_size(sb, grp); + + clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); + + period = get_cycles() - period; + atomic_inc(&sbi->s_mb_buddies_generated); + atomic64_add(period, &sbi->s_mb_generation_time); +- mb_update_avg_fragment_size(sb, grp); + } + + /* The buddy information is attached the buddy cache inode +@@ -2630,7 +2587,7 @@ static noinline_for_stack int + ext4_mb_regular_allocator(struct ext4_allocation_context *ac) + { + ext4_group_t prefetch_grp = 0, ngroups, group, i; +- int cr = -1; ++ int cr = -1, new_cr; + int err = 0, first_err = 0; + unsigned int nr = 0, prefetch_ios = 0; + struct ext4_sb_info *sbi; +@@ -2701,17 +2658,14 @@ repeat: + * from the goal value specified + */ + group = ac->ac_g_ex.fe_group; +- ac->ac_last_optimal_group = group; + ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups; + prefetch_grp = group; + +- for (i = 0; i < ngroups; group = next_linear_group(ac, group, ngroups), +- i++) { +- int ret = 0, new_cr; ++ for (i = 0, new_cr = cr; i < ngroups; i++, ++ ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups)) { ++ int ret = 0; + + cond_resched(); +- +- ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups); + if (new_cr != cr) { + cr = new_cr; + goto repeat; +@@ -2985,9 +2939,7 @@ __acquires(&EXT4_SB(sb)->s_mb_rb_lock) + struct super_block *sb = pde_data(file_inode(seq->file)); + unsigned long position; + +- read_lock(&EXT4_SB(sb)->s_mb_rb_lock); +- +- if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + 1) ++ if (*pos < 0 || *pos >= 2*MB_NUM_ORDERS(sb)) + return NULL; + position = *pos + 1; + return (void *) ((unsigned long) position); +@@ -2999,7 +2951,7 @@ static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, lof + unsigned long position; + + ++*pos; +- if (*pos < 0 || *pos >= MB_NUM_ORDERS(sb) + 1) ++ if (*pos < 0 || *pos >= 2*MB_NUM_ORDERS(sb)) + return NULL; + position = *pos + 1; + return (void *) ((unsigned long) position); +@@ -3011,29 +2963,22 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v) + struct ext4_sb_info *sbi = EXT4_SB(sb); + unsigned long position = ((unsigned long) v); + struct ext4_group_info *grp; +- struct rb_node *n; +- unsigned int count, min, max; ++ unsigned int count; + + position--; + if (position >= MB_NUM_ORDERS(sb)) { +- seq_puts(seq, "fragment_size_tree:\n"); +- n = rb_first(&sbi->s_mb_avg_fragment_size_root); +- if (!n) { +- seq_puts(seq, "\ttree_min: 0\n\ttree_max: 0\n\ttree_nodes: 
0\n"); +- return 0; +- } +- grp = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb); +- min = grp->bb_fragments ? grp->bb_free / grp->bb_fragments : 0; +- count = 1; +- while (rb_next(n)) { +- count++; +- n = rb_next(n); +- } +- grp = rb_entry(n, struct ext4_group_info, bb_avg_fragment_size_rb); +- max = grp->bb_fragments ? grp->bb_free / grp->bb_fragments : 0; ++ position -= MB_NUM_ORDERS(sb); ++ if (position == 0) ++ seq_puts(seq, "avg_fragment_size_lists:\n"); + +- seq_printf(seq, "\ttree_min: %u\n\ttree_max: %u\n\ttree_nodes: %u\n", +- min, max, count); ++ count = 0; ++ read_lock(&sbi->s_mb_avg_fragment_size_locks[position]); ++ list_for_each_entry(grp, &sbi->s_mb_avg_fragment_size[position], ++ bb_avg_fragment_size_node) ++ count++; ++ read_unlock(&sbi->s_mb_avg_fragment_size_locks[position]); ++ seq_printf(seq, "\tlist_order_%u_groups: %u\n", ++ (unsigned int)position, count); + return 0; + } + +@@ -3043,9 +2988,11 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v) + seq_puts(seq, "max_free_order_lists:\n"); + } + count = 0; ++ read_lock(&sbi->s_mb_largest_free_orders_locks[position]); + list_for_each_entry(grp, &sbi->s_mb_largest_free_orders[position], + bb_largest_free_order_node) + count++; ++ read_unlock(&sbi->s_mb_largest_free_orders_locks[position]); + seq_printf(seq, "\tlist_order_%u_groups: %u\n", + (unsigned int)position, count); + +@@ -3053,11 +3000,7 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v) + } + + static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v) +-__releases(&EXT4_SB(sb)->s_mb_rb_lock) + { +- struct super_block *sb = pde_data(file_inode(seq->file)); +- +- read_unlock(&EXT4_SB(sb)->s_mb_rb_lock); + } + + const struct seq_operations ext4_mb_seq_structs_summary_ops = { +@@ -3170,8 +3113,9 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, + init_rwsem(&meta_group_info[i]->alloc_sem); + meta_group_info[i]->bb_free_root = RB_ROOT; + INIT_LIST_HEAD(&meta_group_info[i]->bb_largest_free_order_node); +- RB_CLEAR_NODE(&meta_group_info[i]->bb_avg_fragment_size_rb); ++ INIT_LIST_HEAD(&meta_group_info[i]->bb_avg_fragment_size_node); + meta_group_info[i]->bb_largest_free_order = -1; /* uninit */ ++ meta_group_info[i]->bb_avg_fragment_size_order = -1; /* uninit */ + meta_group_info[i]->bb_group = group; + + mb_group_bb_bitmap_alloc(sb, meta_group_info[i], group); +@@ -3420,7 +3364,24 @@ int ext4_mb_init(struct super_block *sb) + i++; + } while (i < MB_NUM_ORDERS(sb)); + +- sbi->s_mb_avg_fragment_size_root = RB_ROOT; ++ sbi->s_mb_avg_fragment_size = ++ kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head), ++ GFP_KERNEL); ++ if (!sbi->s_mb_avg_fragment_size) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ sbi->s_mb_avg_fragment_size_locks = ++ kmalloc_array(MB_NUM_ORDERS(sb), sizeof(rwlock_t), ++ GFP_KERNEL); ++ if (!sbi->s_mb_avg_fragment_size_locks) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ for (i = 0; i < MB_NUM_ORDERS(sb); i++) { ++ INIT_LIST_HEAD(&sbi->s_mb_avg_fragment_size[i]); ++ rwlock_init(&sbi->s_mb_avg_fragment_size_locks[i]); ++ } + sbi->s_mb_largest_free_orders = + kmalloc_array(MB_NUM_ORDERS(sb), sizeof(struct list_head), + GFP_KERNEL); +@@ -3439,7 +3400,6 @@ int ext4_mb_init(struct super_block *sb) + INIT_LIST_HEAD(&sbi->s_mb_largest_free_orders[i]); + rwlock_init(&sbi->s_mb_largest_free_orders_locks[i]); + } +- rwlock_init(&sbi->s_mb_rb_lock); + + spin_lock_init(&sbi->s_md_lock); + sbi->s_mb_free_pending = 0; +@@ -3510,6 +3470,8 @@ 
out_free_locality_groups: + free_percpu(sbi->s_locality_groups); + sbi->s_locality_groups = NULL; + out: ++ kfree(sbi->s_mb_avg_fragment_size); ++ kfree(sbi->s_mb_avg_fragment_size_locks); + kfree(sbi->s_mb_largest_free_orders); + kfree(sbi->s_mb_largest_free_orders_locks); + kfree(sbi->s_mb_offsets); +@@ -3576,6 +3538,8 @@ int ext4_mb_release(struct super_block *sb) + kvfree(group_info); + rcu_read_unlock(); + } ++ kfree(sbi->s_mb_avg_fragment_size); ++ kfree(sbi->s_mb_avg_fragment_size_locks); + kfree(sbi->s_mb_largest_free_orders); + kfree(sbi->s_mb_largest_free_orders_locks); + kfree(sbi->s_mb_offsets); +@@ -5187,6 +5151,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) + struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); + int bsbits = ac->ac_sb->s_blocksize_bits; + loff_t size, isize; ++ bool inode_pa_eligible, group_pa_eligible; + + if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) + return; +@@ -5194,25 +5159,27 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) + if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) + return; + ++ group_pa_eligible = sbi->s_mb_group_prealloc > 0; ++ inode_pa_eligible = true; + size = ac->ac_o_ex.fe_logical + EXT4_C2B(sbi, ac->ac_o_ex.fe_len); + isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) + >> bsbits; + ++ /* No point in using inode preallocation for closed files */ + if ((size == isize) && !ext4_fs_is_busy(sbi) && +- !inode_is_open_for_write(ac->ac_inode)) { +- ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC; +- return; +- } ++ !inode_is_open_for_write(ac->ac_inode)) ++ inode_pa_eligible = false; + +- if (sbi->s_mb_group_prealloc <= 0) { +- ac->ac_flags |= EXT4_MB_STREAM_ALLOC; +- return; +- } +- +- /* don't use group allocation for large files */ + size = max(size, isize); +- if (size > sbi->s_mb_stream_request) { +- ac->ac_flags |= EXT4_MB_STREAM_ALLOC; ++ /* Don't use group allocation for large files */ ++ if (size > sbi->s_mb_stream_request) ++ group_pa_eligible = false; ++ ++ if (!group_pa_eligible) { ++ if (inode_pa_eligible) ++ ac->ac_flags |= EXT4_MB_STREAM_ALLOC; ++ else ++ ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC; + return; + } + +@@ -5559,6 +5526,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, + ext4_fsblk_t block = 0; + unsigned int inquota = 0; + unsigned int reserv_clstrs = 0; ++ int retries = 0; + u64 seq; + + might_sleep(); +@@ -5661,7 +5629,8 @@ repeat: + ar->len = ac->ac_b_ex.fe_len; + } + } else { +- if (ext4_mb_discard_preallocations_should_retry(sb, ac, &seq)) ++ if (++retries < 3 && ++ ext4_mb_discard_preallocations_should_retry(sb, ac, &seq)) + goto repeat; + /* + * If block allocation fails then the pa allocated above +diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h +index 39da92ceabf88..dcda2a943cee0 100644 +--- a/fs/ext4/mballoc.h ++++ b/fs/ext4/mballoc.h +@@ -178,7 +178,6 @@ struct ext4_allocation_context { + /* copy of the best found extent taken before preallocation efforts */ + struct ext4_free_extent ac_f_ex; + +- ext4_group_t ac_last_optimal_group; + __u32 ac_groups_considered; + __u32 ac_flags; /* allocation hints */ + __u16 ac_groups_scanned; +diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h +index 7515a465ec03a..7c90b1ab3e00d 100644 +--- a/include/asm-generic/vmlinux.lds.h ++++ b/include/asm-generic/vmlinux.lds.h +@@ -543,10 +543,9 @@ + */ + #ifdef CONFIG_CFI_CLANG + #define TEXT_CFI_JT \ +- . = ALIGN(PMD_SIZE); \ ++ ALIGN_FUNCTION(); \ + __cfi_jt_start = .; \ + *(.text..L.cfi.jumptable .text..L.cfi.jumptable.*) \ +- . 
= ALIGN(PMD_SIZE); \ + __cfi_jt_end = .; + #else + #define TEXT_CFI_JT +diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h +index e2d9daf7e8dd0..0fd96e92c6c65 100644 +--- a/include/linux/blk-mq.h ++++ b/include/linux/blk-mq.h +@@ -686,10 +686,13 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, + \ + __blk_mq_alloc_disk(set, queuedata, &__key); \ + }) ++struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, ++ struct lock_class_key *lkclass); + struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); + int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, + struct request_queue *q); + void blk_mq_unregister_dev(struct device *, struct request_queue *); ++void blk_mq_destroy_queue(struct request_queue *); + + int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); + int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set, +diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h +index 62e3ff52ab033..83eb8869a8c94 100644 +--- a/include/linux/blkdev.h ++++ b/include/linux/blkdev.h +@@ -148,6 +148,7 @@ struct gendisk { + #define GD_NATIVE_CAPACITY 3 + #define GD_ADDED 4 + #define GD_SUPPRESS_PART_SCAN 5 ++#define GD_OWNS_QUEUE 6 + + struct mutex open_mutex; /* open/close mutex */ + unsigned open_partitions; /* number of open partitions */ +@@ -559,7 +560,6 @@ struct request_queue { + #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ + #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ + #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ +-#define QUEUE_FLAG_DEAD 13 /* queue tear-down finished */ + #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ + #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ + #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ +@@ -587,7 +587,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); + #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) + #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) + #define blk_queue_has_srcu(q) test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags) +-#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) + #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) + #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) + #define blk_queue_noxmerges(q) \ +@@ -812,8 +811,6 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) + + int bdev_disk_changed(struct gendisk *disk, bool invalidate); + +-struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, +- struct lock_class_key *lkclass); + void put_disk(struct gendisk *disk); + struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); + +@@ -955,7 +952,6 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, + /* + * Access functions for manipulating queue properties + */ +-extern void blk_cleanup_queue(struct request_queue *); + void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce limit); + extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); + extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); +diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h +index 4592d08459417..57aa459c6618a 100644 +--- a/include/linux/cpumask.h ++++ b/include/linux/cpumask.h +@@ -1083,9 +1083,10 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, + * cover a worst-case of every 
other cpu being on one of two nodes for a + * very large NR_CPUS. + * +- * Use PAGE_SIZE as a minimum for smaller configurations. ++ * Use PAGE_SIZE as a minimum for smaller configurations while avoiding ++ * unsigned comparison to -1. + */ +-#define CPUMAP_FILE_MAX_BYTES ((((NR_CPUS * 9)/32 - 1) > PAGE_SIZE) \ ++#define CPUMAP_FILE_MAX_BYTES (((NR_CPUS * 9)/32 > PAGE_SIZE) \ + ? (NR_CPUS * 9)/32 - 1 : PAGE_SIZE) + #define CPULIST_FILE_MAX_BYTES (((NR_CPUS * 7)/2 > PAGE_SIZE) ? (NR_CPUS * 7)/2 : PAGE_SIZE) + +diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h +index fde258b3decd5..037a8d81a66cf 100644 +--- a/include/linux/serial_core.h ++++ b/include/linux/serial_core.h +@@ -302,6 +302,23 @@ struct uart_state { + /* number of characters left in xmit buffer before we ask for more */ + #define WAKEUP_CHARS 256 + ++/** ++ * uart_xmit_advance - Advance xmit buffer and account Tx'ed chars ++ * @up: uart_port structure describing the port ++ * @chars: number of characters sent ++ * ++ * This function advances the tail of circular xmit buffer by the number of ++ * @chars transmitted and handles accounting of transmitted bytes (into ++ * @up's icount.tx). ++ */ ++static inline void uart_xmit_advance(struct uart_port *up, unsigned int chars) ++{ ++ struct circ_buf *xmit = &up->state->xmit; ++ ++ xmit->tail = (xmit->tail + chars) & (UART_XMIT_SIZE - 1); ++ up->icount.tx += chars; ++} ++ + struct module; + struct tty_driver; + +diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h +index 184105d682942..f2273bd5a4c58 100644 +--- a/include/net/bond_3ad.h ++++ b/include/net/bond_3ad.h +@@ -15,8 +15,6 @@ + #define PKT_TYPE_LACPDU cpu_to_be16(ETH_P_SLOW) + #define AD_TIMER_INTERVAL 100 /*msec*/ + +-#define MULTICAST_LACPDU_ADDR {0x01, 0x80, 0xC2, 0x00, 0x00, 0x02} +- + #define AD_LACP_SLOW 0 + #define AD_LACP_FAST 1 + +diff --git a/include/net/bonding.h b/include/net/bonding.h +index 3b816ae8b1f3b..7ac1773b99224 100644 +--- a/include/net/bonding.h ++++ b/include/net/bonding.h +@@ -785,6 +785,9 @@ extern struct rtnl_link_ops bond_link_ops; + /* exported from bond_sysfs_slave.c */ + extern const struct sysfs_ops slave_sysfs_ops; + ++/* exported from bond_3ad.c */ ++extern const u8 lacpdu_mcast_addr[]; ++ + static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb) + { + dev_core_stats_tx_dropped_inc(dev); +diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h +index 667d889b92b52..3e1cea155049b 100644 +--- a/include/scsi/scsi_host.h ++++ b/include/scsi/scsi_host.h +@@ -557,6 +557,8 @@ struct Scsi_Host { + struct scsi_host_template *hostt; + struct scsi_transport_template *transportt; + ++ struct kref tagset_refcnt; ++ struct completion tagset_freed; + /* Area to keep a shared tag map */ + struct blk_mq_tag_set tag_set; + +diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h +index 65e13a099b1a0..a9f5d884560ac 100644 +--- a/include/uapi/linux/xfrm.h ++++ b/include/uapi/linux/xfrm.h +@@ -296,7 +296,7 @@ enum xfrm_attr_type_t { + XFRMA_ETIMER_THRESH, + XFRMA_SRCADDR, /* xfrm_address_t */ + XFRMA_COADDR, /* xfrm_address_t */ +- XFRMA_LASTUSED, /* unsigned long */ ++ XFRMA_LASTUSED, /* __u64 */ + XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ + XFRMA_MIGRATE, + XFRMA_ALG_AEAD, /* struct xfrm_algo_aead */ +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index 602da2cfd57c8..15a6f1e93e5af 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -10951,6 +10951,9 @@ static __cold void io_ring_ctx_wait_and_kill(struct 
io_ring_ctx *ctx) + io_poll_remove_all(ctx, NULL, true); + /* if we failed setting up the ctx, we might not have any rings */ + io_iopoll_try_reap_events(ctx); ++ /* drop cached put refs after potentially doing completions */ ++ if (current->io_uring) ++ io_uring_drop_tctx_refs(current); + } + + INIT_WORK(&ctx->exit_work, io_ring_exit_work); +diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c +index e702ca368539a..80c23f48f3b4b 100644 +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -6026,6 +6026,9 @@ struct cgroup *cgroup_get_from_id(u64 id) + if (!kn) + goto out; + ++ if (kernfs_type(kn) != KERNFS_DIR) ++ goto put; ++ + rcu_read_lock(); + + cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); +@@ -6033,7 +6036,7 @@ struct cgroup *cgroup_get_from_id(u64 id) + cgrp = NULL; + + rcu_read_unlock(); +- ++put: + kernfs_put(kn); + out: + return cgrp; +diff --git a/kernel/workqueue.c b/kernel/workqueue.c +index aa8a82bc67384..fc6e4f2523452 100644 +--- a/kernel/workqueue.c ++++ b/kernel/workqueue.c +@@ -3066,10 +3066,8 @@ static bool __flush_work(struct work_struct *work, bool from_cancel) + if (WARN_ON(!work->func)) + return false; + +- if (!from_cancel) { +- lock_map_acquire(&work->lockdep_map); +- lock_map_release(&work->lockdep_map); +- } ++ lock_map_acquire(&work->lockdep_map); ++ lock_map_release(&work->lockdep_map); + + if (start_flush_work(work, &barr, from_cancel)) { + wait_for_completion(&barr.done); +diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug +index 2e24db4bff192..c399ab486557f 100644 +--- a/lib/Kconfig.debug ++++ b/lib/Kconfig.debug +@@ -264,8 +264,10 @@ config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT + config DEBUG_INFO_DWARF4 + bool "Generate DWARF Version 4 debuginfo" + select DEBUG_INFO ++ depends on !CC_IS_CLANG || (CC_IS_CLANG && (AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502))) + help +- Generate DWARF v4 debug info. This requires gcc 4.5+ and gdb 7.0+. ++ Generate DWARF v4 debug info. This requires gcc 4.5+, binutils 2.35.2 ++ if using clang without clang's integrated assembler, and gdb 7.0+. + + If you have consumers of DWARF debug info that are not ready for + newer revisions of DWARF, you may wish to choose this or have your +diff --git a/mm/slab_common.c b/mm/slab_common.c +index dbd4b6f9b0e79..29ae1358d5f07 100644 +--- a/mm/slab_common.c ++++ b/mm/slab_common.c +@@ -503,6 +503,7 @@ void slab_kmem_cache_release(struct kmem_cache *s) + void kmem_cache_destroy(struct kmem_cache *s) + { + int refcnt; ++ bool rcu_set; + + if (unlikely(!s) || !kasan_check_byte(s)) + return; +@@ -510,6 +511,8 @@ void kmem_cache_destroy(struct kmem_cache *s) + cpus_read_lock(); + mutex_lock(&slab_mutex); + ++ rcu_set = s->flags & SLAB_TYPESAFE_BY_RCU; ++ + refcnt = --s->refcount; + if (refcnt) + goto out_unlock; +@@ -520,7 +523,7 @@ void kmem_cache_destroy(struct kmem_cache *s) + out_unlock: + mutex_unlock(&slab_mutex); + cpus_read_unlock(); +- if (!refcnt && !(s->flags & SLAB_TYPESAFE_BY_RCU)) ++ if (!refcnt && !rcu_set) + kmem_cache_release(s); + } + EXPORT_SYMBOL(kmem_cache_destroy); +diff --git a/mm/slub.c b/mm/slub.c +index b1281b8654bd3..1eec942b8336c 100644 +--- a/mm/slub.c ++++ b/mm/slub.c +@@ -310,6 +310,11 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si) + */ + static nodemask_t slab_nodes; + ++/* ++ * Workqueue used for flush_cpu_slab(). 
++ */ ++static struct workqueue_struct *flushwq; ++ + /******************************************************************** + * Core slab cache functions + *******************************************************************/ +@@ -2730,7 +2735,7 @@ static void flush_all_cpus_locked(struct kmem_cache *s) + INIT_WORK(&sfw->work, flush_cpu_slab); + sfw->skip = false; + sfw->s = s; +- schedule_work_on(cpu, &sfw->work); ++ queue_work_on(cpu, flushwq, &sfw->work); + } + + for_each_online_cpu(cpu) { +@@ -4880,6 +4885,8 @@ void __init kmem_cache_init(void) + + void __init kmem_cache_init_late(void) + { ++ flushwq = alloc_workqueue("slub_flushwq", WQ_MEM_RECLAIM, 0); ++ WARN_ON(!flushwq); + } + + struct kmem_cache * +@@ -4950,6 +4957,8 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) + /* Honor the call site pointer we received. */ + trace_kmalloc(caller, ret, size, s->size, gfpflags); + ++ ret = kasan_kmalloc(s, ret, size, gfpflags); ++ + return ret; + } + EXPORT_SYMBOL(__kmalloc_track_caller); +@@ -4981,6 +4990,8 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, + /* Honor the call site pointer we received. */ + trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); + ++ ret = kasan_kmalloc(s, ret, size, gfpflags); ++ + return ret; + } + EXPORT_SYMBOL(__kmalloc_node_track_caller); +@@ -5914,7 +5925,8 @@ static char *create_unique_id(struct kmem_cache *s) + char *name = kmalloc(ID_STR_LENGTH, GFP_KERNEL); + char *p = name; + +- BUG_ON(!name); ++ if (!name) ++ return ERR_PTR(-ENOMEM); + + *p++ = ':'; + /* +@@ -5972,6 +5984,8 @@ static int sysfs_slab_add(struct kmem_cache *s) + * for the symlinks. + */ + name = create_unique_id(s); ++ if (IS_ERR(name)) ++ return PTR_ERR(name); + } + + s->kobj.kset = kset; +diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c +index b8f8da7ee3dea..41c1ad33d009f 100644 +--- a/net/batman-adv/hard-interface.c ++++ b/net/batman-adv/hard-interface.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -700,6 +701,9 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, + int max_header_len = batadv_max_header_len(); + int ret; + ++ if (hard_iface->net_dev->mtu < ETH_MIN_MTU + max_header_len) ++ return -EINVAL; ++ + if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) + goto out; + +diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c +index 9a0ae59cdc500..4f385d52a1c49 100644 +--- a/net/bridge/netfilter/ebtables.c ++++ b/net/bridge/netfilter/ebtables.c +@@ -1040,8 +1040,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, + goto free_iterate; + } + +- if (repl->valid_hooks != t->valid_hooks) ++ if (repl->valid_hooks != t->valid_hooks) { ++ ret = -EINVAL; + goto free_unlock; ++ } + + if (repl->num_counters && repl->num_counters != t->private->nentries) { + ret = -EINVAL; +diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c +index 6aee04f75e3e4..bcba61ef5b378 100644 +--- a/net/core/flow_dissector.c ++++ b/net/core/flow_dissector.c +@@ -1572,9 +1572,8 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) + + switch (keys->control.addr_type) { + case FLOW_DISSECTOR_KEY_IPV4_ADDRS: +- addr_diff = (__force u32)keys->addrs.v4addrs.dst - +- (__force u32)keys->addrs.v4addrs.src; +- if (addr_diff < 0) ++ if ((__force u32)keys->addrs.v4addrs.dst < ++ (__force u32)keys->addrs.v4addrs.src) + swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst); + + if 
((__force u16)keys->ports.dst < +diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c +index 9f6f4a41245d4..1012012a061fe 100644 +--- a/net/ipv6/af_inet6.c ++++ b/net/ipv6/af_inet6.c +@@ -1069,13 +1069,13 @@ static int __init inet6_init(void) + for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) + INIT_LIST_HEAD(r); + ++ raw_hashinfo_init(&raw_v6_hashinfo); ++ + if (disable_ipv6_mod) { + pr_info("Loaded, but administratively disabled, reboot required to enable\n"); + goto out; + } + +- raw_hashinfo_init(&raw_v6_hashinfo); +- + err = proto_register(&tcpv6_prot, 1); + if (err) + goto out; +diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c +index 0d9332e9cf71a..617f744a2e3a3 100644 +--- a/net/netfilter/nf_conntrack_ftp.c ++++ b/net/netfilter/nf_conntrack_ftp.c +@@ -33,6 +33,7 @@ MODULE_AUTHOR("Rusty Russell "); + MODULE_DESCRIPTION("ftp connection tracking helper"); + MODULE_ALIAS("ip_conntrack_ftp"); + MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); ++static DEFINE_SPINLOCK(nf_ftp_lock); + + #define MAX_PORTS 8 + static u_int16_t ports[MAX_PORTS]; +@@ -409,7 +410,8 @@ static int help(struct sk_buff *skb, + } + datalen = skb->len - dataoff; + +- spin_lock_bh(&ct->lock); ++ /* seqadj (nat) uses ct->lock internally, nf_nat_ftp would cause deadlock */ ++ spin_lock_bh(&nf_ftp_lock); + fb_ptr = skb->data + dataoff; + + ends_in_nl = (fb_ptr[datalen - 1] == '\n'); +@@ -538,7 +540,7 @@ out_update_nl: + if (ends_in_nl) + update_nl_seq(ct, seq, ct_ftp_info, dir, skb); + out: +- spin_unlock_bh(&ct->lock); ++ spin_unlock_bh(&nf_ftp_lock); + return ret; + } + +diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c +index 992decbcaa5c1..5703846bea3b6 100644 +--- a/net/netfilter/nf_conntrack_irc.c ++++ b/net/netfilter/nf_conntrack_irc.c +@@ -157,15 +157,37 @@ static int help(struct sk_buff *skb, unsigned int protoff, + data = ib_ptr; + data_limit = ib_ptr + datalen; + +- /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24 +- * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */ +- while (data < data_limit - (19 + MINMATCHLEN)) { +- if (memcmp(data, "\1DCC ", 5)) { ++ /* Skip any whitespace */ ++ while (data < data_limit - 10) { ++ if (*data == ' ' || *data == '\r' || *data == '\n') ++ data++; ++ else ++ break; ++ } ++ ++ /* strlen("PRIVMSG x ")=10 */ ++ if (data < data_limit - 10) { ++ if (strncasecmp("PRIVMSG ", data, 8)) ++ goto out; ++ data += 8; ++ } ++ ++ /* strlen(" :\1DCC SENT t AAAAAAAA P\1\n")=26 ++ * 7+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=26 ++ */ ++ while (data < data_limit - (21 + MINMATCHLEN)) { ++ /* Find first " :", the start of message */ ++ if (memcmp(data, " :", 2)) { + data++; + continue; + } ++ data += 2; ++ ++ /* then check that place only for the DCC command */ ++ if (memcmp(data, "\1DCC ", 5)) ++ goto out; + data += 5; +- /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */ ++ /* we have at least (21+MINMATCHLEN)-(2+5) bytes valid data left */ + + iph = ip_hdr(skb); + pr_debug("DCC found in master %pI4:%u %pI4:%u\n", +@@ -181,7 +203,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, + pr_debug("DCC %s detected\n", dccprotos[i]); + + /* we have at least +- * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid ++ * (21+MINMATCHLEN)-7-dccprotos[i].matchlen bytes valid + * data left (== 14/13 bytes) */ + if (parse_dcc(data, data_limit, &dcc_ip, + &dcc_port, &addr_beg_p, &addr_end_p)) { +diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c +index b83dc9bf0a5dd..78fd9122b70c7 100644 +--- 
a/net/netfilter/nf_conntrack_sip.c ++++ b/net/netfilter/nf_conntrack_sip.c +@@ -477,7 +477,7 @@ static int ct_sip_walk_headers(const struct nf_conn *ct, const char *dptr, + return ret; + if (ret == 0) + break; +- dataoff += *matchoff; ++ dataoff = *matchoff; + } + *in_header = 0; + } +@@ -489,7 +489,7 @@ static int ct_sip_walk_headers(const struct nf_conn *ct, const char *dptr, + break; + if (ret == 0) + return ret; +- dataoff += *matchoff; ++ dataoff = *matchoff; + } + + if (in_header) +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 848cc81d69926..2fde193c3d26a 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -2197,7 +2197,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, + struct netlink_ext_ack *extack) + { + const struct nlattr * const *nla = ctx->nla; +- struct nft_stats __percpu *stats = NULL; + struct nft_table *table = ctx->table; + struct nft_base_chain *basechain; + struct net *net = ctx->net; +@@ -2212,6 +2211,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, + return -EOVERFLOW; + + if (nla[NFTA_CHAIN_HOOK]) { ++ struct nft_stats __percpu *stats = NULL; + struct nft_chain_hook hook; + + if (flags & NFT_CHAIN_BINDING) +@@ -2243,8 +2243,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, + if (err < 0) { + nft_chain_release_hook(&hook); + kfree(basechain); ++ free_percpu(stats); + return err; + } ++ if (stats) ++ static_branch_inc(&nft_counters_enabled); + } else { + if (flags & NFT_CHAIN_BASE) + return -EINVAL; +@@ -2319,9 +2322,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, + goto err_unregister_hook; + } + +- if (stats) +- static_branch_inc(&nft_counters_enabled); +- + table->use++; + + return 0; +diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c +index 0fa2e20304272..ee6840bd59337 100644 +--- a/net/netfilter/nfnetlink_osf.c ++++ b/net/netfilter/nfnetlink_osf.c +@@ -269,6 +269,7 @@ bool nf_osf_find(const struct sk_buff *skb, + struct nf_osf_hdr_ctx ctx; + const struct tcphdr *tcp; + struct tcphdr _tcph; ++ bool found = false; + + memset(&ctx, 0, sizeof(ctx)); + +@@ -283,10 +284,11 @@ bool nf_osf_find(const struct sk_buff *skb, + + data->genre = f->genre; + data->version = f->version; ++ found = true; + break; + } + +- return true; ++ return found; + } + EXPORT_SYMBOL_GPL(nf_osf_find); + +diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c +index ac366c99086fd..7d7f7bac0216a 100644 +--- a/net/sched/cls_api.c ++++ b/net/sched/cls_api.c +@@ -2136,6 +2136,7 @@ replay: + } + + if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) { ++ tfilter_put(tp, fh); + NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind"); + err = -EINVAL; + goto errout; +diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c +index 0b941dd63d268..86675a79da1e4 100644 +--- a/net/sched/sch_taprio.c ++++ b/net/sched/sch_taprio.c +@@ -67,6 +67,7 @@ struct taprio_sched { + u32 flags; + enum tk_offsets tk_offset; + int clockid; ++ bool offloaded; + atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+ + * speeds it's sub-nanoseconds per byte + */ +@@ -1279,6 +1280,8 @@ static int taprio_enable_offload(struct net_device *dev, + goto done; + } + ++ q->offloaded = true; ++ + done: + taprio_offload_free(offload); + +@@ -1293,12 +1296,9 @@ static int taprio_disable_offload(struct net_device *dev, + struct tc_taprio_qopt_offload *offload; + int err; + +- if 
(!FULL_OFFLOAD_IS_ENABLED(q->flags)) ++ if (!q->offloaded) + return 0; + +- if (!ops->ndo_setup_tc) +- return -EOPNOTSUPP; +- + offload = taprio_offload_alloc(0); + if (!offload) { + NL_SET_ERR_MSG(extack, +@@ -1314,6 +1314,8 @@ static int taprio_disable_offload(struct net_device *dev, + goto out; + } + ++ q->offloaded = false; ++ + out: + taprio_offload_free(offload); + +@@ -1949,12 +1951,14 @@ start_error: + + static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) + { +- struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); ++ struct taprio_sched *q = qdisc_priv(sch); ++ struct net_device *dev = qdisc_dev(sch); ++ unsigned int ntx = cl - 1; + +- if (!dev_queue) ++ if (ntx >= dev->num_tx_queues) + return NULL; + +- return dev_queue->qdisc_sleeping; ++ return q->qdiscs[ntx]; + } + + static unsigned long taprio_find(struct Qdisc *sch, u32 classid) +diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c +index 1f3bb1f6b1f7b..8095876b66eb6 100644 +--- a/net/smc/smc_core.c ++++ b/net/smc/smc_core.c +@@ -2148,7 +2148,7 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, + static int smcr_buf_map_usable_links(struct smc_link_group *lgr, + struct smc_buf_desc *buf_desc, bool is_rmb) + { +- int i, rc = 0; ++ int i, rc = 0, cnt = 0; + + /* protect against parallel link reconfiguration */ + mutex_lock(&lgr->llc_conf_mutex); +@@ -2161,9 +2161,12 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr, + rc = -ENOMEM; + goto out; + } ++ cnt++; + } + out: + mutex_unlock(&lgr->llc_conf_mutex); ++ if (!rc && !cnt) ++ rc = -EINVAL; + return rc; + } + +diff --git a/scripts/Makefile.debug b/scripts/Makefile.debug +index 9f39b0130551f..8cf1cb22dd934 100644 +--- a/scripts/Makefile.debug ++++ b/scripts/Makefile.debug +@@ -1,20 +1,19 @@ + DEBUG_CFLAGS := ++debug-flags-y := -g + + ifdef CONFIG_DEBUG_INFO_SPLIT + DEBUG_CFLAGS += -gsplit-dwarf +-else +-DEBUG_CFLAGS += -g + endif + +-ifndef CONFIG_AS_IS_LLVM +-KBUILD_AFLAGS += -Wa,-gdwarf-2 +-endif +- +-ifndef CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT +-dwarf-version-$(CONFIG_DEBUG_INFO_DWARF4) := 4 +-dwarf-version-$(CONFIG_DEBUG_INFO_DWARF5) := 5 +-DEBUG_CFLAGS += -gdwarf-$(dwarf-version-y) ++debug-flags-$(CONFIG_DEBUG_INFO_DWARF4) += -gdwarf-4 ++debug-flags-$(CONFIG_DEBUG_INFO_DWARF5) += -gdwarf-5 ++ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_AS_IS_GNU),yy) ++# Clang does not pass -g or -gdwarf-* option down to GAS. ++# Add -Wa, prefix to explicitly specify the flags. 
++KBUILD_AFLAGS += $(addprefix -Wa$(comma), $(debug-flags-y)) + endif ++DEBUG_CFLAGS += $(debug-flags-y) ++KBUILD_AFLAGS += $(debug-flags-y) + + ifdef CONFIG_DEBUG_INFO_REDUCED + DEBUG_CFLAGS += -fno-var-tracking +@@ -29,5 +28,5 @@ KBUILD_AFLAGS += -gz=zlib + KBUILD_LDFLAGS += --compress-debug-sections=zlib + endif + +-KBUILD_CFLAGS += $(DEBUG_CFLAGS) ++KBUILD_CFLAGS += $(DEBUG_CFLAGS) + export DEBUG_CFLAGS +diff --git a/sound/core/init.c b/sound/core/init.c +index 726a8353201f8..4eacfafa41730 100644 +--- a/sound/core/init.c ++++ b/sound/core/init.c +@@ -178,10 +178,8 @@ int snd_card_new(struct device *parent, int idx, const char *xid, + return -ENOMEM; + + err = snd_card_init(card, parent, idx, xid, module, extra_size); +- if (err < 0) { +- kfree(card); +- return err; +- } ++ if (err < 0) ++ return err; /* card is freed by error handler */ + + *card_ret = card; + return 0; +@@ -231,7 +229,7 @@ int snd_devm_card_new(struct device *parent, int idx, const char *xid, + card->managed = true; + err = snd_card_init(card, parent, idx, xid, module, extra_size); + if (err < 0) { +- devres_free(card); ++ devres_free(card); /* in managed mode, we need to free manually */ + return err; + } + +@@ -293,6 +291,8 @@ static int snd_card_init(struct snd_card *card, struct device *parent, + mutex_unlock(&snd_card_mutex); + dev_err(parent, "cannot find the slot for index %d (range 0-%i), error: %d\n", + idx, snd_ecards_limit - 1, err); ++ if (!card->managed) ++ kfree(card); /* manually free here, as no destructor called */ + return err; + } + set_bit(idx, snd_cards_lock); /* lock it */ +diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c +index c572fb5886d5d..7af2515735957 100644 +--- a/sound/pci/hda/hda_bind.c ++++ b/sound/pci/hda/hda_bind.c +@@ -157,10 +157,10 @@ static int hda_codec_driver_remove(struct device *dev) + return codec->bus->core.ext_ops->hdev_detach(&codec->core); + } + +- refcount_dec(&codec->pcm_ref); + snd_hda_codec_disconnect_pcms(codec); + snd_hda_jack_tbl_disconnect(codec); +- wait_event(codec->remove_sleep, !refcount_read(&codec->pcm_ref)); ++ if (!refcount_dec_and_test(&codec->pcm_ref)) ++ wait_event(codec->remove_sleep, !refcount_read(&codec->pcm_ref)); + snd_power_sync_ref(codec->bus->card); + + if (codec->patch_ops.free) +diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c +index b20694fd69dea..6f30c374f896e 100644 +--- a/sound/pci/hda/hda_intel.c ++++ b/sound/pci/hda/hda_intel.c +@@ -2550,6 +2550,8 @@ static const struct pci_device_id azx_ids[] = { + /* 5 Series/3400 */ + { PCI_DEVICE(0x8086, 0x3b56), + .driver_data = AZX_DRIVER_SCH | AZX_DCAPS_INTEL_PCH_NOPM }, ++ { PCI_DEVICE(0x8086, 0x3b57), ++ .driver_data = AZX_DRIVER_SCH | AZX_DCAPS_INTEL_PCH_NOPM }, + /* Poulsbo */ + { PCI_DEVICE(0x8086, 0x811b), + .driver_data = AZX_DRIVER_SCH | AZX_DCAPS_INTEL_PCH_BASE }, +diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c +index 6c209cd26c0ca..c9d9aa6351ecf 100644 +--- a/sound/pci/hda/patch_hdmi.c ++++ b/sound/pci/hda/patch_hdmi.c +@@ -170,6 +170,8 @@ struct hdmi_spec { + bool dyn_pcm_no_legacy; + /* hdmi interrupt trigger control flag for Nvidia codec */ + bool hdmi_intr_trig_ctrl; ++ bool nv_dp_workaround; /* workaround DP audio infoframe for Nvidia */ ++ + bool intel_hsw_fixup; /* apply Intel platform-specific fixups */ + /* + * Non-generic VIA/NVIDIA specific +@@ -679,15 +681,24 @@ static void hdmi_pin_setup_infoframe(struct hda_codec *codec, + int ca, int active_channels, + int conn_type) + { ++ struct hdmi_spec *spec = codec->spec; + 
union audio_infoframe ai; + + memset(&ai, 0, sizeof(ai)); +- if (conn_type == 0) { /* HDMI */ ++ if ((conn_type == 0) || /* HDMI */ ++ /* Nvidia DisplayPort: Nvidia HW expects same layout as HDMI */ ++ (conn_type == 1 && spec->nv_dp_workaround)) { + struct hdmi_audio_infoframe *hdmi_ai = &ai.hdmi; + +- hdmi_ai->type = 0x84; +- hdmi_ai->ver = 0x01; +- hdmi_ai->len = 0x0a; ++ if (conn_type == 0) { /* HDMI */ ++ hdmi_ai->type = 0x84; ++ hdmi_ai->ver = 0x01; ++ hdmi_ai->len = 0x0a; ++ } else {/* Nvidia DP */ ++ hdmi_ai->type = 0x84; ++ hdmi_ai->ver = 0x1b; ++ hdmi_ai->len = 0x11 << 2; ++ } + hdmi_ai->CC02_CT47 = active_channels - 1; + hdmi_ai->CA = ca; + hdmi_checksum_audio_infoframe(hdmi_ai); +@@ -3617,6 +3628,7 @@ static int patch_nvhdmi_2ch(struct hda_codec *codec) + spec->pcm_playback.rates = SUPPORTED_RATES; + spec->pcm_playback.maxbps = SUPPORTED_MAXBPS; + spec->pcm_playback.formats = SUPPORTED_FORMATS; ++ spec->nv_dp_workaround = true; + return 0; + } + +@@ -3756,6 +3768,7 @@ static int patch_nvhdmi(struct hda_codec *codec) + spec->chmap.ops.chmap_cea_alloc_validate_get_type = + nvhdmi_chmap_cea_alloc_validate_get_type; + spec->chmap.ops.chmap_validate = nvhdmi_chmap_validate; ++ spec->nv_dp_workaround = true; + + codec->link_down_at_suspend = 1; + +@@ -3779,6 +3792,7 @@ static int patch_nvhdmi_legacy(struct hda_codec *codec) + spec->chmap.ops.chmap_cea_alloc_validate_get_type = + nvhdmi_chmap_cea_alloc_validate_get_type; + spec->chmap.ops.chmap_validate = nvhdmi_chmap_validate; ++ spec->nv_dp_workaround = true; + + codec->link_down_at_suspend = 1; + +@@ -3984,6 +3998,7 @@ static int tegra_hdmi_init(struct hda_codec *codec) + + generic_hdmi_init_per_pins(codec); + ++ codec->depop_delay = 10; + codec->patch_ops.build_pcms = tegra_hdmi_build_pcms; + spec->chmap.ops.chmap_cea_alloc_validate_get_type = + nvhdmi_chmap_cea_alloc_validate_get_type; +@@ -3992,6 +4007,7 @@ static int tegra_hdmi_init(struct hda_codec *codec) + spec->chmap.ops.chmap_cea_alloc_validate_get_type = + nvhdmi_chmap_cea_alloc_validate_get_type; + spec->chmap.ops.chmap_validate = nvhdmi_chmap_validate; ++ spec->nv_dp_workaround = true; + + return 0; + } +diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c +index 799f6bf266dd0..9614b63415a8e 100644 +--- a/sound/pci/hda/patch_realtek.c ++++ b/sound/pci/hda/patch_realtek.c +@@ -7037,6 +7037,8 @@ enum { + ALC294_FIXUP_ASUS_GU502_HP, + ALC294_FIXUP_ASUS_GU502_PINS, + ALC294_FIXUP_ASUS_GU502_VERBS, ++ ALC294_FIXUP_ASUS_G513_PINS, ++ ALC285_FIXUP_ASUS_G533Z_PINS, + ALC285_FIXUP_HP_GPIO_LED, + ALC285_FIXUP_HP_MUTE_LED, + ALC236_FIXUP_HP_GPIO_LED, +@@ -8374,6 +8376,24 @@ static const struct hda_fixup alc269_fixups[] = { + [ALC294_FIXUP_ASUS_GU502_HP] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc294_fixup_gu502_hp, ++ }, ++ [ALC294_FIXUP_ASUS_G513_PINS] = { ++ .type = HDA_FIXUP_PINS, ++ .v.pins = (const struct hda_pintbl[]) { ++ { 0x19, 0x03a11050 }, /* front HP mic */ ++ { 0x1a, 0x03a11c30 }, /* rear external mic */ ++ { 0x21, 0x03211420 }, /* front HP out */ ++ { } ++ }, ++ }, ++ [ALC285_FIXUP_ASUS_G533Z_PINS] = { ++ .type = HDA_FIXUP_PINS, ++ .v.pins = (const struct hda_pintbl[]) { ++ { 0x14, 0x90170120 }, ++ { } ++ }, ++ .chained = true, ++ .chain_id = ALC294_FIXUP_ASUS_G513_PINS, + }, + [ALC294_FIXUP_ASUS_COEF_1B] = { + .type = HDA_FIXUP_VERBS, +@@ -9114,6 +9134,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x1028, 0x0871, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), + SND_PCI_QUIRK(0x1028, 0x0872, "Dell 
Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC), + SND_PCI_QUIRK(0x1028, 0x0873, "Dell Precision 3930", ALC255_FIXUP_DUMMY_LINEOUT_VERB), ++ SND_PCI_QUIRK(0x1028, 0x087d, "Dell Precision 5530", ALC289_FIXUP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x08ad, "Dell WYSE AIO", ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x08ae, "Dell WYSE NB", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB), +@@ -9130,6 +9151,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x1028, 0x0a9d, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0a9e, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0b19, "Dell XPS 15 9520", ALC289_FIXUP_DUAL_SPK), ++ SND_PCI_QUIRK(0x1028, 0x0b1a, "Dell Precision 5570", ALC289_FIXUP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), +@@ -9257,6 +9279,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x103c, 0x88d0, "HP Pavilion 15-eh1xxx (mainboard 88D0)", ALC287_FIXUP_HP_GPIO_LED), ++ SND_PCI_QUIRK(0x103c, 0x8902, "HP OMEN 16", ALC285_FIXUP_HP_MUTE_LED), + SND_PCI_QUIRK(0x103c, 0x896e, "HP EliteBook x360 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8971, "HP EliteBook 830 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8972, "HP EliteBook 840 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), +@@ -9304,10 +9327,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), + SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A), ++ SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK), ++ SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC), + SND_PCI_QUIRK(0x1043, 0x1740, "ASUS UX430UA", ALC295_FIXUP_ASUS_DACS), + SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK), +- SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK), + SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS), + SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x18f1, "Asus FX505DT", ALC256_FIXUP_ASUS_HEADSET_MIC), +@@ -9323,14 +9347,16 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC), + SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), ++ SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS), + SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC), ++ SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), + SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS 
Zephyrus G15", ALC289_FIXUP_ASUS_GA502), + SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), ++ SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS), + SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), ++ SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), +- SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401), +- SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), + SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), + SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC), +@@ -9532,6 +9558,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), + SND_PCI_QUIRK(0x1849, 0x1233, "ASRock NUC Box 1100", ALC233_FIXUP_NO_AUDIO_JACK), + SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS), ++ SND_PCI_QUIRK(0x19e5, 0x320f, "Huawei WRT-WX9 ", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1b35, 0x1235, "CZC B20", ALC269_FIXUP_CZC_B20), + SND_PCI_QUIRK(0x1b35, 0x1236, "CZC TMI", ALC269_FIXUP_CZC_TMI), + SND_PCI_QUIRK(0x1b35, 0x1237, "CZC L101", ALC269_FIXUP_CZC_L101), +diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c +index ff2aa13b7b26f..5d105c44b46df 100644 +--- a/sound/usb/endpoint.c ++++ b/sound/usb/endpoint.c +@@ -758,8 +758,7 @@ bool snd_usb_endpoint_compatible(struct snd_usb_audio *chip, + * The endpoint needs to be closed via snd_usb_endpoint_close() later. + * + * Note that this function doesn't configure the endpoint. The substream +- * needs to set it up later via snd_usb_endpoint_set_params() and +- * snd_usb_endpoint_prepare(). ++ * needs to set it up later via snd_usb_endpoint_configure(). + */ + struct snd_usb_endpoint * + snd_usb_endpoint_open(struct snd_usb_audio *chip, +@@ -1293,13 +1292,12 @@ out_of_memory: + /* + * snd_usb_endpoint_set_params: configure an snd_usb_endpoint + * +- * It's called either from hw_params callback. + * Determine the number of URBs to be used on this endpoint. + * An endpoint must be configured before it can be started. + * An endpoint that is already running can not be reconfigured. + */ +-int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, +- struct snd_usb_endpoint *ep) ++static int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, ++ struct snd_usb_endpoint *ep) + { + const struct audioformat *fmt = ep->cur_audiofmt; + int err; +@@ -1382,18 +1380,18 @@ static int init_sample_rate(struct snd_usb_audio *chip, + } + + /* +- * snd_usb_endpoint_prepare: Prepare the endpoint ++ * snd_usb_endpoint_configure: Configure the endpoint + * + * This function sets up the EP to be fully usable state. +- * It's called either from prepare callback. ++ * It's called either from hw_params or prepare callback. + * The function checks need_setup flag, and performs nothing unless needed, + * so it's safe to call this multiple times. + * + * This returns zero if unchanged, 1 if the configuration has changed, + * or a negative error code. 
+ */ +-int snd_usb_endpoint_prepare(struct snd_usb_audio *chip, +- struct snd_usb_endpoint *ep) ++int snd_usb_endpoint_configure(struct snd_usb_audio *chip, ++ struct snd_usb_endpoint *ep) + { + bool iface_first; + int err = 0; +@@ -1414,6 +1412,9 @@ int snd_usb_endpoint_prepare(struct snd_usb_audio *chip, + if (err < 0) + goto unlock; + } ++ err = snd_usb_endpoint_set_params(chip, ep); ++ if (err < 0) ++ goto unlock; + goto done; + } + +@@ -1441,6 +1442,10 @@ int snd_usb_endpoint_prepare(struct snd_usb_audio *chip, + if (err < 0) + goto unlock; + ++ err = snd_usb_endpoint_set_params(chip, ep); ++ if (err < 0) ++ goto unlock; ++ + err = snd_usb_select_mode_quirk(chip, ep->cur_audiofmt); + if (err < 0) + goto unlock; +diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h +index e67ea28faa54f..6a9af04cf175a 100644 +--- a/sound/usb/endpoint.h ++++ b/sound/usb/endpoint.h +@@ -17,10 +17,8 @@ snd_usb_endpoint_open(struct snd_usb_audio *chip, + bool is_sync_ep); + void snd_usb_endpoint_close(struct snd_usb_audio *chip, + struct snd_usb_endpoint *ep); +-int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, +- struct snd_usb_endpoint *ep); +-int snd_usb_endpoint_prepare(struct snd_usb_audio *chip, +- struct snd_usb_endpoint *ep); ++int snd_usb_endpoint_configure(struct snd_usb_audio *chip, ++ struct snd_usb_endpoint *ep); + int snd_usb_endpoint_get_clock_rate(struct snd_usb_audio *chip, int clock); + + bool snd_usb_endpoint_compatible(struct snd_usb_audio *chip, +diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c +index 02035b545f9dd..e692ae04436a5 100644 +--- a/sound/usb/pcm.c ++++ b/sound/usb/pcm.c +@@ -443,17 +443,17 @@ static int configure_endpoints(struct snd_usb_audio *chip, + if (stop_endpoints(subs, false)) + sync_pending_stops(subs); + if (subs->sync_endpoint) { +- err = snd_usb_endpoint_prepare(chip, subs->sync_endpoint); ++ err = snd_usb_endpoint_configure(chip, subs->sync_endpoint); + if (err < 0) + return err; + } +- err = snd_usb_endpoint_prepare(chip, subs->data_endpoint); ++ err = snd_usb_endpoint_configure(chip, subs->data_endpoint); + if (err < 0) + return err; + snd_usb_set_format_quirk(subs, subs->cur_audiofmt); + } else { + if (subs->sync_endpoint) { +- err = snd_usb_endpoint_prepare(chip, subs->sync_endpoint); ++ err = snd_usb_endpoint_configure(chip, subs->sync_endpoint); + if (err < 0) + return err; + } +@@ -551,13 +551,7 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream, + subs->cur_audiofmt = fmt; + mutex_unlock(&chip->mutex); + +- if (subs->sync_endpoint) { +- ret = snd_usb_endpoint_set_params(chip, subs->sync_endpoint); +- if (ret < 0) +- goto unlock; +- } +- +- ret = snd_usb_endpoint_set_params(chip, subs->data_endpoint); ++ ret = configure_endpoints(chip, subs); + + unlock: + if (ret < 0) +diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c +index 6b1bafe267a42..8ec5b9f344e02 100644 +--- a/tools/lib/perf/evlist.c ++++ b/tools/lib/perf/evlist.c +@@ -441,6 +441,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, + + perf_evlist__for_each_entry(evlist, evsel) { + bool overwrite = evsel->attr.write_backward; ++ enum fdarray_flags flgs; + struct perf_mmap *map; + int *output, fd, cpu; + +@@ -504,8 +505,8 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, + + revent = !overwrite ? POLLIN : 0; + +- if (!evsel->system_wide && +- perf_evlist__add_pollfd(evlist, fd, map, revent, fdarray_flag__default) < 0) { ++ flgs = evsel->system_wide ? 
fdarray_flag__nonfilterable : fdarray_flag__default; ++ if (perf_evlist__add_pollfd(evlist, fd, map, revent, flgs) < 0) { + perf_mmap__put(map); + return -1; + } +diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c +index 63b9db6574425..97c69a249c6e4 100644 +--- a/tools/perf/util/bpf_counter_cgroup.c ++++ b/tools/perf/util/bpf_counter_cgroup.c +@@ -95,7 +95,7 @@ static int bperf_load_program(struct evlist *evlist) + + perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpus) { + link = bpf_program__attach_perf_event(skel->progs.on_cgrp_switch, +- FD(cgrp_switch, cpu.cpu)); ++ FD(cgrp_switch, i)); + if (IS_ERR(link)) { + pr_err("Failed to attach cgroup program\n"); + err = PTR_ERR(link); +@@ -123,7 +123,7 @@ static int bperf_load_program(struct evlist *evlist) + + map_fd = bpf_map__fd(skel->maps.events); + perf_cpu_map__for_each_cpu(cpu, j, evlist->core.all_cpus) { +- int fd = FD(evsel, cpu.cpu); ++ int fd = FD(evsel, j); + __u32 idx = evsel->core.idx * total_cpus + cpu.cpu; + + err = bpf_map_update_elem(map_fd, &idx, &fd, +diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c +index 292c430768b52..c72f8ad96f751 100644 +--- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c ++++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c +@@ -176,7 +176,7 @@ static int bperf_cgroup_count(void) + } + + // This will be attached to cgroup-switches event for each cpu +-SEC("perf_events") ++SEC("perf_event") + int BPF_PROG(on_cgrp_switch) + { + return bperf_cgroup_count(); +diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c +index 953338b9e887e..02cd9f75e3d2f 100644 +--- a/tools/perf/util/genelf.c ++++ b/tools/perf/util/genelf.c +@@ -251,6 +251,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, + Elf_Data *d; + Elf_Scn *scn; + Elf_Ehdr *ehdr; ++ Elf_Phdr *phdr; + Elf_Shdr *shdr; + uint64_t eh_frame_base_offset; + char *strsym = NULL; +@@ -285,6 +286,19 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, + ehdr->e_version = EV_CURRENT; + ehdr->e_shstrndx= unwinding ? 
4 : 2; /* shdr index for section name */ + ++ /* ++ * setup program header ++ */ ++ phdr = elf_newphdr(e, 1); ++ phdr[0].p_type = PT_LOAD; ++ phdr[0].p_offset = 0; ++ phdr[0].p_vaddr = 0; ++ phdr[0].p_paddr = 0; ++ phdr[0].p_filesz = csize; ++ phdr[0].p_memsz = csize; ++ phdr[0].p_flags = PF_X | PF_R; ++ phdr[0].p_align = 8; ++ + /* + * setup text section + */ +diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h +index ae138afe6c563..b5c909546e3f2 100644 +--- a/tools/perf/util/genelf.h ++++ b/tools/perf/util/genelf.h +@@ -53,8 +53,10 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent + + #if GEN_ELF_CLASS == ELFCLASS64 + #define elf_newehdr elf64_newehdr ++#define elf_newphdr elf64_newphdr + #define elf_getshdr elf64_getshdr + #define Elf_Ehdr Elf64_Ehdr ++#define Elf_Phdr Elf64_Phdr + #define Elf_Shdr Elf64_Shdr + #define Elf_Sym Elf64_Sym + #define ELF_ST_TYPE(a) ELF64_ST_TYPE(a) +@@ -62,8 +64,10 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent + #define ELF_ST_VIS(a) ELF64_ST_VISIBILITY(a) + #else + #define elf_newehdr elf32_newehdr ++#define elf_newphdr elf32_newphdr + #define elf_getshdr elf32_getshdr + #define Elf_Ehdr Elf32_Ehdr ++#define Elf_Phdr Elf32_Phdr + #define Elf_Shdr Elf32_Shdr + #define Elf_Sym Elf32_Sym + #define ELF_ST_TYPE(a) ELF32_ST_TYPE(a) +diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c +index 75bec32d4f571..647b7dff8ef36 100644 +--- a/tools/perf/util/symbol-elf.c ++++ b/tools/perf/util/symbol-elf.c +@@ -2102,8 +2102,8 @@ static int kcore_copy__compare_file(const char *from_dir, const char *to_dir, + * unusual. One significant peculiarity is that the mapping (start -> pgoff) + * is not the same for the kernel map and the modules map. That happens because + * the data is copied adjacently whereas the original kcore has gaps. Finally, +- * kallsyms and modules files are compared with their copies to check that +- * modules have not been loaded or unloaded while the copies were taking place. ++ * kallsyms file is compared with its copy to check that modules have not been ++ * loaded or unloaded while the copies were taking place. + * + * Return: %0 on success, %-1 on failure. + */ +@@ -2166,9 +2166,6 @@ int kcore_copy(const char *from_dir, const char *to_dir) + goto out_extract_close; + } + +- if (kcore_copy__compare_file(from_dir, to_dir, "modules")) +- goto out_extract_close; +- + if (kcore_copy__compare_file(from_dir, to_dir, "kallsyms")) + goto out_extract_close; + +diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c +index 84d17bd4efaed..64e273b2b1b21 100644 +--- a/tools/perf/util/synthetic-events.c ++++ b/tools/perf/util/synthetic-events.c +@@ -367,13 +367,24 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, + bool is_kernel) + { + struct build_id bid; ++ struct nsinfo *nsi; ++ struct nscookie nc; + int rc; + +- if (is_kernel) ++ if (is_kernel) { + rc = sysfs__read_build_id("/sys/kernel/notes", &bid); +- else +- rc = filename__read_build_id(event->filename, &bid) > 0 ? 0 : -1; ++ goto out; ++ } ++ ++ nsi = nsinfo__new(event->pid); ++ nsinfo__mountns_enter(nsi, &nc); + ++ rc = filename__read_build_id(event->filename, &bid) > 0 ? 
0 : -1; ++ ++ nsinfo__mountns_exit(&nc); ++ nsinfo__put(nsi); ++ ++out: + if (rc == 0) { + memcpy(event->build_id, bid.data, sizeof(bid.data)); + event->build_id_size = (u8) bid.size; +diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh +index e714bae473fb4..81f31179ac887 100755 +--- a/tools/testing/selftests/net/forwarding/sch_red.sh ++++ b/tools/testing/selftests/net/forwarding/sch_red.sh +@@ -1,3 +1,4 @@ ++#!/bin/bash + # SPDX-License-Identifier: GPL-2.0 + + # This test sends one stream of traffic from H1 through a TBF shaper, to a RED diff --git a/sys-kernel/pinephone-sources/files/5.19.8-9.patch b/sys-kernel/pinephone-sources/files/5.19.8-9.patch new file mode 100644 index 0000000..f12fb56 --- /dev/null +++ b/sys-kernel/pinephone-sources/files/5.19.8-9.patch @@ -0,0 +1,8234 @@ +diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst +index 33b04db8408f9..fda97b3fcf018 100644 +--- a/Documentation/arm64/silicon-errata.rst ++++ b/Documentation/arm64/silicon-errata.rst +@@ -52,6 +52,8 @@ stable kernels. + | Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 | + +----------------+-----------------+-----------------+-----------------------------+ + +----------------+-----------------+-----------------+-----------------------------+ ++| ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 | +++----------------+-----------------+-----------------+-----------------------------+ + | ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 | + +----------------+-----------------+-----------------+-----------------------------+ + | ARM | Cortex-A510 | #2038923 | ARM64_ERRATUM_2038923 | +diff --git a/Documentation/hwmon/asus_ec_sensors.rst b/Documentation/hwmon/asus_ec_sensors.rst +index 78ca69eda8778..02f4ad314a1eb 100644 +--- a/Documentation/hwmon/asus_ec_sensors.rst ++++ b/Documentation/hwmon/asus_ec_sensors.rst +@@ -13,12 +13,16 @@ Supported boards: + * ROG CROSSHAIR VIII FORMULA + * ROG CROSSHAIR VIII HERO + * ROG CROSSHAIR VIII IMPACT ++ * ROG MAXIMUS XI HERO ++ * ROG MAXIMUS XI HERO (WI-FI) + * ROG STRIX B550-E GAMING + * ROG STRIX B550-I GAMING + * ROG STRIX X570-E GAMING + * ROG STRIX X570-E GAMING WIFI II + * ROG STRIX X570-F GAMING + * ROG STRIX X570-I GAMING ++ * ROG STRIX Z690-A GAMING WIFI D4 ++ * ROG ZENITH II EXTREME + + Authors: + - Eugene Shalygin +diff --git a/Makefile b/Makefile +index e361c6230e9e5..1f27c4bd09e67 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 5 + PATCHLEVEL = 19 +-SUBLEVEL = 8 ++SUBLEVEL = 9 + EXTRAVERSION = + NAME = Superb Owl + +@@ -1286,8 +1286,7 @@ hdr-inst := -f $(srctree)/scripts/Makefile.headersinst obj + + PHONY += headers + headers: $(version_h) scripts_unifdef uapi-asm-generic archheaders archscripts +- $(if $(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/Kbuild),, \ +- $(error Headers not exportable for the $(SRCARCH) architecture)) ++ $(if $(filter um, $(SRCARCH)), $(error Headers not exportable for UML)) + $(Q)$(MAKE) $(hdr-inst)=include/uapi + $(Q)$(MAKE) $(hdr-inst)=arch/$(SRCARCH)/include/uapi + +diff --git a/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi b/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi +index ba621783acdbc..d6f364c6be94b 100644 +--- a/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi ++++ b/arch/arm/boot/dts/at91-sama5d27_wlsom1.dtsi +@@ -76,8 +76,8 @@ + regulators { + vdd_3v3: VDD_IO { + regulator-name = "VDD_IO"; +- regulator-min-microvolt = <1200000>; +- 
regulator-max-microvolt = <3700000>; ++ regulator-min-microvolt = <3300000>; ++ regulator-max-microvolt = <3300000>; + regulator-initial-mode = <2>; + regulator-allowed-modes = <2>, <4>; + regulator-always-on; +@@ -95,8 +95,8 @@ + + vddio_ddr: VDD_DDR { + regulator-name = "VDD_DDR"; +- regulator-min-microvolt = <600000>; +- regulator-max-microvolt = <1850000>; ++ regulator-min-microvolt = <1200000>; ++ regulator-max-microvolt = <1200000>; + regulator-initial-mode = <2>; + regulator-allowed-modes = <2>, <4>; + regulator-always-on; +@@ -118,8 +118,8 @@ + + vdd_core: VDD_CORE { + regulator-name = "VDD_CORE"; +- regulator-min-microvolt = <600000>; +- regulator-max-microvolt = <1850000>; ++ regulator-min-microvolt = <1250000>; ++ regulator-max-microvolt = <1250000>; + regulator-initial-mode = <2>; + regulator-allowed-modes = <2>, <4>; + regulator-always-on; +@@ -160,8 +160,8 @@ + + LDO1 { + regulator-name = "LDO1"; +- regulator-min-microvolt = <1200000>; +- regulator-max-microvolt = <3700000>; ++ regulator-min-microvolt = <3300000>; ++ regulator-max-microvolt = <3300000>; + regulator-always-on; + + regulator-state-standby { +@@ -175,9 +175,8 @@ + + LDO2 { + regulator-name = "LDO2"; +- regulator-min-microvolt = <1200000>; +- regulator-max-microvolt = <3700000>; +- regulator-always-on; ++ regulator-min-microvolt = <1800000>; ++ regulator-max-microvolt = <3300000>; + + regulator-state-standby { + regulator-on-in-suspend; +diff --git a/arch/arm/boot/dts/at91-sama5d2_icp.dts b/arch/arm/boot/dts/at91-sama5d2_icp.dts +index 164201a8fbf2d..492456e195a37 100644 +--- a/arch/arm/boot/dts/at91-sama5d2_icp.dts ++++ b/arch/arm/boot/dts/at91-sama5d2_icp.dts +@@ -197,8 +197,8 @@ + regulators { + vdd_io_reg: VDD_IO { + regulator-name = "VDD_IO"; +- regulator-min-microvolt = <1200000>; +- regulator-max-microvolt = <3700000>; ++ regulator-min-microvolt = <3300000>; ++ regulator-max-microvolt = <3300000>; + regulator-initial-mode = <2>; + regulator-allowed-modes = <2>, <4>; + regulator-always-on; +@@ -216,8 +216,8 @@ + + VDD_DDR { + regulator-name = "VDD_DDR"; +- regulator-min-microvolt = <600000>; +- regulator-max-microvolt = <1850000>; ++ regulator-min-microvolt = <1350000>; ++ regulator-max-microvolt = <1350000>; + regulator-initial-mode = <2>; + regulator-allowed-modes = <2>, <4>; + regulator-always-on; +@@ -235,8 +235,8 @@ + + VDD_CORE { + regulator-name = "VDD_CORE"; +- regulator-min-microvolt = <600000>; +- regulator-max-microvolt = <1850000>; ++ regulator-min-microvolt = <1250000>; ++ regulator-max-microvolt = <1250000>; + regulator-initial-mode = <2>; + regulator-allowed-modes = <2>, <4>; + regulator-always-on; +@@ -258,7 +258,6 @@ + regulator-max-microvolt = <1850000>; + regulator-initial-mode = <2>; + regulator-allowed-modes = <2>, <4>; +- regulator-always-on; + + regulator-state-standby { + regulator-on-in-suspend; +@@ -273,8 +272,8 @@ + + LDO1 { + regulator-name = "LDO1"; +- regulator-min-microvolt = <1200000>; +- regulator-max-microvolt = <3700000>; ++ regulator-min-microvolt = <2500000>; ++ regulator-max-microvolt = <2500000>; + regulator-always-on; + + regulator-state-standby { +@@ -288,8 +287,8 @@ + + LDO2 { + regulator-name = "LDO2"; +- regulator-min-microvolt = <1200000>; +- regulator-max-microvolt = <3700000>; ++ regulator-min-microvolt = <3300000>; ++ regulator-max-microvolt = <3300000>; + regulator-always-on; + + regulator-state-standby { +diff --git a/arch/arm/boot/dts/at91-sama7g5ek.dts b/arch/arm/boot/dts/at91-sama7g5ek.dts +index 103544620fd7c..b261b4da08502 100644 +--- 
a/arch/arm/boot/dts/at91-sama7g5ek.dts ++++ b/arch/arm/boot/dts/at91-sama7g5ek.dts +@@ -244,8 +244,8 @@ + regulators { + vdd_3v3: VDD_IO { + regulator-name = "VDD_IO"; +- regulator-min-microvolt = <1200000>; +- regulator-max-microvolt = <3700000>; ++ regulator-min-microvolt = <3300000>; ++ regulator-max-microvolt = <3300000>; + regulator-initial-mode = <2>; + regulator-allowed-modes = <2>, <4>; + regulator-always-on; +@@ -264,8 +264,8 @@ + + vddioddr: VDD_DDR { + regulator-name = "VDD_DDR"; +- regulator-min-microvolt = <1300000>; +- regulator-max-microvolt = <1450000>; ++ regulator-min-microvolt = <1350000>; ++ regulator-max-microvolt = <1350000>; + regulator-initial-mode = <2>; + regulator-allowed-modes = <2>, <4>; + regulator-always-on; +@@ -285,8 +285,8 @@ + + vddcore: VDD_CORE { + regulator-name = "VDD_CORE"; +- regulator-min-microvolt = <1100000>; +- regulator-max-microvolt = <1850000>; ++ regulator-min-microvolt = <1150000>; ++ regulator-max-microvolt = <1150000>; + regulator-initial-mode = <2>; + regulator-allowed-modes = <2>, <4>; + regulator-always-on; +@@ -306,7 +306,7 @@ + vddcpu: VDD_OTHER { + regulator-name = "VDD_OTHER"; + regulator-min-microvolt = <1050000>; +- regulator-max-microvolt = <1850000>; ++ regulator-max-microvolt = <1250000>; + regulator-initial-mode = <2>; + regulator-allowed-modes = <2>, <4>; + regulator-ramp-delay = <3125>; +@@ -326,8 +326,8 @@ + + vldo1: LDO1 { + regulator-name = "LDO1"; +- regulator-min-microvolt = <1200000>; +- regulator-max-microvolt = <3700000>; ++ regulator-min-microvolt = <1800000>; ++ regulator-max-microvolt = <1800000>; + regulator-always-on; + + regulator-state-standby { +diff --git a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi +index 095c9143d99a3..6b791d515e294 100644 +--- a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi ++++ b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi +@@ -51,16 +51,6 @@ + vin-supply = <®_3p3v_s5>; + }; + +- reg_3p3v_s0: regulator-3p3v-s0 { +- compatible = "regulator-fixed"; +- regulator-name = "V_3V3_S0"; +- regulator-min-microvolt = <3300000>; +- regulator-max-microvolt = <3300000>; +- regulator-always-on; +- regulator-boot-on; +- vin-supply = <®_3p3v_s5>; +- }; +- + reg_3p3v_s5: regulator-3p3v-s5 { + compatible = "regulator-fixed"; + regulator-name = "V_3V3_S5"; +@@ -259,7 +249,7 @@ + + /* default boot source: workaround #1 for errata ERR006282 */ + smarc_flash: flash@0 { +- compatible = "winbond,w25q16dw", "jedec,spi-nor"; ++ compatible = "jedec,spi-nor"; + reg = <0>; + spi-max-frequency = <20000000>; + }; +diff --git a/arch/arm/boot/dts/imx6qdl-vicut1.dtsi b/arch/arm/boot/dts/imx6qdl-vicut1.dtsi +index a1676b5d2980f..c5a98b0110dd3 100644 +--- a/arch/arm/boot/dts/imx6qdl-vicut1.dtsi ++++ b/arch/arm/boot/dts/imx6qdl-vicut1.dtsi +@@ -28,7 +28,7 @@ + enable-gpios = <&gpio4 28 GPIO_ACTIVE_HIGH>; + }; + +- backlight_led: backlight_led { ++ backlight_led: backlight-led { + compatible = "pwm-backlight"; + pwms = <&pwm3 0 5000000 0>; + brightness-levels = <0 16 64 255>; +diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c +index df6d673e83d56..f4501dea98b04 100644 +--- a/arch/arm/mach-at91/pm.c ++++ b/arch/arm/mach-at91/pm.c +@@ -541,9 +541,41 @@ extern u32 at91_pm_suspend_in_sram_sz; + + static int at91_suspend_finish(unsigned long val) + { ++ unsigned char modified_gray_code[] = { ++ 0x00, 0x01, 0x02, 0x03, 0x06, 0x07, 0x04, 0x05, 0x0c, 0x0d, ++ 0x0e, 0x0f, 0x0a, 0x0b, 0x08, 0x09, 0x18, 0x19, 0x1a, 0x1b, ++ 0x1e, 0x1f, 0x1c, 0x1d, 0x14, 0x15, 0x16, 0x17, 
0x12, 0x13, ++ 0x10, 0x11, ++ }; ++ unsigned int tmp, index; + int i; + + if (soc_pm.data.mode == AT91_PM_BACKUP && soc_pm.data.ramc_phy) { ++ /* ++ * Bootloader will perform DDR recalibration and will try to ++ * restore the ZQ0SR0 with the value saved here. But the ++ * calibration is buggy and restoring some values from ZQ0SR0 ++ * is forbidden and risky thus we need to provide processed ++ * values for these (modified gray code values). ++ */ ++ tmp = readl(soc_pm.data.ramc_phy + DDR3PHY_ZQ0SR0); ++ ++ /* Store pull-down output impedance select. */ ++ index = (tmp >> DDR3PHY_ZQ0SR0_PDO_OFF) & 0x1f; ++ soc_pm.bu->ddr_phy_calibration[0] = modified_gray_code[index]; ++ ++ /* Store pull-up output impedance select. */ ++ index = (tmp >> DDR3PHY_ZQ0SR0_PUO_OFF) & 0x1f; ++ soc_pm.bu->ddr_phy_calibration[0] |= modified_gray_code[index]; ++ ++ /* Store pull-down on-die termination impedance select. */ ++ index = (tmp >> DDR3PHY_ZQ0SR0_PDODT_OFF) & 0x1f; ++ soc_pm.bu->ddr_phy_calibration[0] |= modified_gray_code[index]; ++ ++ /* Store pull-up on-die termination impedance select. */ ++ index = (tmp >> DDR3PHY_ZQ0SRO_PUODT_OFF) & 0x1f; ++ soc_pm.bu->ddr_phy_calibration[0] |= modified_gray_code[index]; ++ + /* + * The 1st 8 words of memory might get corrupted in the process + * of DDR PHY recalibration; it is saved here in securam and it +@@ -1066,10 +1098,6 @@ static int __init at91_pm_backup_init(void) + of_scan_flat_dt(at91_pm_backup_scan_memcs, &located); + if (!located) + goto securam_fail; +- +- /* DDR3PHY_ZQ0SR0 */ +- soc_pm.bu->ddr_phy_calibration[0] = readl(soc_pm.data.ramc_phy + +- 0x188); + } + + return 0; +diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S +index abe4ced33edaf..ffed4d9490428 100644 +--- a/arch/arm/mach-at91/pm_suspend.S ++++ b/arch/arm/mach-at91/pm_suspend.S +@@ -172,9 +172,15 @@ sr_ena_2: + /* Put DDR PHY's DLL in bypass mode for non-backup modes. */ + cmp r7, #AT91_PM_BACKUP + beq sr_ena_3 +- ldr tmp1, [r3, #DDR3PHY_PIR] +- orr tmp1, tmp1, #DDR3PHY_PIR_DLLBYP +- str tmp1, [r3, #DDR3PHY_PIR] ++ ++ /* Disable DX DLLs. */ ++ ldr tmp1, [r3, #DDR3PHY_DX0DLLCR] ++ orr tmp1, tmp1, #DDR3PHY_DXDLLCR_DLLDIS ++ str tmp1, [r3, #DDR3PHY_DX0DLLCR] ++ ++ ldr tmp1, [r3, #DDR3PHY_DX1DLLCR] ++ orr tmp1, tmp1, #DDR3PHY_DXDLLCR_DLLDIS ++ str tmp1, [r3, #DDR3PHY_DX1DLLCR] + + sr_ena_3: + /* Power down DDR PHY data receivers. */ +@@ -221,10 +227,14 @@ sr_ena_3: + bic tmp1, tmp1, #DDR3PHY_DSGCR_ODTPDD_ODT0 + str tmp1, [r3, #DDR3PHY_DSGCR] + +- /* Take DDR PHY's DLL out of bypass mode. */ +- ldr tmp1, [r3, #DDR3PHY_PIR] +- bic tmp1, tmp1, #DDR3PHY_PIR_DLLBYP +- str tmp1, [r3, #DDR3PHY_PIR] ++ /* Enable DX DLLs. */ ++ ldr tmp1, [r3, #DDR3PHY_DX0DLLCR] ++ bic tmp1, tmp1, #DDR3PHY_DXDLLCR_DLLDIS ++ str tmp1, [r3, #DDR3PHY_DX0DLLCR] ++ ++ ldr tmp1, [r3, #DDR3PHY_DX1DLLCR] ++ bic tmp1, tmp1, #DDR3PHY_DXDLLCR_DLLDIS ++ str tmp1, [r3, #DDR3PHY_DX1DLLCR] + + /* Enable quasi-dynamic programming. */ + mov tmp1, #0 +diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig +index 001eaba5a6b4b..cc1e7bb49d38b 100644 +--- a/arch/arm64/Kconfig ++++ b/arch/arm64/Kconfig +@@ -914,6 +914,23 @@ config ARM64_ERRATUM_1902691 + + If unsure, say Y. + ++config ARM64_ERRATUM_2457168 ++ bool "Cortex-A510: 2457168: workaround for AMEVCNTR01 incrementing incorrectly" ++ depends on ARM64_AMU_EXTN ++ default y ++ help ++ This option adds the workaround for ARM Cortex-A510 erratum 2457168. 
++ ++ The AMU counter AMEVCNTR01 (constant counter) should increment at the same rate ++ as the system counter. On affected Cortex-A510 cores AMEVCNTR01 increments ++ incorrectly giving a significantly higher output value. ++ ++ Work around this problem by returning 0 when reading the affected counter in ++ key locations that results in disabling all users of this counter. This effect ++ is the same to firmware disabling affected counters. ++ ++ If unsure, say Y. ++ + config CAVIUM_ERRATUM_22375 + bool "Cavium erratum 22375, 24313" + default y +@@ -1867,6 +1884,8 @@ config ARM64_BTI_KERNEL + depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697 + depends on !CC_IS_GCC || GCC_VERSION >= 100100 ++ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106671 ++ depends on !CC_IS_GCC + # https://github.com/llvm/llvm-project/commit/a88c722e687e6780dcd6a58718350dc76fcc4cc9 + depends on !CC_IS_CLANG || CLANG_VERSION >= 120000 + depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) +diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds-65bb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds-65bb.dts +index 40d34c8384a5e..b949cac037427 100644 +--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds-65bb.dts ++++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds-65bb.dts +@@ -25,7 +25,6 @@ + &enetc_port0 { + phy-handle = <&slot1_sgmii>; + phy-mode = "2500base-x"; +- managed = "in-band-status"; + status = "okay"; + }; + +diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts +index 24737e89038a4..96cac0f969a77 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts ++++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7901.dts +@@ -626,24 +626,28 @@ + lan1: port@0 { + reg = <0>; + label = "lan1"; ++ phy-mode = "internal"; + local-mac-address = [00 00 00 00 00 00]; + }; + + lan2: port@1 { + reg = <1>; + label = "lan2"; ++ phy-mode = "internal"; + local-mac-address = [00 00 00 00 00 00]; + }; + + lan3: port@2 { + reg = <2>; + label = "lan3"; ++ phy-mode = "internal"; + local-mac-address = [00 00 00 00 00 00]; + }; + + lan4: port@3 { + reg = <3>; + label = "lan4"; ++ phy-mode = "internal"; + local-mac-address = [00 00 00 00 00 00]; + }; + +diff --git a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +index eafa88d980b32..c2d4da25482ff 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi ++++ b/arch/arm64/boot/dts/freescale/imx8mm-verdin.dtsi +@@ -32,10 +32,10 @@ + }; + + /* Fixed clock dedicated to SPI CAN controller */ +- clk20m: oscillator { ++ clk40m: oscillator { + compatible = "fixed-clock"; + #clock-cells = <0>; +- clock-frequency = <20000000>; ++ clock-frequency = <40000000>; + }; + + gpio-keys { +@@ -194,8 +194,8 @@ + + can1: can@0 { + compatible = "microchip,mcp251xfd"; +- clocks = <&clk20m>; +- interrupts-extended = <&gpio1 6 IRQ_TYPE_EDGE_FALLING>; ++ clocks = <&clk40m>; ++ interrupts-extended = <&gpio1 6 IRQ_TYPE_LEVEL_LOW>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_can1_int>; + reg = <0>; +@@ -595,7 +595,7 @@ + pinctrl-0 = <&pinctrl_gpio_9_dsi>, <&pinctrl_i2s_2_bclk_touch_reset>; + reg = <0x4a>; + /* Verdin I2S_2_BCLK (TOUCH_RESET#, SODIMM 42) */ +- reset-gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; ++ reset-gpios = <&gpio3 23 GPIO_ACTIVE_LOW>; + status = "disabled"; + }; + +@@ -737,6 +737,7 @@ + }; + + &usbphynop2 { ++ power-domains = <&pgc_otg2>; + vcc-supply = <®_vdd_3v3>; + }; + +diff --git 
a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts +index 521215520a0f4..6630ec561dc25 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts ++++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw74xx.dts +@@ -770,10 +770,10 @@ + + pinctrl_sai2: sai2grp { + fsl,pins = < +- MX8MP_IOMUXC_SAI2_TXFS__AUDIOMIX_SAI2_TX_SYNC +- MX8MP_IOMUXC_SAI2_TXD0__AUDIOMIX_SAI2_TX_DATA00 +- MX8MP_IOMUXC_SAI2_TXC__AUDIOMIX_SAI2_TX_BCLK +- MX8MP_IOMUXC_SAI2_MCLK__AUDIOMIX_SAI2_MCLK ++ MX8MP_IOMUXC_SAI2_TXFS__AUDIOMIX_SAI2_TX_SYNC 0xd6 ++ MX8MP_IOMUXC_SAI2_TXD0__AUDIOMIX_SAI2_TX_DATA00 0xd6 ++ MX8MP_IOMUXC_SAI2_TXC__AUDIOMIX_SAI2_TX_BCLK 0xd6 ++ MX8MP_IOMUXC_SAI2_MCLK__AUDIOMIX_SAI2_MCLK 0xd6 + >; + }; + +diff --git a/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi +index fb17e329cd370..f5323291a9b24 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi ++++ b/arch/arm64/boot/dts/freescale/imx8mp-verdin.dtsi +@@ -620,7 +620,7 @@ + interrupts = <5 IRQ_TYPE_EDGE_FALLING>; + reg = <0x4a>; + /* Verdin GPIO_2 (SODIMM 208) */ +- reset-gpios = <&gpio1 1 GPIO_ACTIVE_HIGH>; ++ reset-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>; + status = "disabled"; + }; + }; +@@ -697,7 +697,7 @@ + pinctrl-0 = <&pinctrl_gpio_9_dsi>, <&pinctrl_i2s_2_bclk_touch_reset>; + reg = <0x4a>; + /* Verdin I2S_2_BCLK (TOUCH_RESET#, SODIMM 42) */ +- reset-gpios = <&gpio5 0 GPIO_ACTIVE_HIGH>; ++ reset-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>; + status = "disabled"; + }; + +diff --git a/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi +index 899e8e7dbc24f..802ad6e5cef61 100644 +--- a/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi ++++ b/arch/arm64/boot/dts/freescale/imx8mq-tqma8mq.dtsi +@@ -204,7 +204,6 @@ + reg = <0x51>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_rtc>; +- interrupt-names = "irq"; + interrupt-parent = <&gpio1>; + interrupts = <1 IRQ_TYPE_EDGE_FALLING>; + quartz-load-femtofarads = <7000>; +diff --git a/arch/arm64/boot/dts/renesas/r8a779g0.dtsi b/arch/arm64/boot/dts/renesas/r8a779g0.dtsi +index 7cbb0de060ddc..1c15726cff8bf 100644 +--- a/arch/arm64/boot/dts/renesas/r8a779g0.dtsi ++++ b/arch/arm64/boot/dts/renesas/r8a779g0.dtsi +@@ -85,7 +85,7 @@ + "renesas,rcar-gen4-hscif", + "renesas,hscif"; + reg = <0 0xe6540000 0 96>; +- interrupts = ; ++ interrupts = ; + clocks = <&cpg CPG_MOD 514>, + <&cpg CPG_CORE R8A779G0_CLK_S0D3_PER>, + <&scif_clk>; +diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c +index 5f4117dae8888..af137f91607da 100644 +--- a/arch/arm64/kernel/cpu_errata.c ++++ b/arch/arm64/kernel/cpu_errata.c +@@ -656,6 +656,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2) + }, + #endif ++#ifdef CONFIG_ARM64_ERRATUM_2457168 ++ { ++ .desc = "ARM erratum 2457168", ++ .capability = ARM64_WORKAROUND_2457168, ++ .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, ++ ++ /* Cortex-A510 r0p0-r1p1 */ ++ CAP_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1) ++ }, ++#endif + #ifdef CONFIG_ARM64_ERRATUM_2038923 + { + .desc = "ARM erratum 2038923", +diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c +index ebdfbd1cf207b..f34c9f8b9ee0a 100644 +--- a/arch/arm64/kernel/cpufeature.c ++++ b/arch/arm64/kernel/cpufeature.c +@@ -1798,7 +1798,10 @@ static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap) + pr_info("detected CPU%d: Activity Monitors Unit (AMU)\n", + 
smp_processor_id()); + cpumask_set_cpu(smp_processor_id(), &amu_cpus); +- update_freq_counters_refs(); ++ ++ /* 0 reference values signal broken/disabled counters */ ++ if (!this_cpu_has_cap(ARM64_WORKAROUND_2457168)) ++ update_freq_counters_refs(); + } + } + +diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c +index af5df48ba915b..2e248342476ea 100644 +--- a/arch/arm64/kernel/hibernate.c ++++ b/arch/arm64/kernel/hibernate.c +@@ -300,6 +300,11 @@ static void swsusp_mte_restore_tags(void) + unsigned long pfn = xa_state.xa_index; + struct page *page = pfn_to_online_page(pfn); + ++ /* ++ * It is not required to invoke page_kasan_tag_reset(page) ++ * at this point since the tags stored in page->flags are ++ * already restored. ++ */ + mte_restore_page_tags(page_address(page), tags); + + mte_free_tag_storage(tags); +diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c +index b2b730233274b..f6b00743c3994 100644 +--- a/arch/arm64/kernel/mte.c ++++ b/arch/arm64/kernel/mte.c +@@ -48,6 +48,15 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte, + if (!pte_is_tagged) + return; + ++ page_kasan_tag_reset(page); ++ /* ++ * We need smp_wmb() in between setting the flags and clearing the ++ * tags because if another thread reads page->flags and builds a ++ * tagged address out of it, there is an actual dependency to the ++ * memory access, but on the current thread we do not guarantee that ++ * the new page->flags are visible before the tags were updated. ++ */ ++ smp_wmb(); + mte_clear_page_tags(page_address(page)); + } + +diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c +index 9ab78ad826e2a..707b5451929d4 100644 +--- a/arch/arm64/kernel/topology.c ++++ b/arch/arm64/kernel/topology.c +@@ -310,12 +310,25 @@ core_initcall(init_amu_fie); + + static void cpu_read_corecnt(void *val) + { ++ /* ++ * A value of 0 can be returned if the current CPU does not support AMUs ++ * or if the counter is disabled for this CPU. A return value of 0 at ++ * counter read is properly handled as an error case by the users of the ++ * counter. ++ */ + *(u64 *)val = read_corecnt(); + } + + static void cpu_read_constcnt(void *val) + { +- *(u64 *)val = read_constcnt(); ++ /* ++ * Return 0 if the current CPU is affected by erratum 2457168. A value ++ * of 0 is also returned if the current CPU does not support AMUs or if ++ * the counter is disabled. A return value of 0 at counter read is ++ * properly handled as an error case by the users of the counter. ++ */ ++ *(u64 *)val = this_cpu_has_cap(ARM64_WORKAROUND_2457168) ? ++ 0UL : read_constcnt(); + } + + static inline +@@ -342,7 +355,22 @@ int counters_read_on_cpu(int cpu, smp_call_func_t func, u64 *val) + */ + bool cpc_ffh_supported(void) + { +- return freq_counters_valid(get_cpu_with_amu_feat()); ++ int cpu = get_cpu_with_amu_feat(); ++ ++ /* ++ * FFH is considered supported if there is at least one present CPU that ++ * supports AMUs. Using FFH to read core and reference counters for CPUs ++ * that do not support AMUs, have counters disabled or that are affected ++ * by errata, will result in a return value of 0. ++ * ++ * This is done to allow any enabled and valid counters to be read ++ * through FFH, knowing that potentially returning 0 as counter value is ++ * properly handled by the users of these counters. 
++ */ ++ if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask)) ++ return false; ++ ++ return true; + } + + int cpc_read_ffh(int cpu, struct cpc_reg *reg, u64 *val) +diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c +index 24913271e898c..0dea80bf6de46 100644 +--- a/arch/arm64/mm/copypage.c ++++ b/arch/arm64/mm/copypage.c +@@ -23,6 +23,15 @@ void copy_highpage(struct page *to, struct page *from) + + if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) { + set_bit(PG_mte_tagged, &to->flags); ++ page_kasan_tag_reset(to); ++ /* ++ * We need smp_wmb() in between setting the flags and clearing the ++ * tags because if another thread reads page->flags and builds a ++ * tagged address out of it, there is an actual dependency to the ++ * memory access, but on the current thread we do not guarantee that ++ * the new page->flags are visible before the tags were updated. ++ */ ++ smp_wmb(); + mte_copy_page_tags(kto, kfrom); + } + } +diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c +index 4334dec93bd44..a9e50e930484a 100644 +--- a/arch/arm64/mm/mteswap.c ++++ b/arch/arm64/mm/mteswap.c +@@ -53,6 +53,15 @@ bool mte_restore_tags(swp_entry_t entry, struct page *page) + if (!tags) + return false; + ++ page_kasan_tag_reset(page); ++ /* ++ * We need smp_wmb() in between setting the flags and clearing the ++ * tags because if another thread reads page->flags and builds a ++ * tagged address out of it, there is an actual dependency to the ++ * memory access, but on the current thread we do not guarantee that ++ * the new page->flags are visible before the tags were updated. ++ */ ++ smp_wmb(); + mte_restore_page_tags(page_address(page), tags); + + return true; +diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps +index 8809e14cf86a2..18999f46df19f 100644 +--- a/arch/arm64/tools/cpucaps ++++ b/arch/arm64/tools/cpucaps +@@ -66,6 +66,7 @@ WORKAROUND_1902691 + WORKAROUND_2038923 + WORKAROUND_2064142 + WORKAROUND_2077057 ++WORKAROUND_2457168 + WORKAROUND_TRBE_OVERWRITE_FILL_MODE + WORKAROUND_TSB_FLUSH_FAILURE + WORKAROUND_TRBE_WRITE_OUT_OF_RANGE +diff --git a/arch/mips/loongson32/ls1c/board.c b/arch/mips/loongson32/ls1c/board.c +index e9de6da0ce51f..9dcfe9de55b0a 100644 +--- a/arch/mips/loongson32/ls1c/board.c ++++ b/arch/mips/loongson32/ls1c/board.c +@@ -15,7 +15,6 @@ static struct platform_device *ls1c_platform_devices[] __initdata = { + static int __init ls1c_platform_init(void) + { + ls1x_serial_set_uartclk(&ls1x_uart_pdev); +- ls1x_rtc_set_extclk(&ls1x_rtc_pdev); + + return platform_add_devices(ls1c_platform_devices, + ARRAY_SIZE(ls1c_platform_devices)); +diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h +index 56ffd260c669b..0ec9cfc5131fc 100644 +--- a/arch/parisc/include/asm/bitops.h ++++ b/arch/parisc/include/asm/bitops.h +@@ -12,14 +12,6 @@ + #include + #include + +-/* compiler build environment sanity checks: */ +-#if !defined(CONFIG_64BIT) && defined(__LP64__) +-#error "Please use 'ARCH=parisc' to build the 32-bit kernel." +-#endif +-#if defined(CONFIG_64BIT) && !defined(__LP64__) +-#error "Please use 'ARCH=parisc64' to build the 64-bit kernel." +-#endif +- + /* See http://marc.theaimsgroup.com/?t=108826637900003 for discussion + * on use of volatile and __*_bit() (set/clear/change): + * *_bit() want use of volatile. 
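A note on the convention used by the arm64 AMU/erratum-2457168 changes above: a raw counter value of 0 is reserved to mean "counter missing, disabled, or erratum-affected", so every consumer must treat 0 as an error rather than a sample. A minimal userspace-style sketch of that pattern follows; read_counter() and sample_counter() are illustrative stand-ins, not the kernel's actual helpers.

	#include <errno.h>
	#include <stdint.h>

	/* Illustrative stand-in for an AMU counter read; returns 0 when the
	 * counter is missing, disabled, or affected by erratum 2457168. */
	static uint64_t read_counter(void)
	{
		return 0; /* pretend the counter is unavailable */
	}

	/* Mirrors the convention from the series above: 0 is never treated
	 * as a valid sample, it is turned into an error instead. */
	static int sample_counter(uint64_t *out)
	{
		uint64_t val = read_counter();

		if (!val)
			return -EOPNOTSUPP;

		*out = val;
		return 0;
	}

This matches the comments added in topology.c above, where a 0 read is documented as an error case to be handled by the counter's users.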
+diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S +index e0a9e96576221..fd15fd4bbb61b 100644 +--- a/arch/parisc/kernel/head.S ++++ b/arch/parisc/kernel/head.S +@@ -22,7 +22,7 @@ + #include + #include + +- .level PA_ASM_LEVEL ++ .level 1.1 + + __INITDATA + ENTRY(boot_args) +@@ -70,6 +70,47 @@ $bss_loop: + stw,ma %arg2,4(%r1) + stw,ma %arg3,4(%r1) + ++#if !defined(CONFIG_64BIT) && defined(CONFIG_PA20) ++ /* This 32-bit kernel was compiled for PA2.0 CPUs. Check current CPU ++ * and halt kernel if we detect a PA1.x CPU. */ ++ ldi 32,%r10 ++ mtctl %r10,%cr11 ++ .level 2.0 ++ mfctl,w %cr11,%r10 ++ .level 1.1 ++ comib,<>,n 0,%r10,$cpu_ok ++ ++ load32 PA(msg1),%arg0 ++ ldi msg1_end-msg1,%arg1 ++$iodc_panic: ++ copy %arg0, %r10 ++ copy %arg1, %r11 ++ load32 PA(init_stack),%sp ++#define MEM_CONS 0x3A0 ++ ldw MEM_CONS+32(%r0),%arg0 // HPA ++ ldi ENTRY_IO_COUT,%arg1 ++ ldw MEM_CONS+36(%r0),%arg2 // SPA ++ ldw MEM_CONS+8(%r0),%arg3 // layers ++ load32 PA(__bss_start),%r1 ++ stw %r1,-52(%sp) // arg4 ++ stw %r0,-56(%sp) // arg5 ++ stw %r10,-60(%sp) // arg6 = ptr to text ++ stw %r11,-64(%sp) // arg7 = len ++ stw %r0,-68(%sp) // arg8 ++ load32 PA(.iodc_panic_ret), %rp ++ ldw MEM_CONS+40(%r0),%r1 // ENTRY_IODC ++ bv,n (%r1) ++.iodc_panic_ret: ++ b . /* wait endless with ... */ ++ or %r10,%r10,%r10 /* qemu idle sleep */ ++msg1: .ascii "Can't boot kernel which was built for PA8x00 CPUs on this machine.\r\n" ++msg1_end: ++ ++$cpu_ok: ++#endif ++ ++ .level PA_ASM_LEVEL ++ + /* Initialize startup VM. Just map first 16/32 MB of memory */ + load32 PA(swapper_pg_dir),%r4 + mtctl %r4,%cr24 /* Initialize kernel root pointer */ +diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi +index 9f5bce1488d93..9bf37ef379509 100644 +--- a/arch/riscv/boot/dts/microchip/mpfs.dtsi ++++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi +@@ -161,7 +161,7 @@ + ranges; + + cctrllr: cache-controller@2010000 { +- compatible = "sifive,fu540-c000-ccache", "cache"; ++ compatible = "microchip,mpfs-ccache", "sifive,fu540-c000-ccache", "cache"; + reg = <0x0 0x2010000 0x0 0x1000>; + cache-block-size = <64>; + cache-level = <2>; +diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c +index 53ed3884fe644..5d66e3947070c 100644 +--- a/arch/s390/kernel/nmi.c ++++ b/arch/s390/kernel/nmi.c +@@ -63,7 +63,7 @@ static inline unsigned long nmi_get_mcesa_size(void) + * structure. The structure is required for machine check happening + * early in the boot process. 
+ */ +-static struct mcesa boot_mcesa __initdata __aligned(MCESA_MAX_SIZE); ++static struct mcesa boot_mcesa __aligned(MCESA_MAX_SIZE); + + void __init nmi_alloc_mcesa_early(u64 *mcesad) + { +diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c +index 0a37f5de28631..3e0361db963ef 100644 +--- a/arch/s390/kernel/setup.c ++++ b/arch/s390/kernel/setup.c +@@ -486,6 +486,7 @@ static void __init setup_lowcore_dat_off(void) + put_abs_lowcore(restart_data, lc->restart_data); + put_abs_lowcore(restart_source, lc->restart_source); + put_abs_lowcore(restart_psw, lc->restart_psw); ++ put_abs_lowcore(mcesad, lc->mcesad); + + lc->spinlock_lockval = arch_spin_lockval(0); + lc->spinlock_index = 0; +diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h +index 4a23e52fe0ee1..ebc271bb6d8ed 100644 +--- a/arch/x86/include/asm/sev.h ++++ b/arch/x86/include/asm/sev.h +@@ -195,7 +195,7 @@ void snp_set_memory_shared(unsigned long vaddr, unsigned int npages); + void snp_set_memory_private(unsigned long vaddr, unsigned int npages); + void snp_set_wakeup_secondary_cpu(void); + bool snp_init(struct boot_params *bp); +-void snp_abort(void); ++void __init __noreturn snp_abort(void); + int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err); + #else + static inline void sev_es_ist_enter(struct pt_regs *regs) { } +diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c +index 4f84c3f11af5b..a428c62330d37 100644 +--- a/arch/x86/kernel/sev.c ++++ b/arch/x86/kernel/sev.c +@@ -2112,7 +2112,7 @@ bool __init snp_init(struct boot_params *bp) + return true; + } + +-void __init snp_abort(void) ++void __init __noreturn snp_abort(void) + { + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); + } +diff --git a/block/partitions/core.c b/block/partitions/core.c +index 8a0ec929023bc..76617b1d2d47f 100644 +--- a/block/partitions/core.c ++++ b/block/partitions/core.c +@@ -597,6 +597,9 @@ static int blk_add_partitions(struct gendisk *disk) + if (disk->flags & GENHD_FL_NO_PART) + return 0; + ++ if (test_bit(GD_SUPPRESS_PART_SCAN, &disk->state)) ++ return 0; ++ + state = check_partition(disk); + if (!state) + return 0; +diff --git a/drivers/base/driver.c b/drivers/base/driver.c +index 15a75afe6b845..676b6275d5b53 100644 +--- a/drivers/base/driver.c ++++ b/drivers/base/driver.c +@@ -63,6 +63,12 @@ int driver_set_override(struct device *dev, const char **override, + if (len >= (PAGE_SIZE - 1)) + return -EINVAL; + ++ /* ++ * Compute the real length of the string in case userspace sends us a ++ * bunch of \0 characters like python likes to do. 
++ */ ++ len = strlen(s); ++ + if (!len) { + /* Empty string passed - clear override */ + device_lock(dev); +diff --git a/drivers/base/regmap/regmap-spi.c b/drivers/base/regmap/regmap-spi.c +index 719323bc6c7f1..37ab23a9d0345 100644 +--- a/drivers/base/regmap/regmap-spi.c ++++ b/drivers/base/regmap/regmap-spi.c +@@ -113,6 +113,7 @@ static const struct regmap_bus *regmap_get_spi_bus(struct spi_device *spi, + const struct regmap_config *config) + { + size_t max_size = spi_max_transfer_size(spi); ++ size_t max_msg_size, reg_reserve_size; + struct regmap_bus *bus; + + if (max_size != SIZE_MAX) { +@@ -120,9 +121,16 @@ static const struct regmap_bus *regmap_get_spi_bus(struct spi_device *spi, + if (!bus) + return ERR_PTR(-ENOMEM); + ++ max_msg_size = spi_max_message_size(spi); ++ reg_reserve_size = config->reg_bits / BITS_PER_BYTE ++ + config->pad_bits / BITS_PER_BYTE; ++ if (max_size + reg_reserve_size > max_msg_size) ++ max_size -= reg_reserve_size; ++ + bus->free_on_exit = true; + bus->max_raw_read = max_size; + bus->max_raw_write = max_size; ++ + return bus; + } + +diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c +index 2cad427741647..f9fd1b6c15d42 100644 +--- a/drivers/cpufreq/cpufreq.c ++++ b/drivers/cpufreq/cpufreq.c +@@ -532,7 +532,7 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy, + + target_freq = clamp_val(target_freq, policy->min, policy->max); + +- if (!cpufreq_driver->target_index) ++ if (!policy->freq_table) + return target_freq; + + idx = cpufreq_frequency_table_target(policy, target_freq, relation); +diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c +index 4dde8edd53b62..3e8d4b51a8140 100644 +--- a/drivers/firmware/efi/capsule-loader.c ++++ b/drivers/firmware/efi/capsule-loader.c +@@ -242,29 +242,6 @@ failed: + return ret; + } + +-/** +- * efi_capsule_flush - called by file close or file flush +- * @file: file pointer +- * @id: not used +- * +- * If a capsule is being partially uploaded then calling this function +- * will be treated as upload termination and will free those completed +- * buffer pages and -ECANCELED will be returned. 
+- **/ +-static int efi_capsule_flush(struct file *file, fl_owner_t id) +-{ +- int ret = 0; +- struct capsule_info *cap_info = file->private_data; +- +- if (cap_info->index > 0) { +- pr_err("capsule upload not complete\n"); +- efi_free_all_buff_pages(cap_info); +- ret = -ECANCELED; +- } +- +- return ret; +-} +- + /** + * efi_capsule_release - called by file close + * @inode: not used +@@ -277,6 +254,13 @@ static int efi_capsule_release(struct inode *inode, struct file *file) + { + struct capsule_info *cap_info = file->private_data; + ++ if (cap_info->index > 0 && ++ (cap_info->header.headersize == 0 || ++ cap_info->count < cap_info->total_size)) { ++ pr_err("capsule upload not complete\n"); ++ efi_free_all_buff_pages(cap_info); ++ } ++ + kfree(cap_info->pages); + kfree(cap_info->phys); + kfree(file->private_data); +@@ -324,7 +308,6 @@ static const struct file_operations efi_capsule_fops = { + .owner = THIS_MODULE, + .open = efi_capsule_open, + .write = efi_capsule_write, +- .flush = efi_capsule_flush, + .release = efi_capsule_release, + .llseek = no_llseek, + }; +diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile +index d0537573501e9..2c67f71f23753 100644 +--- a/drivers/firmware/efi/libstub/Makefile ++++ b/drivers/firmware/efi/libstub/Makefile +@@ -37,6 +37,13 @@ KBUILD_CFLAGS := $(cflags-y) -Os -DDISABLE_BRANCH_PROFILING \ + $(call cc-option,-fno-addrsig) \ + -D__DISABLE_EXPORTS + ++# ++# struct randomization only makes sense for Linux internal types, which the EFI ++# stub code never touches, so let's turn off struct randomization for the stub ++# altogether ++# ++KBUILD_CFLAGS := $(filter-out $(RANDSTRUCT_CFLAGS), $(KBUILD_CFLAGS)) ++ + # remove SCS flags from all objects in this directory + KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) + # disable LTO +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index 3adebb63680e0..67d4a3c13ed19 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -2482,12 +2482,14 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) + if (!hive->reset_domain || + !amdgpu_reset_get_reset_domain(hive->reset_domain)) { + r = -ENOENT; ++ amdgpu_put_xgmi_hive(hive); + goto init_failed; + } + + /* Drop the early temporary reset domain we created for device */ + amdgpu_reset_put_reset_domain(adev->reset_domain); + adev->reset_domain = hive->reset_domain; ++ amdgpu_put_xgmi_hive(hive); + } + } + +@@ -4473,8 +4475,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, + retry: + amdgpu_amdkfd_pre_reset(adev); + +- amdgpu_amdkfd_pre_reset(adev); +- + if (from_hypervisor) + r = amdgpu_virt_request_full_gpu(adev, true); + else +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +index e9411c28d88ba..2b00f8fe15a89 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +@@ -2612,6 +2612,9 @@ static int psp_hw_fini(void *handle) + psp_rap_terminate(psp); + psp_dtm_terminate(psp); + psp_hdcp_terminate(psp); ++ ++ if (adev->gmc.xgmi.num_physical_nodes > 1) ++ psp_xgmi_terminate(psp); + } + + psp_asd_terminate(psp); +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +index 1b108d03e7859..f2aebbf3fbe38 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +@@ -742,7 +742,7 @@ int 
amdgpu_xgmi_remove_device(struct amdgpu_device *adev) + amdgpu_put_xgmi_hive(hive); + } + +- return psp_xgmi_terminate(&adev->psp); ++ return 0; + } + + static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +index a4a6751b1e449..30998ac47707c 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +@@ -5090,9 +5090,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade + data = REG_SET_FIELD(data, SDMA0_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); + WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); + +- data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); +- data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); +- WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); ++ /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ ++ if (adev->sdma.num_instances > 1) { ++ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); ++ data = REG_SET_FIELD(data, SDMA1_RLC_CGCG_CTRL, CGCG_INT_ENABLE, 1); ++ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); ++ } + } else { + /* Program RLC_CGCG_CGLS_CTRL */ + def = data = RREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL); +@@ -5121,9 +5124,12 @@ static void gfx_v11_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade + data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; + WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); + +- data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); +- data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; +- WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); ++ /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ ++ if (adev->sdma.num_instances > 1) { ++ data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); ++ data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; ++ WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); ++ } + } + } + +diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +index 5349ca4d19e38..6d8ff3b099422 100644 +--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +@@ -2587,7 +2587,8 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) + + gfx_v9_0_tiling_mode_table_init(adev); + +- gfx_v9_0_setup_rb(adev); ++ if (adev->gfx.num_gfx_rings) ++ gfx_v9_0_setup_rb(adev); + gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); + adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); + +diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +index 3f44a099c52a4..3e51e773f92be 100644 +--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +@@ -176,6 +176,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); + ++ tmp = mmVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, +diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +index c7a592d68febf..275bfb8ca6f89 100644 +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +@@ -3188,7 +3188,7 @@ void crtc_debugfs_init(struct drm_crtc *crtc) + &crc_win_y_end_fops); + 
debugfs_create_file_unsafe("crc_win_update", 0644, dir, crtc, + &crc_win_update_fops); +- ++ dput(dir); + } + #endif + /* +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c +index 30c6f9cd717f3..27fbe906682f9 100644 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c +@@ -41,6 +41,12 @@ + #define FN(reg_name, field) \ + FD(reg_name##__##field) + ++#include "logger_types.h" ++#undef DC_LOGGER ++#define DC_LOGGER \ ++ CTX->logger ++#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); } ++ + #define VBIOSSMC_MSG_TestMessage 0x1 + #define VBIOSSMC_MSG_GetSmuVersion 0x2 + #define VBIOSSMC_MSG_PowerUpGfx 0x3 +@@ -95,7 +101,13 @@ static int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, + uint32_t result; + + result = rn_smu_wait_for_response(clk_mgr, 10, 200000); +- ASSERT(result == VBIOSSMC_Result_OK); ++ ++ if (result != VBIOSSMC_Result_OK) ++ smu_print("SMU Response was not OK. SMU response after wait received is: %d\n", result); ++ ++ if (result == VBIOSSMC_Status_BUSY) { ++ return -1; ++ } + + /* First clear response register */ + REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY); +@@ -176,6 +188,10 @@ int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int reque + VBIOSSMC_MSG_SetHardMinDcfclkByFreq, + khz_to_mhz_ceil(requested_dcfclk_khz)); + ++#ifdef DBG ++ smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", actual_dcfclk_set_mhz, actual_dcfclk_set_mhz * 1000); ++#endif ++ + return actual_dcfclk_set_mhz * 1000; + } + +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c +index 1cae01a91a69d..e4f96b6fd79d0 100644 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/dcn301_smu.c +@@ -41,6 +41,12 @@ + #define FN(reg_name, field) \ + FD(reg_name##__##field) + ++#include "logger_types.h" ++#undef DC_LOGGER ++#define DC_LOGGER \ ++ CTX->logger ++#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); } ++ + #define VBIOSSMC_MSG_GetSmuVersion 0x2 + #define VBIOSSMC_MSG_SetDispclkFreq 0x4 + #define VBIOSSMC_MSG_SetDprefclkFreq 0x5 +@@ -96,6 +102,13 @@ static int dcn301_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, + + result = dcn301_smu_wait_for_response(clk_mgr, 10, 200000); + ++ if (result != VBIOSSMC_Result_OK) ++ smu_print("SMU Response was not OK. 
SMU response after wait received is: %d\n", result); ++ ++ if (result == VBIOSSMC_Status_BUSY) { ++ return -1; ++ } ++ + /* First clear response register */ + REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY); + +@@ -167,6 +180,10 @@ int dcn301_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int request + VBIOSSMC_MSG_SetHardMinDcfclkByFreq, + khz_to_mhz_ceil(requested_dcfclk_khz)); + ++#ifdef DBG ++ smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", actual_dcfclk_set_mhz, actual_dcfclk_set_mhz * 1000); ++#endif ++ + return actual_dcfclk_set_mhz * 1000; + } + +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c +index c5d7d075026f3..090b2c02aee17 100644 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c +@@ -40,6 +40,12 @@ + #define FN(reg_name, field) \ + FD(reg_name##__##field) + ++#include "logger_types.h" ++#undef DC_LOGGER ++#define DC_LOGGER \ ++ CTX->logger ++#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); } ++ + #define VBIOSSMC_MSG_TestMessage 0x1 + #define VBIOSSMC_MSG_GetSmuVersion 0x2 + #define VBIOSSMC_MSG_PowerUpGfx 0x3 +@@ -102,7 +108,9 @@ static int dcn31_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, + uint32_t result; + + result = dcn31_smu_wait_for_response(clk_mgr, 10, 200000); +- ASSERT(result == VBIOSSMC_Result_OK); ++ ++ if (result != VBIOSSMC_Result_OK) ++ smu_print("SMU Response was not OK. SMU response after wait received is: %d\n", result); + + if (result == VBIOSSMC_Status_BUSY) { + return -1; +@@ -194,6 +202,10 @@ int dcn31_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requeste + VBIOSSMC_MSG_SetHardMinDcfclkByFreq, + khz_to_mhz_ceil(requested_dcfclk_khz)); + ++#ifdef DBG ++ smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", actual_dcfclk_set_mhz, actual_dcfclk_set_mhz * 1000); ++#endif ++ + return actual_dcfclk_set_mhz * 1000; + } + +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c +index 2600313fea579..925d6e13620ec 100644 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c +@@ -70,6 +70,12 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D + #define REG_NBIO(reg_name) \ + (NBIO_BASE.instance[0].segment[regBIF_BX_PF2_ ## reg_name ## _BASE_IDX] + regBIF_BX_PF2_ ## reg_name) + ++#include "logger_types.h" ++#undef DC_LOGGER ++#define DC_LOGGER \ ++ CTX->logger ++#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); } ++ + #define mmMP1_C2PMSG_3 0x3B1050C + + #define VBIOSSMC_MSG_TestMessage 0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team +@@ -130,7 +136,9 @@ static int dcn315_smu_send_msg_with_param( + uint32_t result; + + result = dcn315_smu_wait_for_response(clk_mgr, 10, 200000); +- ASSERT(result == VBIOSSMC_Result_OK); ++ ++ if (result != VBIOSSMC_Result_OK) ++ smu_print("SMU Response was not OK. 
SMU response after wait received is: %d\n", result); + + if (result == VBIOSSMC_Status_BUSY) { + return -1; +@@ -197,6 +205,10 @@ int dcn315_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int request + VBIOSSMC_MSG_SetHardMinDcfclkByFreq, + khz_to_mhz_ceil(requested_dcfclk_khz)); + ++#ifdef DBG ++ smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", actual_dcfclk_set_mhz, actual_dcfclk_set_mhz * 1000); ++#endif ++ + return actual_dcfclk_set_mhz * 1000; + } + +diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.c +index dceec4b960527..457a9254ae1c8 100644 +--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.c ++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_smu.c +@@ -58,6 +58,12 @@ static const struct IP_BASE MP0_BASE = { { { { 0x00016000, 0x00DC0000, 0x00E0000 + #define FN(reg_name, field) \ + FD(reg_name##__##field) + ++#include "logger_types.h" ++#undef DC_LOGGER ++#define DC_LOGGER \ ++ CTX->logger ++#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); } ++ + #define VBIOSSMC_MSG_TestMessage 0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team + #define VBIOSSMC_MSG_GetPmfwVersion 0x02 ///< Get PMFW version + #define VBIOSSMC_MSG_Spare0 0x03 ///< Spare0 +@@ -118,7 +124,9 @@ static int dcn316_smu_send_msg_with_param( + uint32_t result; + + result = dcn316_smu_wait_for_response(clk_mgr, 10, 200000); +- ASSERT(result == VBIOSSMC_Result_OK); ++ ++ if (result != VBIOSSMC_Result_OK) ++ smu_print("SMU Response was not OK. SMU response after wait received is: %d\n", result); + + if (result == VBIOSSMC_Status_BUSY) { + return -1; +@@ -183,6 +191,10 @@ int dcn316_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int request + VBIOSSMC_MSG_SetHardMinDcfclkByFreq, + khz_to_mhz_ceil(requested_dcfclk_khz)); + ++#ifdef DBG ++ smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", actual_dcfclk_set_mhz, actual_dcfclk_set_mhz * 1000); ++#endif ++ + return actual_dcfclk_set_mhz * 1000; + } + +diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c +index 86d670c712867..ad068865ba206 100644 +--- a/drivers/gpu/drm/drm_gem.c ++++ b/drivers/gpu/drm/drm_gem.c +@@ -168,21 +168,6 @@ void drm_gem_private_object_init(struct drm_device *dev, + } + EXPORT_SYMBOL(drm_gem_private_object_init); + +-static void +-drm_gem_remove_prime_handles(struct drm_gem_object *obj, struct drm_file *filp) +-{ +- /* +- * Note: obj->dma_buf can't disappear as long as we still hold a +- * handle reference in obj->handle_count. +- */ +- mutex_lock(&filp->prime.lock); +- if (obj->dma_buf) { +- drm_prime_remove_buf_handle_locked(&filp->prime, +- obj->dma_buf); +- } +- mutex_unlock(&filp->prime.lock); +-} +- + /** + * drm_gem_object_handle_free - release resources bound to userspace handles + * @obj: GEM object to clean up. 
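The drm_gem/drm_prime change in this hunk replaces the dma_buf-keyed removal above with a handle-keyed walk of the prime bookkeeping rbtree. A simplified sketch of that lookup-and-erase pattern is below; struct member and remove_by_handle() are reduced illustrations, not the real struct drm_prime_member or DRM API.

	#include <linux/rbtree.h>
	#include <linux/slab.h>
	#include <linux/types.h>

	/* Reduced stand-in for the bookkeeping entry; the real structure is
	 * also linked into a second, dma_buf-keyed tree that gets erased in
	 * the same pass. */
	struct member {
		struct rb_node handle_rb;
		u32 handle;
	};

	/* Walk the handle-keyed rbtree, unlink the matching node and free it,
	 * following the same ordering as the replacement code in this patch. */
	static void remove_by_handle(struct rb_root *handles, u32 handle)
	{
		struct rb_node *rb = handles->rb_node;

		while (rb) {
			struct member *m = rb_entry(rb, struct member, handle_rb);

			if (m->handle == handle) {
				rb_erase(&m->handle_rb, handles);
				kfree(m);
				return;
			}
			rb = m->handle < handle ? rb->rb_right : rb->rb_left;
		}
	}

Keying the erase on the GEM handle avoids dereferencing obj->dma_buf in the release path, which is the point of the drm_gem.c and drm_prime.c hunks that follow.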
+@@ -253,7 +238,7 @@ drm_gem_object_release_handle(int id, void *ptr, void *data) + if (obj->funcs->close) + obj->funcs->close(obj, file_priv); + +- drm_gem_remove_prime_handles(obj, file_priv); ++ drm_prime_remove_buf_handle(&file_priv->prime, id); + drm_vma_node_revoke(&obj->vma_node, file_priv); + + drm_gem_object_handle_put_unlocked(obj); +diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h +index 1fbbc19f1ac09..7bb98e6a446d0 100644 +--- a/drivers/gpu/drm/drm_internal.h ++++ b/drivers/gpu/drm/drm_internal.h +@@ -74,8 +74,8 @@ int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data, + + void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv); + void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv); +-void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv, +- struct dma_buf *dma_buf); ++void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, ++ uint32_t handle); + + /* drm_drv.c */ + struct drm_minor *drm_minor_acquire(unsigned int minor_id); +diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c +index e3f09f18110c7..bd5366b16381b 100644 +--- a/drivers/gpu/drm/drm_prime.c ++++ b/drivers/gpu/drm/drm_prime.c +@@ -190,29 +190,33 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpri + return -ENOENT; + } + +-void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv, +- struct dma_buf *dma_buf) ++void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, ++ uint32_t handle) + { + struct rb_node *rb; + +- rb = prime_fpriv->dmabufs.rb_node; ++ mutex_lock(&prime_fpriv->lock); ++ ++ rb = prime_fpriv->handles.rb_node; + while (rb) { + struct drm_prime_member *member; + +- member = rb_entry(rb, struct drm_prime_member, dmabuf_rb); +- if (member->dma_buf == dma_buf) { ++ member = rb_entry(rb, struct drm_prime_member, handle_rb); ++ if (member->handle == handle) { + rb_erase(&member->handle_rb, &prime_fpriv->handles); + rb_erase(&member->dmabuf_rb, &prime_fpriv->dmabufs); + +- dma_buf_put(dma_buf); ++ dma_buf_put(member->dma_buf); + kfree(member); +- return; +- } else if (member->dma_buf < dma_buf) { ++ break; ++ } else if (member->handle < handle) { + rb = rb->rb_right; + } else { + rb = rb->rb_left; + } + } ++ ++ mutex_unlock(&prime_fpriv->lock); + } + + void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv) +diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c +index 0c5638f5b72bc..91caf4523b34d 100644 +--- a/drivers/gpu/drm/i915/display/intel_bios.c ++++ b/drivers/gpu/drm/i915/display/intel_bios.c +@@ -478,6 +478,13 @@ init_bdb_block(struct drm_i915_private *i915, + + block_size = get_blocksize(block); + ++ /* ++ * Version number and new block size are considered ++ * part of the header for MIPI sequenece block v3+. 
++ */ ++ if (section_id == BDB_MIPI_SEQUENCE && *(const u8 *)block >= 3) ++ block_size += 5; ++ + entry = kzalloc(struct_size(entry, data, max(min_size, block_size) + 3), + GFP_KERNEL); + if (!entry) { +diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c +index 9feaf1a589f38..d213d8ad1ea53 100644 +--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c ++++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c +@@ -671,6 +671,28 @@ intel_dp_prepare_link_train(struct intel_dp *intel_dp, + intel_dp_compute_rate(intel_dp, crtc_state->port_clock, + &link_bw, &rate_select); + ++ /* ++ * WaEdpLinkRateDataReload ++ * ++ * Parade PS8461E MUX (used on varius TGL+ laptops) needs ++ * to snoop the link rates reported by the sink when we ++ * use LINK_RATE_SET in order to operate in jitter cleaning ++ * mode (as opposed to redriver mode). Unfortunately it ++ * loses track of the snooped link rates when powered down, ++ * so we need to make it re-snoop often. Without this high ++ * link rates are not stable. ++ */ ++ if (!link_bw) { ++ struct intel_connector *connector = intel_dp->attached_connector; ++ __le16 sink_rates[DP_MAX_SUPPORTED_RATES]; ++ ++ drm_dbg_kms(&i915->drm, "[CONNECTOR:%d:%s] Reloading eDP link rates\n", ++ connector->base.base.id, connector->base.name); ++ ++ drm_dp_dpcd_read(&intel_dp->aux, DP_SUPPORTED_LINK_RATES, ++ sink_rates, sizeof(sink_rates)); ++ } ++ + if (link_bw) + drm_dbg_kms(&i915->drm, + "[ENCODER:%d:%s] Using LINK_BW_SET value %02x\n", +diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c +index 40e2e28ee6c75..bf01780e7ea56 100644 +--- a/drivers/gpu/drm/i915/gt/intel_llc.c ++++ b/drivers/gpu/drm/i915/gt/intel_llc.c +@@ -12,6 +12,7 @@ + #include "intel_llc.h" + #include "intel_mchbar_regs.h" + #include "intel_pcode.h" ++#include "intel_rps.h" + + struct ia_constants { + unsigned int min_gpu_freq; +@@ -55,9 +56,6 @@ static bool get_ia_constants(struct intel_llc *llc, + if (!HAS_LLC(i915) || IS_DGFX(i915)) + return false; + +- if (rps->max_freq <= rps->min_freq) +- return false; +- + consts->max_ia_freq = cpu_max_MHz(); + + consts->min_ring_freq = +@@ -65,13 +63,8 @@ static bool get_ia_constants(struct intel_llc *llc, + /* convert DDR frequency from units of 266.6MHz to bandwidth */ + consts->min_ring_freq = mult_frac(consts->min_ring_freq, 8, 3); + +- consts->min_gpu_freq = rps->min_freq; +- consts->max_gpu_freq = rps->max_freq; +- if (GRAPHICS_VER(i915) >= 9) { +- /* Convert GT frequency to 50 HZ units */ +- consts->min_gpu_freq /= GEN9_FREQ_SCALER; +- consts->max_gpu_freq /= GEN9_FREQ_SCALER; +- } ++ consts->min_gpu_freq = intel_rps_get_min_raw_freq(rps); ++ consts->max_gpu_freq = intel_rps_get_max_raw_freq(rps); + + return true; + } +@@ -131,6 +124,12 @@ static void gen6_update_ring_freq(struct intel_llc *llc) + if (!get_ia_constants(llc, &consts)) + return; + ++ /* ++ * Although this is unlikely on any platform during initialization, ++ * let's ensure we don't get accidentally into infinite loop ++ */ ++ if (consts.max_gpu_freq <= consts.min_gpu_freq) ++ return; + /* + * For each potential GPU frequency, load a ring frequency we'd like + * to use for memory access. 
We do this by specifying the IA frequency +diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c +index 3476a11f294ce..7c068cc64c2fa 100644 +--- a/drivers/gpu/drm/i915/gt/intel_rps.c ++++ b/drivers/gpu/drm/i915/gt/intel_rps.c +@@ -2123,6 +2123,31 @@ u32 intel_rps_get_max_frequency(struct intel_rps *rps) + return intel_gpu_freq(rps, rps->max_freq_softlimit); + } + ++/** ++ * intel_rps_get_max_raw_freq - returns the max frequency in some raw format. ++ * @rps: the intel_rps structure ++ * ++ * Returns the max frequency in a raw format. In newer platforms raw is in ++ * units of 50 MHz. ++ */ ++u32 intel_rps_get_max_raw_freq(struct intel_rps *rps) ++{ ++ struct intel_guc_slpc *slpc = rps_to_slpc(rps); ++ u32 freq; ++ ++ if (rps_uses_slpc(rps)) { ++ return DIV_ROUND_CLOSEST(slpc->rp0_freq, ++ GT_FREQUENCY_MULTIPLIER); ++ } else { ++ freq = rps->max_freq; ++ if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { ++ /* Convert GT frequency to 50 MHz units */ ++ freq /= GEN9_FREQ_SCALER; ++ } ++ return freq; ++ } ++} ++ + u32 intel_rps_get_rp0_frequency(struct intel_rps *rps) + { + struct intel_guc_slpc *slpc = rps_to_slpc(rps); +@@ -2211,6 +2236,31 @@ u32 intel_rps_get_min_frequency(struct intel_rps *rps) + return intel_gpu_freq(rps, rps->min_freq_softlimit); + } + ++/** ++ * intel_rps_get_min_raw_freq - returns the min frequency in some raw format. ++ * @rps: the intel_rps structure ++ * ++ * Returns the min frequency in a raw format. In newer platforms raw is in ++ * units of 50 MHz. ++ */ ++u32 intel_rps_get_min_raw_freq(struct intel_rps *rps) ++{ ++ struct intel_guc_slpc *slpc = rps_to_slpc(rps); ++ u32 freq; ++ ++ if (rps_uses_slpc(rps)) { ++ return DIV_ROUND_CLOSEST(slpc->min_freq, ++ GT_FREQUENCY_MULTIPLIER); ++ } else { ++ freq = rps->min_freq; ++ if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) { ++ /* Convert GT frequency to 50 MHz units */ ++ freq /= GEN9_FREQ_SCALER; ++ } ++ return freq; ++ } ++} ++ + static int set_min_freq(struct intel_rps *rps, u32 val) + { + int ret = 0; +diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h +index 1e8d564913083..4509dfdc52e09 100644 +--- a/drivers/gpu/drm/i915/gt/intel_rps.h ++++ b/drivers/gpu/drm/i915/gt/intel_rps.h +@@ -37,8 +37,10 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1); + u32 intel_rps_read_actual_frequency(struct intel_rps *rps); + u32 intel_rps_get_requested_frequency(struct intel_rps *rps); + u32 intel_rps_get_min_frequency(struct intel_rps *rps); ++u32 intel_rps_get_min_raw_freq(struct intel_rps *rps); + int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val); + u32 intel_rps_get_max_frequency(struct intel_rps *rps); ++u32 intel_rps_get_max_raw_freq(struct intel_rps *rps); + int intel_rps_set_max_frequency(struct intel_rps *rps, u32 val); + u32 intel_rps_get_rp0_frequency(struct intel_rps *rps); + u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); +diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c +index 429644d5ddc69..9fba16cb3f1e7 100644 +--- a/drivers/gpu/drm/radeon/radeon_device.c ++++ b/drivers/gpu/drm/radeon/radeon_device.c +@@ -1604,6 +1604,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, + if (r) { + /* delay GPU reset to resume */ + radeon_fence_driver_force_completion(rdev, i); ++ } else { ++ /* finish executing delayed work */ ++ flush_delayed_work(&rdev->fence_drv[i].lockup_work); + } + } + +diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c +index 
3633ab691662b..81e688975c6a7 100644 +--- a/drivers/hwmon/asus-ec-sensors.c ++++ b/drivers/hwmon/asus-ec-sensors.c +@@ -54,6 +54,10 @@ static char *mutex_path_override; + /* ACPI mutex for locking access to the EC for the firmware */ + #define ASUS_HW_ACCESS_MUTEX_ASMX "\\AMW0.ASMX" + ++#define ASUS_HW_ACCESS_MUTEX_RMTW_ASMX "\\RMTW.ASMX" ++ ++#define ASUS_HW_ACCESS_MUTEX_SB_PCI0_SBRG_SIO1_MUT0 "\\_SB_.PCI0.SBRG.SIO1.MUT0" ++ + #define MAX_IDENTICAL_BOARD_VARIATIONS 3 + + /* Moniker for the ACPI global lock (':' is not allowed in ASL identifiers) */ +@@ -119,6 +123,18 @@ enum ec_sensors { + ec_sensor_temp_water_in, + /* "Water_Out" temperature sensor reading [℃] */ + ec_sensor_temp_water_out, ++ /* "Water_Block_In" temperature sensor reading [℃] */ ++ ec_sensor_temp_water_block_in, ++ /* "Water_Block_Out" temperature sensor reading [℃] */ ++ ec_sensor_temp_water_block_out, ++ /* "T_sensor_2" temperature sensor reading [℃] */ ++ ec_sensor_temp_t_sensor_2, ++ /* "Extra_1" temperature sensor reading [℃] */ ++ ec_sensor_temp_sensor_extra_1, ++ /* "Extra_2" temperature sensor reading [℃] */ ++ ec_sensor_temp_sensor_extra_2, ++ /* "Extra_3" temperature sensor reading [℃] */ ++ ec_sensor_temp_sensor_extra_3, + }; + + #define SENSOR_TEMP_CHIPSET BIT(ec_sensor_temp_chipset) +@@ -134,11 +150,19 @@ enum ec_sensors { + #define SENSOR_CURR_CPU BIT(ec_sensor_curr_cpu) + #define SENSOR_TEMP_WATER_IN BIT(ec_sensor_temp_water_in) + #define SENSOR_TEMP_WATER_OUT BIT(ec_sensor_temp_water_out) ++#define SENSOR_TEMP_WATER_BLOCK_IN BIT(ec_sensor_temp_water_block_in) ++#define SENSOR_TEMP_WATER_BLOCK_OUT BIT(ec_sensor_temp_water_block_out) ++#define SENSOR_TEMP_T_SENSOR_2 BIT(ec_sensor_temp_t_sensor_2) ++#define SENSOR_TEMP_SENSOR_EXTRA_1 BIT(ec_sensor_temp_sensor_extra_1) ++#define SENSOR_TEMP_SENSOR_EXTRA_2 BIT(ec_sensor_temp_sensor_extra_2) ++#define SENSOR_TEMP_SENSOR_EXTRA_3 BIT(ec_sensor_temp_sensor_extra_3) + + enum board_family { + family_unknown, + family_amd_400_series, + family_amd_500_series, ++ family_intel_300_series, ++ family_intel_600_series + }; + + /* All the known sensors for ASUS EC controllers */ +@@ -195,15 +219,54 @@ static const struct ec_sensor_info sensors_family_amd_500[] = { + EC_SENSOR("Water_In", hwmon_temp, 1, 0x01, 0x00), + [ec_sensor_temp_water_out] = + EC_SENSOR("Water_Out", hwmon_temp, 1, 0x01, 0x01), ++ [ec_sensor_temp_water_block_in] = ++ EC_SENSOR("Water_Block_In", hwmon_temp, 1, 0x01, 0x02), ++ [ec_sensor_temp_water_block_out] = ++ EC_SENSOR("Water_Block_Out", hwmon_temp, 1, 0x01, 0x03), ++ [ec_sensor_temp_sensor_extra_1] = ++ EC_SENSOR("Extra_1", hwmon_temp, 1, 0x01, 0x09), ++ [ec_sensor_temp_t_sensor_2] = ++ EC_SENSOR("T_sensor_2", hwmon_temp, 1, 0x01, 0x0a), ++ [ec_sensor_temp_sensor_extra_2] = ++ EC_SENSOR("Extra_2", hwmon_temp, 1, 0x01, 0x0b), ++ [ec_sensor_temp_sensor_extra_3] = ++ EC_SENSOR("Extra_3", hwmon_temp, 1, 0x01, 0x0c), ++}; ++ ++static const struct ec_sensor_info sensors_family_intel_300[] = { ++ [ec_sensor_temp_chipset] = ++ EC_SENSOR("Chipset", hwmon_temp, 1, 0x00, 0x3a), ++ [ec_sensor_temp_cpu] = EC_SENSOR("CPU", hwmon_temp, 1, 0x00, 0x3b), ++ [ec_sensor_temp_mb] = ++ EC_SENSOR("Motherboard", hwmon_temp, 1, 0x00, 0x3c), ++ [ec_sensor_temp_t_sensor] = ++ EC_SENSOR("T_Sensor", hwmon_temp, 1, 0x00, 0x3d), ++ [ec_sensor_temp_vrm] = EC_SENSOR("VRM", hwmon_temp, 1, 0x00, 0x3e), ++ [ec_sensor_fan_cpu_opt] = ++ EC_SENSOR("CPU_Opt", hwmon_fan, 2, 0x00, 0xb0), ++ [ec_sensor_fan_vrm_hs] = EC_SENSOR("VRM HS", hwmon_fan, 2, 0x00, 0xb2), ++ [ec_sensor_fan_water_flow] 
= ++ EC_SENSOR("Water_Flow", hwmon_fan, 2, 0x00, 0xbc), ++ [ec_sensor_temp_water_in] = ++ EC_SENSOR("Water_In", hwmon_temp, 1, 0x01, 0x00), ++ [ec_sensor_temp_water_out] = ++ EC_SENSOR("Water_Out", hwmon_temp, 1, 0x01, 0x01), ++}; ++ ++static const struct ec_sensor_info sensors_family_intel_600[] = { ++ [ec_sensor_temp_t_sensor] = ++ EC_SENSOR("T_Sensor", hwmon_temp, 1, 0x00, 0x3d), ++ [ec_sensor_temp_vrm] = EC_SENSOR("VRM", hwmon_temp, 1, 0x00, 0x3e), + }; + + /* Shortcuts for common combinations */ + #define SENSOR_SET_TEMP_CHIPSET_CPU_MB \ + (SENSOR_TEMP_CHIPSET | SENSOR_TEMP_CPU | SENSOR_TEMP_MB) + #define SENSOR_SET_TEMP_WATER (SENSOR_TEMP_WATER_IN | SENSOR_TEMP_WATER_OUT) ++#define SENSOR_SET_WATER_BLOCK \ ++ (SENSOR_TEMP_WATER_BLOCK_IN | SENSOR_TEMP_WATER_BLOCK_OUT) + + struct ec_board_info { +- const char *board_names[MAX_IDENTICAL_BOARD_VARIATIONS]; + unsigned long sensors; + /* + * Defines which mutex to use for guarding access to the state and the +@@ -216,121 +279,194 @@ struct ec_board_info { + enum board_family family; + }; + +-static const struct ec_board_info board_info[] = { +- { +- .board_names = {"PRIME X470-PRO"}, +- .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | +- SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM | +- SENSOR_FAN_CPU_OPT | +- SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE, +- .mutex_path = ACPI_GLOBAL_LOCK_PSEUDO_PATH, +- .family = family_amd_400_series, +- }, +- { +- .board_names = {"PRIME X570-PRO"}, +- .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM | +- SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET, +- .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, +- .family = family_amd_500_series, +- }, +- { +- .board_names = {"ProArt X570-CREATOR WIFI"}, +- .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM | +- SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CPU_OPT | +- SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE, +- }, +- { +- .board_names = {"Pro WS X570-ACE"}, +- .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM | +- SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET | +- SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE, +- .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, +- .family = family_amd_500_series, +- }, +- { +- .board_names = {"ROG CROSSHAIR VIII DARK HERO"}, +- .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | +- SENSOR_TEMP_T_SENSOR | +- SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | +- SENSOR_FAN_CPU_OPT | SENSOR_FAN_WATER_FLOW | +- SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE, +- .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, +- .family = family_amd_500_series, +- }, +- { +- .board_names = { +- "ROG CROSSHAIR VIII FORMULA", +- "ROG CROSSHAIR VIII HERO", +- "ROG CROSSHAIR VIII HERO (WI-FI)", +- }, +- .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | +- SENSOR_TEMP_T_SENSOR | +- SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | +- SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | +- SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU | +- SENSOR_IN_CPU_CORE, +- .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, +- .family = family_amd_500_series, +- }, +- { +- .board_names = {"ROG CROSSHAIR VIII IMPACT"}, +- .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | +- SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM | +- SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | +- SENSOR_IN_CPU_CORE, +- .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, +- .family = family_amd_500_series, +- }, +- { +- .board_names = {"ROG STRIX B550-E GAMING"}, +- .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | +- SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM | +- SENSOR_FAN_CPU_OPT, +- .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, +- .family = family_amd_500_series, +- }, +- { +- .board_names = {"ROG STRIX B550-I GAMING"}, +- .sensors = 
SENSOR_SET_TEMP_CHIPSET_CPU_MB | +- SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM | +- SENSOR_FAN_VRM_HS | SENSOR_CURR_CPU | +- SENSOR_IN_CPU_CORE, +- .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, +- .family = family_amd_500_series, +- }, +- { +- .board_names = {"ROG STRIX X570-E GAMING"}, +- .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | +- SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM | +- SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | +- SENSOR_IN_CPU_CORE, +- .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, +- .family = family_amd_500_series, +- }, +- { +- .board_names = {"ROG STRIX X570-E GAMING WIFI II"}, +- .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | +- SENSOR_TEMP_T_SENSOR | SENSOR_CURR_CPU | +- SENSOR_IN_CPU_CORE, +- .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, +- .family = family_amd_500_series, +- }, +- { +- .board_names = {"ROG STRIX X570-F GAMING"}, +- .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | +- SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET, +- .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, +- .family = family_amd_500_series, +- }, +- { +- .board_names = {"ROG STRIX X570-I GAMING"}, +- .sensors = SENSOR_TEMP_T_SENSOR | SENSOR_FAN_VRM_HS | +- SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | +- SENSOR_IN_CPU_CORE, +- .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, +- .family = family_amd_500_series, +- }, +- {} ++static const struct ec_board_info board_info_prime_x470_pro = { ++ .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | ++ SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM | ++ SENSOR_FAN_CPU_OPT | ++ SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE, ++ .mutex_path = ACPI_GLOBAL_LOCK_PSEUDO_PATH, ++ .family = family_amd_400_series, ++}; ++ ++static const struct ec_board_info board_info_prime_x570_pro = { ++ .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM | ++ SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET, ++ .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, ++ .family = family_amd_500_series, ++}; ++ ++static const struct ec_board_info board_info_pro_art_x570_creator_wifi = { ++ .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM | ++ SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CPU_OPT | ++ SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE, ++ .family = family_amd_500_series, ++}; ++ ++static const struct ec_board_info board_info_pro_ws_x570_ace = { ++ .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_VRM | ++ SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET | ++ SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE, ++ .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, ++ .family = family_amd_500_series, ++}; ++ ++static const struct ec_board_info board_info_crosshair_viii_dark_hero = { ++ .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | ++ SENSOR_TEMP_T_SENSOR | ++ SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | ++ SENSOR_FAN_CPU_OPT | SENSOR_FAN_WATER_FLOW | ++ SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE, ++ .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, ++ .family = family_amd_500_series, ++}; ++ ++static const struct ec_board_info board_info_crosshair_viii_hero = { ++ .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | ++ SENSOR_TEMP_T_SENSOR | ++ SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | ++ SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | ++ SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU | ++ SENSOR_IN_CPU_CORE, ++ .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, ++ .family = family_amd_500_series, ++}; ++ ++static const struct ec_board_info board_info_maximus_xi_hero = { ++ .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | ++ SENSOR_TEMP_T_SENSOR | ++ SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | ++ SENSOR_FAN_CPU_OPT | SENSOR_FAN_WATER_FLOW, ++ .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, ++ .family = family_intel_300_series, ++}; ++ ++static const struct 
ec_board_info board_info_crosshair_viii_impact = { ++ .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | ++ SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM | ++ SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | ++ SENSOR_IN_CPU_CORE, ++ .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, ++ .family = family_amd_500_series, ++}; ++ ++static const struct ec_board_info board_info_strix_b550_e_gaming = { ++ .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | ++ SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM | ++ SENSOR_FAN_CPU_OPT, ++ .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, ++ .family = family_amd_500_series, ++}; ++ ++static const struct ec_board_info board_info_strix_b550_i_gaming = { ++ .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | ++ SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM | ++ SENSOR_FAN_VRM_HS | SENSOR_CURR_CPU | ++ SENSOR_IN_CPU_CORE, ++ .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, ++ .family = family_amd_500_series, ++}; ++ ++static const struct ec_board_info board_info_strix_x570_e_gaming = { ++ .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | ++ SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM | ++ SENSOR_FAN_CHIPSET | SENSOR_CURR_CPU | ++ SENSOR_IN_CPU_CORE, ++ .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, ++ .family = family_amd_500_series, ++}; ++ ++static const struct ec_board_info board_info_strix_x570_e_gaming_wifi_ii = { ++ .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | ++ SENSOR_TEMP_T_SENSOR | SENSOR_CURR_CPU | ++ SENSOR_IN_CPU_CORE, ++ .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, ++ .family = family_amd_500_series, ++}; ++ ++static const struct ec_board_info board_info_strix_x570_f_gaming = { ++ .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | ++ SENSOR_TEMP_T_SENSOR | SENSOR_FAN_CHIPSET, ++ .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, ++ .family = family_amd_500_series, ++}; ++ ++static const struct ec_board_info board_info_strix_x570_i_gaming = { ++ .sensors = SENSOR_TEMP_CHIPSET | SENSOR_TEMP_VRM | ++ SENSOR_TEMP_T_SENSOR | ++ SENSOR_FAN_VRM_HS | SENSOR_FAN_CHIPSET | ++ SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE, ++ .mutex_path = ASUS_HW_ACCESS_MUTEX_ASMX, ++ .family = family_amd_500_series, ++}; ++ ++static const struct ec_board_info board_info_strix_z690_a_gaming_wifi_d4 = { ++ .sensors = SENSOR_TEMP_T_SENSOR | SENSOR_TEMP_VRM, ++ .mutex_path = ASUS_HW_ACCESS_MUTEX_RMTW_ASMX, ++ .family = family_intel_600_series, ++}; ++ ++static const struct ec_board_info board_info_zenith_ii_extreme = { ++ .sensors = SENSOR_SET_TEMP_CHIPSET_CPU_MB | SENSOR_TEMP_T_SENSOR | ++ SENSOR_TEMP_VRM | SENSOR_SET_TEMP_WATER | ++ SENSOR_FAN_CPU_OPT | SENSOR_FAN_CHIPSET | SENSOR_FAN_VRM_HS | ++ SENSOR_FAN_WATER_FLOW | SENSOR_CURR_CPU | SENSOR_IN_CPU_CORE | ++ SENSOR_SET_WATER_BLOCK | ++ SENSOR_TEMP_T_SENSOR_2 | SENSOR_TEMP_SENSOR_EXTRA_1 | ++ SENSOR_TEMP_SENSOR_EXTRA_2 | SENSOR_TEMP_SENSOR_EXTRA_3, ++ .mutex_path = ASUS_HW_ACCESS_MUTEX_SB_PCI0_SBRG_SIO1_MUT0, ++ .family = family_amd_500_series, ++}; ++ ++#define DMI_EXACT_MATCH_ASUS_BOARD_NAME(name, board_info) \ ++ { \ ++ .matches = { \ ++ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, \ ++ "ASUSTeK COMPUTER INC."), \ ++ DMI_EXACT_MATCH(DMI_BOARD_NAME, name), \ ++ }, \ ++ .driver_data = (void *)board_info, \ ++ } ++ ++static const struct dmi_system_id dmi_table[] = { ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X470-PRO", ++ &board_info_prime_x470_pro), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("PRIME X570-PRO", ++ &board_info_prime_x570_pro), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ProArt X570-CREATOR WIFI", ++ &board_info_pro_art_x570_creator_wifi), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("Pro WS X570-ACE", ++ &board_info_pro_ws_x570_ace), ++ 
DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VIII DARK HERO", ++ &board_info_crosshair_viii_dark_hero), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VIII FORMULA", ++ &board_info_crosshair_viii_hero), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VIII HERO", ++ &board_info_crosshair_viii_hero), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VIII HERO (WI-FI)", ++ &board_info_crosshair_viii_hero), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG MAXIMUS XI HERO", ++ &board_info_maximus_xi_hero), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG MAXIMUS XI HERO (WI-FI)", ++ &board_info_maximus_xi_hero), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG CROSSHAIR VIII IMPACT", ++ &board_info_crosshair_viii_impact), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B550-E GAMING", ++ &board_info_strix_b550_e_gaming), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX B550-I GAMING", ++ &board_info_strix_b550_i_gaming), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX X570-E GAMING", ++ &board_info_strix_x570_e_gaming), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX X570-E GAMING WIFI II", ++ &board_info_strix_x570_e_gaming_wifi_ii), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX X570-F GAMING", ++ &board_info_strix_x570_f_gaming), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX X570-I GAMING", ++ &board_info_strix_x570_i_gaming), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG STRIX Z690-A GAMING WIFI D4", ++ &board_info_strix_z690_a_gaming_wifi_d4), ++ DMI_EXACT_MATCH_ASUS_BOARD_NAME("ROG ZENITH II EXTREME", ++ &board_info_zenith_ii_extreme), ++ {}, + }; + + struct ec_sensor { +@@ -441,12 +577,12 @@ static int find_ec_sensor_index(const struct ec_sensors_data *ec, + return -ENOENT; + } + +-static int __init bank_compare(const void *a, const void *b) ++static int bank_compare(const void *a, const void *b) + { + return *((const s8 *)a) - *((const s8 *)b); + } + +-static void __init setup_sensor_data(struct ec_sensors_data *ec) ++static void setup_sensor_data(struct ec_sensors_data *ec) + { + struct ec_sensor *s = ec->sensors; + bool bank_found; +@@ -478,7 +614,7 @@ static void __init setup_sensor_data(struct ec_sensors_data *ec) + sort(ec->banks, ec->nr_banks, 1, bank_compare, NULL); + } + +-static void __init fill_ec_registers(struct ec_sensors_data *ec) ++static void fill_ec_registers(struct ec_sensors_data *ec) + { + const struct ec_sensor_info *si; + unsigned int i, j, register_idx = 0; +@@ -493,7 +629,7 @@ static void __init fill_ec_registers(struct ec_sensors_data *ec) + } + } + +-static int __init setup_lock_data(struct device *dev) ++static int setup_lock_data(struct device *dev) + { + const char *mutex_path; + int status; +@@ -716,7 +852,7 @@ static umode_t asus_ec_hwmon_is_visible(const void *drvdata, + return find_ec_sensor_index(state, type, channel) >= 0 ? 
S_IRUGO : 0; + } + +-static int __init ++static int + asus_ec_hwmon_add_chan_info(struct hwmon_channel_info *asus_ec_hwmon_chan, + struct device *dev, int num, + enum hwmon_sensor_types type, u32 config) +@@ -745,27 +881,15 @@ static struct hwmon_chip_info asus_ec_chip_info = { + .ops = &asus_ec_hwmon_ops, + }; + +-static const struct ec_board_info * __init get_board_info(void) ++static const struct ec_board_info *get_board_info(void) + { +- const char *dmi_board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR); +- const char *dmi_board_name = dmi_get_system_info(DMI_BOARD_NAME); +- const struct ec_board_info *board; +- +- if (!dmi_board_vendor || !dmi_board_name || +- strcasecmp(dmi_board_vendor, "ASUSTeK COMPUTER INC.")) +- return NULL; +- +- for (board = board_info; board->sensors; board++) { +- if (match_string(board->board_names, +- MAX_IDENTICAL_BOARD_VARIATIONS, +- dmi_board_name) >= 0) +- return board; +- } ++ const struct dmi_system_id *dmi_entry; + +- return NULL; ++ dmi_entry = dmi_first_match(dmi_table); ++ return dmi_entry ? dmi_entry->driver_data : NULL; + } + +-static int __init asus_ec_probe(struct platform_device *pdev) ++static int asus_ec_probe(struct platform_device *pdev) + { + const struct hwmon_channel_info **ptr_asus_ec_ci; + int nr_count[hwmon_max] = { 0 }, nr_types = 0; +@@ -799,6 +923,12 @@ static int __init asus_ec_probe(struct platform_device *pdev) + case family_amd_500_series: + ec_data->sensors_info = sensors_family_amd_500; + break; ++ case family_intel_300_series: ++ ec_data->sensors_info = sensors_family_intel_300; ++ break; ++ case family_intel_600_series: ++ ec_data->sensors_info = sensors_family_intel_600; ++ break; + default: + dev_err(dev, "Unknown board family: %d", + ec_data->board_info->family); +@@ -868,29 +998,37 @@ static int __init asus_ec_probe(struct platform_device *pdev) + return PTR_ERR_OR_ZERO(hwdev); + } + +- +-static const struct acpi_device_id acpi_ec_ids[] = { +- /* Embedded Controller Device */ +- { "PNP0C09", 0 }, +- {} +-}; ++MODULE_DEVICE_TABLE(dmi, dmi_table); + + static struct platform_driver asus_ec_sensors_platform_driver = { + .driver = { + .name = "asus-ec-sensors", +- .acpi_match_table = acpi_ec_ids, + }, ++ .probe = asus_ec_probe, + }; + +-MODULE_DEVICE_TABLE(acpi, acpi_ec_ids); +-/* +- * we use module_platform_driver_probe() rather than module_platform_driver() +- * because the probe function (and its dependants) are marked with __init, which +- * means we can't put it into the .probe member of the platform_driver struct +- * above, and we can't mark the asus_ec_sensors_platform_driver object as __init +- * because the object is referenced from the module exit code. 
+- */ +-module_platform_driver_probe(asus_ec_sensors_platform_driver, asus_ec_probe); ++static struct platform_device *asus_ec_sensors_platform_device; ++ ++static int __init asus_ec_init(void) ++{ ++ asus_ec_sensors_platform_device = ++ platform_create_bundle(&asus_ec_sensors_platform_driver, ++ asus_ec_probe, NULL, 0, NULL, 0); ++ ++ if (IS_ERR(asus_ec_sensors_platform_device)) ++ return PTR_ERR(asus_ec_sensors_platform_device); ++ ++ return 0; ++} ++ ++static void __exit asus_ec_exit(void) ++{ ++ platform_device_unregister(asus_ec_sensors_platform_device); ++ platform_driver_unregister(&asus_ec_sensors_platform_driver); ++} ++ ++module_init(asus_ec_init); ++module_exit(asus_ec_exit); + + module_param_named(mutex_path, mutex_path_override, charp, 0); + MODULE_PARM_DESC(mutex_path, +diff --git a/drivers/hwmon/mr75203.c b/drivers/hwmon/mr75203.c +index 26278b0f17a98..9259779cc2dff 100644 +--- a/drivers/hwmon/mr75203.c ++++ b/drivers/hwmon/mr75203.c +@@ -68,8 +68,9 @@ + + /* VM Individual Macro Register */ + #define VM_COM_REG_SIZE 0x200 +-#define VM_SDIF_DONE(n) (VM_COM_REG_SIZE + 0x34 + 0x200 * (n)) +-#define VM_SDIF_DATA(n) (VM_COM_REG_SIZE + 0x40 + 0x200 * (n)) ++#define VM_SDIF_DONE(vm) (VM_COM_REG_SIZE + 0x34 + 0x200 * (vm)) ++#define VM_SDIF_DATA(vm, ch) \ ++ (VM_COM_REG_SIZE + 0x40 + 0x200 * (vm) + 0x4 * (ch)) + + /* SDA Slave Register */ + #define IP_CTRL 0x00 +@@ -115,6 +116,7 @@ struct pvt_device { + u32 t_num; + u32 p_num; + u32 v_num; ++ u32 c_num; + u32 ip_freq; + u8 *vm_idx; + }; +@@ -178,14 +180,15 @@ static int pvt_read_in(struct device *dev, u32 attr, int channel, long *val) + { + struct pvt_device *pvt = dev_get_drvdata(dev); + struct regmap *v_map = pvt->v_map; ++ u8 vm_idx, ch_idx; + u32 n, stat; +- u8 vm_idx; + int ret; + +- if (channel >= pvt->v_num) ++ if (channel >= pvt->v_num * pvt->c_num) + return -EINVAL; + +- vm_idx = pvt->vm_idx[channel]; ++ vm_idx = pvt->vm_idx[channel / pvt->c_num]; ++ ch_idx = channel % pvt->c_num; + + switch (attr) { + case hwmon_in_input: +@@ -196,13 +199,23 @@ static int pvt_read_in(struct device *dev, u32 attr, int channel, long *val) + if (ret) + return ret; + +- ret = regmap_read(v_map, VM_SDIF_DATA(vm_idx), &n); ++ ret = regmap_read(v_map, VM_SDIF_DATA(vm_idx, ch_idx), &n); + if(ret < 0) + return ret; + + n &= SAMPLE_DATA_MSK; +- /* Convert the N bitstream count into voltage */ +- *val = (PVT_N_CONST * n - PVT_R_CONST) >> PVT_CONV_BITS; ++ /* ++ * Convert the N bitstream count into voltage. ++ * To support negative voltage calculation for 64bit machines ++ * n must be cast to long, since n and *val differ both in ++ * signedness and in size. ++ * Division is used instead of right shift, because for signed ++ * numbers, the sign bit is used to fill the vacated bit ++ * positions, and if the number is negative, 1 is used. ++ * BIT(x) may not be used instead of (1 << x) because it's ++ * unsigned. 
++ */ ++ *val = (PVT_N_CONST * (long)n - PVT_R_CONST) / (1 << PVT_CONV_BITS); + + return 0; + default: +@@ -375,6 +388,19 @@ static int pvt_init(struct pvt_device *pvt) + if (ret) + return ret; + ++ val = (BIT(pvt->c_num) - 1) | VM_CH_INIT | ++ IP_POLL << SDIF_ADDR_SFT | SDIF_WRN_W | SDIF_PROG; ++ ret = regmap_write(v_map, SDIF_W, val); ++ if (ret < 0) ++ return ret; ++ ++ ret = regmap_read_poll_timeout(v_map, SDIF_STAT, ++ val, !(val & SDIF_BUSY), ++ PVT_POLL_DELAY_US, ++ PVT_POLL_TIMEOUT_US); ++ if (ret) ++ return ret; ++ + val = CFG1_VOL_MEAS_MODE | CFG1_PARALLEL_OUT | + CFG1_14_BIT | IP_CFG << SDIF_ADDR_SFT | + SDIF_WRN_W | SDIF_PROG; +@@ -489,8 +515,8 @@ static int pvt_reset_control_deassert(struct device *dev, struct pvt_device *pvt + + static int mr75203_probe(struct platform_device *pdev) + { ++ u32 ts_num, vm_num, pd_num, ch_num, val, index, i; + const struct hwmon_channel_info **pvt_info; +- u32 ts_num, vm_num, pd_num, val, index, i; + struct device *dev = &pdev->dev; + u32 *temp_config, *in_config; + struct device *hwmon_dev; +@@ -531,9 +557,11 @@ static int mr75203_probe(struct platform_device *pdev) + ts_num = (val & TS_NUM_MSK) >> TS_NUM_SFT; + pd_num = (val & PD_NUM_MSK) >> PD_NUM_SFT; + vm_num = (val & VM_NUM_MSK) >> VM_NUM_SFT; ++ ch_num = (val & CH_NUM_MSK) >> CH_NUM_SFT; + pvt->t_num = ts_num; + pvt->p_num = pd_num; + pvt->v_num = vm_num; ++ pvt->c_num = ch_num; + val = 0; + if (ts_num) + val++; +@@ -570,7 +598,7 @@ static int mr75203_probe(struct platform_device *pdev) + } + + if (vm_num) { +- u32 num = vm_num; ++ u32 total_ch; + + ret = pvt_get_regmap(pdev, "vm", pvt); + if (ret) +@@ -584,30 +612,30 @@ static int mr75203_probe(struct platform_device *pdev) + ret = device_property_read_u8_array(dev, "intel,vm-map", + pvt->vm_idx, vm_num); + if (ret) { +- num = 0; ++ /* ++ * Incase intel,vm-map property is not defined, we ++ * assume incremental channel numbers. ++ */ ++ for (i = 0; i < vm_num; i++) ++ pvt->vm_idx[i] = i; + } else { + for (i = 0; i < vm_num; i++) + if (pvt->vm_idx[i] >= vm_num || + pvt->vm_idx[i] == 0xff) { +- num = i; ++ pvt->v_num = i; ++ vm_num = i; + break; + } + } + +- /* +- * Incase intel,vm-map property is not defined, we assume +- * incremental channel numbers. 
+- */ +- for (i = num; i < vm_num; i++) +- pvt->vm_idx[i] = i; +- +- in_config = devm_kcalloc(dev, num + 1, ++ total_ch = ch_num * vm_num; ++ in_config = devm_kcalloc(dev, total_ch + 1, + sizeof(*in_config), GFP_KERNEL); + if (!in_config) + return -ENOMEM; + +- memset32(in_config, HWMON_I_INPUT, num); +- in_config[num] = 0; ++ memset32(in_config, HWMON_I_INPUT, total_ch); ++ in_config[total_ch] = 0; + pvt_in.config = in_config; + + pvt_info[index++] = &pvt_in; +diff --git a/drivers/hwmon/tps23861.c b/drivers/hwmon/tps23861.c +index 8bd6435c13e82..2148fd543bb4b 100644 +--- a/drivers/hwmon/tps23861.c ++++ b/drivers/hwmon/tps23861.c +@@ -489,18 +489,20 @@ static char *tps23861_port_poe_plus_status(struct tps23861_data *data, int port) + + static int tps23861_port_resistance(struct tps23861_data *data, int port) + { +- u16 regval; ++ unsigned int raw_val; ++ __le16 regval; + + regmap_bulk_read(data->regmap, + PORT_1_RESISTANCE_LSB + PORT_N_RESISTANCE_LSB_OFFSET * (port - 1), + ®val, + 2); + +- switch (FIELD_GET(PORT_RESISTANCE_RSN_MASK, regval)) { ++ raw_val = le16_to_cpu(regval); ++ switch (FIELD_GET(PORT_RESISTANCE_RSN_MASK, raw_val)) { + case PORT_RESISTANCE_RSN_OTHER: +- return (FIELD_GET(PORT_RESISTANCE_MASK, regval) * RESISTANCE_LSB) / 10000; ++ return (FIELD_GET(PORT_RESISTANCE_MASK, raw_val) * RESISTANCE_LSB) / 10000; + case PORT_RESISTANCE_RSN_LOW: +- return (FIELD_GET(PORT_RESISTANCE_MASK, regval) * RESISTANCE_LSB_LOW) / 10000; ++ return (FIELD_GET(PORT_RESISTANCE_MASK, raw_val) * RESISTANCE_LSB_LOW) / 10000; + case PORT_RESISTANCE_RSN_SHORT: + case PORT_RESISTANCE_RSN_OPEN: + default: +diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c +index fabca5e51e3d4..4dd133eccfdfb 100644 +--- a/drivers/infiniband/core/cma.c ++++ b/drivers/infiniband/core/cma.c +@@ -1719,8 +1719,8 @@ cma_ib_id_from_event(struct ib_cm_id *cm_id, + } + + if (!validate_net_dev(*net_dev, +- (struct sockaddr *)&req->listen_addr_storage, +- (struct sockaddr *)&req->src_addr_storage)) { ++ (struct sockaddr *)&req->src_addr_storage, ++ (struct sockaddr *)&req->listen_addr_storage)) { + id_priv = ERR_PTR(-EHOSTUNREACH); + goto err; + } +diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c +index 186ed8859920c..d39e16c211e8a 100644 +--- a/drivers/infiniband/core/umem_odp.c ++++ b/drivers/infiniband/core/umem_odp.c +@@ -462,7 +462,7 @@ retry: + mutex_unlock(&umem_odp->umem_mutex); + + out_put_mm: +- mmput(owning_mm); ++ mmput_async(owning_mm); + out_put_task: + if (owning_process) + put_task_struct(owning_process); +diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h +index 2855e9ad4b328..1df076e70e293 100644 +--- a/drivers/infiniband/hw/hns/hns_roce_device.h ++++ b/drivers/infiniband/hw/hns/hns_roce_device.h +@@ -730,7 +730,6 @@ struct hns_roce_caps { + u32 num_qps; + u32 num_pi_qps; + u32 reserved_qps; +- int num_qpc_timer; + u32 num_srqs; + u32 max_wqes; + u32 max_srq_wrs; +diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +index b354caeaa9b29..49edff989f1f1 100644 +--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c ++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +@@ -1941,7 +1941,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) + + caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM; + caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM; +- caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM; ++ caps->qpc_timer_bt_num = HNS_ROCE_V2_MAX_QPC_TIMER_BT_NUM; + 
caps->cqc_timer_bt_num = HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM; + + caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA; +@@ -2237,7 +2237,6 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) + caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg); + caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg); + caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg); +- caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer); + caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges); + caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges); + caps->num_aeq_vectors = resp_a->num_aeq_vectors; +diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +index 7ffb7824d2689..e4b640caee1b7 100644 +--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h ++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +@@ -36,11 +36,11 @@ + #include + + #define HNS_ROCE_V2_MAX_QP_NUM 0x1000 +-#define HNS_ROCE_V2_MAX_QPC_TIMER_NUM 0x200 + #define HNS_ROCE_V2_MAX_WQE_NUM 0x8000 + #define HNS_ROCE_V2_MAX_SRQ_WR 0x8000 + #define HNS_ROCE_V2_MAX_SRQ_SGE 64 + #define HNS_ROCE_V2_MAX_CQ_NUM 0x100000 ++#define HNS_ROCE_V2_MAX_QPC_TIMER_BT_NUM 0x100 + #define HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM 0x100 + #define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000 + #define HNS_ROCE_V2_MAX_CQE_NUM 0x400000 +@@ -83,7 +83,7 @@ + + #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE + #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE +-#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 ++#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFF000 + #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 + #define HNS_ROCE_INVALID_LKEY 0x0 + #define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000 +diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c +index c8af4ebd7cbd3..4ccb217b2841d 100644 +--- a/drivers/infiniband/hw/hns/hns_roce_main.c ++++ b/drivers/infiniband/hw/hns/hns_roce_main.c +@@ -725,7 +725,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qpc_timer_table, + HEM_TYPE_QPC_TIMER, + hr_dev->caps.qpc_timer_entry_sz, +- hr_dev->caps.num_qpc_timer, 1); ++ hr_dev->caps.qpc_timer_bt_num, 1); + if (ret) { + dev_err(dev, + "Failed to init QPC timer memory, aborting.\n"); +diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c +index 48d3616a6d71d..7bee7f6c5e702 100644 +--- a/drivers/infiniband/hw/hns/hns_roce_qp.c ++++ b/drivers/infiniband/hw/hns/hns_roce_qp.c +@@ -462,11 +462,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, + hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) + + hr_qp->rq.rsv_sge); + +- if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) +- hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); +- else +- hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz * +- hr_qp->rq.max_gs); ++ hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz * ++ hr_qp->rq.max_gs); + + hr_qp->rq.wqe_cnt = cnt; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE && +diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c +index daeab5daed5bc..d003ad864ee44 100644 +--- a/drivers/infiniband/hw/irdma/uk.c ++++ b/drivers/infiniband/hw/irdma/uk.c +@@ -1005,6 +1005,7 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, + int ret_code; + bool move_cq_head = true; + u8 polarity; ++ u8 op_type; + bool ext_valid; + __le64 *ext_cqe; + +@@ -1187,7 +1188,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, + do { + __le64 *sw_wqe; 
+ u64 wqe_qword; +- u8 op_type; + u32 tail; + + tail = qp->sq_ring.tail; +@@ -1204,6 +1204,8 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, + break; + } + } while (1); ++ if (op_type == IRDMA_OP_TYPE_BIND_MW && info->minor_err == FLUSH_PROT_ERR) ++ info->minor_err = FLUSH_MW_BIND_ERR; + qp->sq_flush_seen = true; + if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) + qp->sq_flush_complete = true; +diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c +index ab3c5208a1231..f4d774451160d 100644 +--- a/drivers/infiniband/hw/irdma/utils.c ++++ b/drivers/infiniband/hw/irdma/utils.c +@@ -590,11 +590,14 @@ static int irdma_wait_event(struct irdma_pci_f *rf, + cqp_error = cqp_request->compl_info.error; + if (cqp_error) { + err_code = -EIO; +- if (cqp_request->compl_info.maj_err_code == 0xFFFF && +- cqp_request->compl_info.min_err_code == 0x8029) { +- if (!rf->reset) { +- rf->reset = true; +- rf->gen_ops.request_reset(rf); ++ if (cqp_request->compl_info.maj_err_code == 0xFFFF) { ++ if (cqp_request->compl_info.min_err_code == 0x8002) ++ err_code = -EBUSY; ++ else if (cqp_request->compl_info.min_err_code == 0x8029) { ++ if (!rf->reset) { ++ rf->reset = true; ++ rf->gen_ops.request_reset(rf); ++ } + } + } + } +@@ -2597,7 +2600,7 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp) + spin_unlock_irqrestore(&iwqp->lock, flags2); + spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); + if (compl_generated) +- irdma_comp_handler(iwqp->iwrcq); ++ irdma_comp_handler(iwqp->iwscq); + } else { + spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1); + mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush, +diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c +index 227a799385d1d..ab73d1715f991 100644 +--- a/drivers/infiniband/hw/irdma/verbs.c ++++ b/drivers/infiniband/hw/irdma/verbs.c +@@ -39,15 +39,18 @@ static int irdma_query_device(struct ib_device *ibdev, + props->max_send_sge = hw_attrs->uk_attrs.max_hw_wq_frags; + props->max_recv_sge = hw_attrs->uk_attrs.max_hw_wq_frags; + props->max_cq = rf->max_cq - rf->used_cqs; +- props->max_cqe = rf->max_cqe; ++ props->max_cqe = rf->max_cqe - 1; + props->max_mr = rf->max_mr - rf->used_mrs; + props->max_mw = props->max_mr; + props->max_pd = rf->max_pd - rf->used_pds; + props->max_sge_rd = hw_attrs->uk_attrs.max_hw_read_sges; + props->max_qp_rd_atom = hw_attrs->max_hw_ird; + props->max_qp_init_rd_atom = hw_attrs->max_hw_ord; +- if (rdma_protocol_roce(ibdev, 1)) ++ if (rdma_protocol_roce(ibdev, 1)) { ++ props->device_cap_flags |= IB_DEVICE_RC_RNR_NAK_GEN; + props->max_pkeys = IRDMA_PKEY_TBL_SZ; ++ } ++ + props->max_ah = rf->max_ah; + props->max_mcast_grp = rf->max_mcg; + props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX; +@@ -3001,6 +3004,7 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) + struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; + struct irdma_cqp_request *cqp_request; + struct cqp_cmds_info *cqp_info; ++ int status; + + if (iwmr->type != IRDMA_MEMREG_TYPE_MEM) { + if (iwmr->region) { +@@ -3031,8 +3035,11 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) + cqp_info->post_sq = 1; + cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev; + cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request; +- irdma_handle_cqp_op(iwdev->rf, cqp_request); ++ status = irdma_handle_cqp_op(iwdev->rf, cqp_request); + irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); ++ if (status) ++ return status; ++ + irdma_free_stag(iwdev, iwmr->stag); + 
done: + if (iwpbl->pbl_allocated) +diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c +index 293ed709e5ed5..b4dc52392275b 100644 +--- a/drivers/infiniband/hw/mlx5/mad.c ++++ b/drivers/infiniband/hw/mlx5/mad.c +@@ -166,6 +166,12 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num, + mdev = dev->mdev; + mdev_port_num = 1; + } ++ if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1) { ++ /* set local port to one for Function-Per-Port HCA. */ ++ mdev = dev->mdev; ++ mdev_port_num = 1; ++ } ++ + /* Declaring support of extended counters */ + if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) { + struct ib_class_port_info cpi = {}; +diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c +index 1f4e60257700e..7d47b521070b1 100644 +--- a/drivers/infiniband/sw/siw/siw_qp_tx.c ++++ b/drivers/infiniband/sw/siw/siw_qp_tx.c +@@ -29,7 +29,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) + dma_addr_t paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx); + + if (paddr) +- return virt_to_page(paddr); ++ return virt_to_page((void *)paddr); + + return NULL; + } +@@ -533,13 +533,23 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) + kunmap_local(kaddr); + } + } else { +- u64 va = sge->laddr + sge_off; ++ /* ++ * Cast to an uintptr_t to preserve all 64 bits ++ * in sge->laddr. ++ */ ++ uintptr_t va = (uintptr_t)(sge->laddr + sge_off); + +- page_array[seg] = virt_to_page(va & PAGE_MASK); ++ /* ++ * virt_to_page() takes a (void *) pointer ++ * so cast to a (void *) meaning it will be 64 ++ * bits on a 64 bit platform and 32 bits on a ++ * 32 bit platform. ++ */ ++ page_array[seg] = virt_to_page((void *)(va & PAGE_MASK)); + if (do_crc) + crypto_shash_update( + c_tx->mpa_crc_hd, +- (void *)(uintptr_t)va, ++ (void *)va, + plen); + } + +diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c +index 525f083fcaeb4..bf464400a4409 100644 +--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c ++++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c +@@ -1004,7 +1004,8 @@ rtrs_clt_get_copy_req(struct rtrs_clt_path *alive_path, + static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con, + struct rtrs_clt_io_req *req, + struct rtrs_rbuf *rbuf, bool fr_en, +- u32 size, u32 imm, struct ib_send_wr *wr, ++ u32 count, u32 size, u32 imm, ++ struct ib_send_wr *wr, + struct ib_send_wr *tail) + { + struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); +@@ -1024,12 +1025,12 @@ static int rtrs_post_rdma_write_sg(struct rtrs_clt_con *con, + num_sge = 2; + ptail = tail; + } else { +- for_each_sg(req->sglist, sg, req->sg_cnt, i) { ++ for_each_sg(req->sglist, sg, count, i) { + sge[i].addr = sg_dma_address(sg); + sge[i].length = sg_dma_len(sg); + sge[i].lkey = clt_path->s.dev->ib_pd->local_dma_lkey; + } +- num_sge = 1 + req->sg_cnt; ++ num_sge = 1 + count; + } + sge[i].addr = req->iu->dma_addr; + sge[i].length = size; +@@ -1142,7 +1143,7 @@ static int rtrs_clt_write_req(struct rtrs_clt_io_req *req) + */ + rtrs_clt_update_all_stats(req, WRITE); + +- ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en, ++ ret = rtrs_post_rdma_write_sg(req->con, req, rbuf, fr_en, count, + req->usr_len + sizeof(*msg), + imm, wr, &inv_wr); + if (ret) { +diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c +index 24024bce25664..ee4876bdce4ac 100644 +--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c ++++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c +@@ -600,7 +600,7 @@ static 
int map_cont_bufs(struct rtrs_srv_path *srv_path) + struct sg_table *sgt = &srv_mr->sgt; + struct scatterlist *s; + struct ib_mr *mr; +- int nr, chunks; ++ int nr, nr_sgt, chunks; + + chunks = chunks_per_mr * mri; + if (!always_invalidate) +@@ -615,19 +615,19 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path) + sg_set_page(s, srv->chunks[chunks + i], + max_chunk_size, 0); + +- nr = ib_dma_map_sg(srv_path->s.dev->ib_dev, sgt->sgl, ++ nr_sgt = ib_dma_map_sg(srv_path->s.dev->ib_dev, sgt->sgl, + sgt->nents, DMA_BIDIRECTIONAL); +- if (nr < sgt->nents) { +- err = nr < 0 ? nr : -EINVAL; ++ if (!nr_sgt) { ++ err = -EINVAL; + goto free_sg; + } + mr = ib_alloc_mr(srv_path->s.dev->ib_pd, IB_MR_TYPE_MEM_REG, +- sgt->nents); ++ nr_sgt); + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + goto unmap_sg; + } +- nr = ib_map_mr_sg(mr, sgt->sgl, sgt->nents, ++ nr = ib_map_mr_sg(mr, sgt->sgl, nr_sgt, + NULL, max_chunk_size); + if (nr < 0 || nr < sgt->nents) { + err = nr < 0 ? nr : -EINVAL; +@@ -646,7 +646,7 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path) + } + } + /* Eventually dma addr for each chunk can be cached */ +- for_each_sg(sgt->sgl, s, sgt->orig_nents, i) ++ for_each_sg(sgt->sgl, s, nr_sgt, i) + srv_path->dma_addr[chunks + i] = sg_dma_address(s); + + ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); +diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c +index 6058abf42ba74..3d9c108d73ad8 100644 +--- a/drivers/infiniband/ulp/srp/ib_srp.c ++++ b/drivers/infiniband/ulp/srp/ib_srp.c +@@ -1962,7 +1962,8 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) + if (scmnd) { + req = scsi_cmd_priv(scmnd); + scmnd = srp_claim_req(ch, req, NULL, scmnd); +- } else { ++ } ++ if (!scmnd) { + shost_printk(KERN_ERR, target->scsi_host, + "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n", + rsp->tag, ch - target->ch, ch->qp->qp_num); +diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c +index 840831d5d2ad9..a0924144bac80 100644 +--- a/drivers/iommu/amd/iommu.c ++++ b/drivers/iommu/amd/iommu.c +@@ -874,7 +874,8 @@ static void build_completion_wait(struct iommu_cmd *cmd, + memset(cmd, 0, sizeof(*cmd)); + cmd->data[0] = lower_32_bits(paddr) | CMD_COMPL_WAIT_STORE_MASK; + cmd->data[1] = upper_32_bits(paddr); +- cmd->data[2] = data; ++ cmd->data[2] = lower_32_bits(data); ++ cmd->data[3] = upper_32_bits(data); + CMD_SET_TYPE(cmd, CMD_COMPL_WAIT); + } + +diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c +index afb3efd565b78..f3e2689787ae5 100644 +--- a/drivers/iommu/amd/iommu_v2.c ++++ b/drivers/iommu/amd/iommu_v2.c +@@ -786,6 +786,8 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids) + if (dev_state->domain == NULL) + goto out_free_states; + ++ /* See iommu_is_default_domain() */ ++ dev_state->domain->type = IOMMU_DOMAIN_IDENTITY; + amd_iommu_domain_direct_map(dev_state->domain); + + ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids); +diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c +index 64b14ac4c7b02..fc8c1420c0b69 100644 +--- a/drivers/iommu/intel/dmar.c ++++ b/drivers/iommu/intel/dmar.c +@@ -2368,6 +2368,13 @@ static int dmar_device_hotplug(acpi_handle handle, bool insert) + if (!dmar_in_use()) + return 0; + ++ /* ++ * It's unlikely that any I/O board is hot added before the IOMMU ++ * subsystem is initialized. 
++ */ ++ if (IS_ENABLED(CONFIG_INTEL_IOMMU) && !intel_iommu_enabled) ++ return -EOPNOTSUPP; ++ + if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) { + tmp = handle; + } else { +diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c +index 5c0dce78586aa..40ac3a78d90ef 100644 +--- a/drivers/iommu/intel/iommu.c ++++ b/drivers/iommu/intel/iommu.c +@@ -422,14 +422,36 @@ static inline int domain_pfn_supported(struct dmar_domain *domain, + return !(addr_width < BITS_PER_LONG && pfn >> addr_width); + } + ++/* ++ * Calculate the Supported Adjusted Guest Address Widths of an IOMMU. ++ * Refer to 11.4.2 of the VT-d spec for the encoding of each bit of ++ * the returned SAGAW. ++ */ ++static unsigned long __iommu_calculate_sagaw(struct intel_iommu *iommu) ++{ ++ unsigned long fl_sagaw, sl_sagaw; ++ ++ fl_sagaw = BIT(2) | (cap_fl1gp_support(iommu->cap) ? BIT(3) : 0); ++ sl_sagaw = cap_sagaw(iommu->cap); ++ ++ /* Second level only. */ ++ if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) ++ return sl_sagaw; ++ ++ /* First level only. */ ++ if (!ecap_slts(iommu->ecap)) ++ return fl_sagaw; ++ ++ return fl_sagaw & sl_sagaw; ++} ++ + static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) + { + unsigned long sagaw; + int agaw; + +- sagaw = cap_sagaw(iommu->cap); +- for (agaw = width_to_agaw(max_gaw); +- agaw >= 0; agaw--) { ++ sagaw = __iommu_calculate_sagaw(iommu); ++ for (agaw = width_to_agaw(max_gaw); agaw >= 0; agaw--) { + if (test_bit(agaw, &sagaw)) + break; + } +@@ -3123,13 +3145,7 @@ static int __init init_dmars(void) + + #ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { +- /* +- * Call dmar_alloc_hwirq() with dmar_global_lock held, +- * could cause possible lock race condition. +- */ +- up_write(&dmar_global_lock); + ret = intel_svm_enable_prq(iommu); +- down_write(&dmar_global_lock); + if (ret) + goto free_iommu; + } +@@ -4035,7 +4051,6 @@ int __init intel_iommu_init(void) + force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) || + platform_optin_force_iommu(); + +- down_write(&dmar_global_lock); + if (dmar_table_init()) { + if (force_on) + panic("tboot: Failed to initialize DMAR table\n"); +@@ -4048,16 +4063,6 @@ int __init intel_iommu_init(void) + goto out_free_dmar; + } + +- up_write(&dmar_global_lock); +- +- /* +- * The bus notifier takes the dmar_global_lock, so lockdep will +- * complain later when we register it under the lock. 
+- */ +- dmar_register_bus_notifier(); +- +- down_write(&dmar_global_lock); +- + if (!no_iommu) + intel_iommu_debugfs_init(); + +@@ -4105,11 +4110,9 @@ int __init intel_iommu_init(void) + pr_err("Initialization failed\n"); + goto out_free_dmar; + } +- up_write(&dmar_global_lock); + + init_iommu_pm_ops(); + +- down_read(&dmar_global_lock); + for_each_active_iommu(iommu, drhd) { + /* + * The flush queue implementation does not perform +@@ -4127,13 +4130,11 @@ int __init intel_iommu_init(void) + "%s", iommu->name); + iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); + } +- up_read(&dmar_global_lock); + + bus_set_iommu(&pci_bus_type, &intel_iommu_ops); + if (si_domain && !hw_pass_through) + register_memory_notifier(&intel_iommu_memory_nb); + +- down_read(&dmar_global_lock); + if (probe_acpi_namespace_devices()) + pr_warn("ACPI name space devices didn't probe correctly\n"); + +@@ -4144,17 +4145,15 @@ int __init intel_iommu_init(void) + + iommu_disable_protect_mem_regions(iommu); + } +- up_read(&dmar_global_lock); +- +- pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); + + intel_iommu_enabled = 1; ++ dmar_register_bus_notifier(); ++ pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); + + return 0; + + out_free_dmar: + intel_iommu_free_dmars(); +- up_write(&dmar_global_lock); + return ret; + } + +diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c +index 847ad47a2dfd3..f113833c3075c 100644 +--- a/drivers/iommu/iommu.c ++++ b/drivers/iommu/iommu.c +@@ -3089,6 +3089,24 @@ out: + return ret; + } + ++static bool iommu_is_default_domain(struct iommu_group *group) ++{ ++ if (group->domain == group->default_domain) ++ return true; ++ ++ /* ++ * If the default domain was set to identity and it is still an identity ++ * domain then we consider this a pass. This happens because of ++ * amd_iommu_init_device() replacing the default idenytity domain with an ++ * identity domain that has a different configuration for AMDGPU. ++ */ ++ if (group->default_domain && ++ group->default_domain->type == IOMMU_DOMAIN_IDENTITY && ++ group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY) ++ return true; ++ return false; ++} ++ + /** + * iommu_device_use_default_domain() - Device driver wants to handle device + * DMA through the kernel DMA API. +@@ -3107,8 +3125,7 @@ int iommu_device_use_default_domain(struct device *dev) + + mutex_lock(&group->mutex); + if (group->owner_cnt) { +- if (group->domain != group->default_domain || +- group->owner) { ++ if (group->owner || !iommu_is_default_domain(group)) { + ret = -EBUSY; + goto unlock_out; + } +diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c +index 25be4b822aa07..bf340d779c10b 100644 +--- a/drivers/iommu/virtio-iommu.c ++++ b/drivers/iommu/virtio-iommu.c +@@ -1006,7 +1006,18 @@ static int viommu_of_xlate(struct device *dev, struct of_phandle_args *args) + return iommu_fwspec_add_ids(dev, args->args, 1); + } + ++static bool viommu_capable(enum iommu_cap cap) ++{ ++ switch (cap) { ++ case IOMMU_CAP_CACHE_COHERENCY: ++ return true; ++ default: ++ return false; ++ } ++} ++ + static struct iommu_ops viommu_ops = { ++ .capable = viommu_capable, + .domain_alloc = viommu_domain_alloc, + .probe_device = viommu_probe_device, + .probe_finalize = viommu_probe_finalize, +diff --git a/drivers/md/md.c b/drivers/md/md.c +index 91e7e80fce489..25d18b67a1620 100644 +--- a/drivers/md/md.c ++++ b/drivers/md/md.c +@@ -5647,6 +5647,7 @@ static int md_alloc(dev_t dev, char *name) + * removed (mddev_delayed_delete). 
+ */ + flush_workqueue(md_misc_wq); ++ flush_workqueue(md_rdev_misc_wq); + + mutex_lock(&disks_mutex); + mddev = mddev_alloc(dev); +diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c +index 6ba4c83fe5fc0..bff0bfd10e235 100644 +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1974,6 +1974,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, + for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) + new_slave->target_last_arp_rx[i] = new_slave->last_rx; + ++ new_slave->last_tx = new_slave->last_rx; ++ + if (bond->params.miimon && !bond->params.use_carrier) { + link_reporting = bond_check_dev_link(bond, slave_dev, 1); + +@@ -2857,8 +2859,11 @@ static void bond_arp_send(struct slave *slave, int arp_op, __be32 dest_ip, + return; + } + +- if (bond_handle_vlan(slave, tags, skb)) ++ if (bond_handle_vlan(slave, tags, skb)) { ++ slave_update_last_tx(slave); + arp_xmit(skb); ++ } ++ + return; + } + +@@ -3047,8 +3052,7 @@ static int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, + curr_active_slave->last_link_up)) + bond_validate_arp(bond, slave, tip, sip); + else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) && +- bond_time_in_interval(bond, +- dev_trans_start(curr_arp_slave->dev), 1)) ++ bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1)) + bond_validate_arp(bond, slave, sip, tip); + + out_unlock: +@@ -3076,8 +3080,10 @@ static void bond_ns_send(struct slave *slave, const struct in6_addr *daddr, + } + + addrconf_addr_solict_mult(daddr, &mcaddr); +- if (bond_handle_vlan(slave, tags, skb)) ++ if (bond_handle_vlan(slave, tags, skb)) { ++ slave_update_last_tx(slave); + ndisc_send_skb(skb, &mcaddr, saddr); ++ } + } + + static void bond_ns_send_all(struct bonding *bond, struct slave *slave) +@@ -3134,6 +3140,9 @@ static void bond_ns_send_all(struct bonding *bond, struct slave *slave) + found: + if (!ipv6_dev_get_saddr(dev_net(dst->dev), dst->dev, &targets[i], 0, &saddr)) + bond_ns_send(slave, &targets[i], &saddr, tags); ++ else ++ bond_ns_send(slave, &targets[i], &in6addr_any, tags); ++ + dst_release(dst); + kfree(tags); + } +@@ -3165,12 +3174,19 @@ static bool bond_has_this_ip6(struct bonding *bond, struct in6_addr *addr) + return ret; + } + +-static void bond_validate_ns(struct bonding *bond, struct slave *slave, ++static void bond_validate_na(struct bonding *bond, struct slave *slave, + struct in6_addr *saddr, struct in6_addr *daddr) + { + int i; + +- if (ipv6_addr_any(saddr) || !bond_has_this_ip6(bond, daddr)) { ++ /* Ignore NAs that: ++ * 1. Source address is unspecified address. ++ * 2. Dest address is neither all-nodes multicast address nor ++ * exist on bond interface. ++ */ ++ if (ipv6_addr_any(saddr) || ++ (!ipv6_addr_equal(daddr, &in6addr_linklocal_allnodes) && ++ !bond_has_this_ip6(bond, daddr))) { + slave_dbg(bond->dev, slave->dev, "%s: sip %pI6c tip %pI6c not found\n", + __func__, saddr, daddr); + return; +@@ -3213,15 +3229,14 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond, + * see bond_arp_rcv(). 
+ */ + if (bond_is_active_slave(slave)) +- bond_validate_ns(bond, slave, saddr, daddr); ++ bond_validate_na(bond, slave, saddr, daddr); + else if (curr_active_slave && + time_after(slave_last_rx(bond, curr_active_slave), + curr_active_slave->last_link_up)) +- bond_validate_ns(bond, slave, saddr, daddr); ++ bond_validate_na(bond, slave, saddr, daddr); + else if (curr_arp_slave && +- bond_time_in_interval(bond, +- dev_trans_start(curr_arp_slave->dev), 1)) +- bond_validate_ns(bond, slave, saddr, daddr); ++ bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1)) ++ bond_validate_na(bond, slave, saddr, daddr); + + out: + return RX_HANDLER_ANOTHER; +@@ -3308,12 +3323,12 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) + * so it can wait + */ + bond_for_each_slave_rcu(bond, slave, iter) { +- unsigned long trans_start = dev_trans_start(slave->dev); ++ unsigned long last_tx = slave_last_tx(slave); + + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); + + if (slave->link != BOND_LINK_UP) { +- if (bond_time_in_interval(bond, trans_start, 1) && ++ if (bond_time_in_interval(bond, last_tx, 1) && + bond_time_in_interval(bond, slave->last_rx, 1)) { + + bond_propose_link_state(slave, BOND_LINK_UP); +@@ -3338,7 +3353,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) + * when the source ip is 0, so don't take the link down + * if we don't know our ip yet + */ +- if (!bond_time_in_interval(bond, trans_start, bond->params.missed_max) || ++ if (!bond_time_in_interval(bond, last_tx, bond->params.missed_max) || + !bond_time_in_interval(bond, slave->last_rx, bond->params.missed_max)) { + + bond_propose_link_state(slave, BOND_LINK_DOWN); +@@ -3404,7 +3419,7 @@ re_arm: + */ + static int bond_ab_arp_inspect(struct bonding *bond) + { +- unsigned long trans_start, last_rx; ++ unsigned long last_tx, last_rx; + struct list_head *iter; + struct slave *slave; + int commit = 0; +@@ -3455,9 +3470,9 @@ static int bond_ab_arp_inspect(struct bonding *bond) + * - (more than missed_max*delta since receive AND + * the bond has an IP address) + */ +- trans_start = dev_trans_start(slave->dev); ++ last_tx = slave_last_tx(slave); + if (bond_is_active_slave(slave) && +- (!bond_time_in_interval(bond, trans_start, bond->params.missed_max) || ++ (!bond_time_in_interval(bond, last_tx, bond->params.missed_max) || + !bond_time_in_interval(bond, last_rx, bond->params.missed_max))) { + bond_propose_link_state(slave, BOND_LINK_DOWN); + commit++; +@@ -3474,8 +3489,8 @@ static int bond_ab_arp_inspect(struct bonding *bond) + */ + static void bond_ab_arp_commit(struct bonding *bond) + { +- unsigned long trans_start; + struct list_head *iter; ++ unsigned long last_tx; + struct slave *slave; + + bond_for_each_slave(bond, slave, iter) { +@@ -3484,10 +3499,10 @@ static void bond_ab_arp_commit(struct bonding *bond) + continue; + + case BOND_LINK_UP: +- trans_start = dev_trans_start(slave->dev); ++ last_tx = slave_last_tx(slave); + if (rtnl_dereference(bond->curr_active_slave) != slave || + (!rtnl_dereference(bond->curr_active_slave) && +- bond_time_in_interval(bond, trans_start, 1))) { ++ bond_time_in_interval(bond, last_tx, 1))) { + struct slave *current_arp_slave; + + current_arp_slave = rtnl_dereference(bond->current_arp_slave); +diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c +index 6439b56f381f9..517bc3922ee24 100644 +--- a/drivers/net/dsa/ocelot/felix_vsc9959.c ++++ b/drivers/net/dsa/ocelot/felix_vsc9959.c +@@ -16,11 +16,13 @@ + #include + #include + #include 
++#include + #include "felix.h" + + #define VSC9959_NUM_PORTS 6 + + #define VSC9959_TAS_GCL_ENTRY_MAX 63 ++#define VSC9959_TAS_MIN_GATE_LEN_NS 33 + #define VSC9959_VCAP_POLICER_BASE 63 + #define VSC9959_VCAP_POLICER_MAX 383 + #define VSC9959_SWITCH_PCI_BAR 4 +@@ -1410,6 +1412,23 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot) + mdiobus_free(felix->imdio); + } + ++/* The switch considers any frame (regardless of size) as eligible for ++ * transmission if the traffic class gate is open for at least 33 ns. ++ * Overruns are prevented by cropping an interval at the end of the gate time ++ * slot for which egress scheduling is blocked, but we need to still keep 33 ns ++ * available for one packet to be transmitted, otherwise the port tc will hang. ++ * This function returns the size of a gate interval that remains available for ++ * setting the guard band, after reserving the space for one egress frame. ++ */ ++static u64 vsc9959_tas_remaining_gate_len_ps(u64 gate_len_ns) ++{ ++ /* Gate always open */ ++ if (gate_len_ns == U64_MAX) ++ return U64_MAX; ++ ++ return (gate_len_ns - VSC9959_TAS_MIN_GATE_LEN_NS) * PSEC_PER_NSEC; ++} ++ + /* Extract shortest continuous gate open intervals in ns for each traffic class + * of a cyclic tc-taprio schedule. If a gate is always open, the duration is + * considered U64_MAX. If the gate is always closed, it is considered 0. +@@ -1471,6 +1490,65 @@ static void vsc9959_tas_min_gate_lengths(struct tc_taprio_qopt_offload *taprio, + min_gate_len[tc] = 0; + } + ++/* ocelot_write_rix is a macro that concatenates QSYS_MAXSDU_CFG_* with _RSZ, ++ * so we need to spell out the register access to each traffic class in helper ++ * functions, to simplify callers ++ */ ++static void vsc9959_port_qmaxsdu_set(struct ocelot *ocelot, int port, int tc, ++ u32 max_sdu) ++{ ++ switch (tc) { ++ case 0: ++ ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_0, ++ port); ++ break; ++ case 1: ++ ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_1, ++ port); ++ break; ++ case 2: ++ ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_2, ++ port); ++ break; ++ case 3: ++ ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_3, ++ port); ++ break; ++ case 4: ++ ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_4, ++ port); ++ break; ++ case 5: ++ ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_5, ++ port); ++ break; ++ case 6: ++ ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_6, ++ port); ++ break; ++ case 7: ++ ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_7, ++ port); ++ break; ++ } ++} ++ ++static u32 vsc9959_port_qmaxsdu_get(struct ocelot *ocelot, int port, int tc) ++{ ++ switch (tc) { ++ case 0: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_0, port); ++ case 1: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_1, port); ++ case 2: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_2, port); ++ case 3: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_3, port); ++ case 4: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_4, port); ++ case 5: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_5, port); ++ case 6: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_6, port); ++ case 7: return ocelot_read_rix(ocelot, QSYS_QMAXSDU_CFG_7, port); ++ default: ++ return 0; ++ } ++} ++ + /* Update QSYS_PORT_MAX_SDU to make sure the static guard bands added by the + * switch (see the ALWAYS_GUARD_BAND_SCH_Q comment) are correct at all MTU + * values (the default value is 1518). 
Also, for traffic class windows smaller +@@ -1527,11 +1605,16 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) + + vsc9959_tas_min_gate_lengths(ocelot_port->taprio, min_gate_len); + ++ mutex_lock(&ocelot->fwd_domain_lock); ++ + for (tc = 0; tc < OCELOT_NUM_TC; tc++) { ++ u64 remaining_gate_len_ps; + u32 max_sdu; + +- if (min_gate_len[tc] == U64_MAX /* Gate always open */ || +- min_gate_len[tc] * 1000 > needed_bit_time_ps) { ++ remaining_gate_len_ps = ++ vsc9959_tas_remaining_gate_len_ps(min_gate_len[tc]); ++ ++ if (remaining_gate_len_ps > needed_bit_time_ps) { + /* Setting QMAXSDU_CFG to 0 disables oversized frame + * dropping. + */ +@@ -1544,9 +1627,15 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) + /* If traffic class doesn't support a full MTU sized + * frame, make sure to enable oversize frame dropping + * for frames larger than the smallest that would fit. ++ * ++ * However, the exact same register, QSYS_QMAXSDU_CFG_*, ++ * controls not only oversized frame dropping, but also ++ * per-tc static guard band lengths, so it reduces the ++ * useful gate interval length. Therefore, be careful ++ * to calculate a guard band (and therefore max_sdu) ++ * that still leaves 33 ns available in the time slot. + */ +- max_sdu = div_u64(min_gate_len[tc] * 1000, +- picos_per_byte); ++ max_sdu = div_u64(remaining_gate_len_ps, picos_per_byte); + /* A TC gate may be completely closed, which is a + * special case where all packets are oversized. + * Any limit smaller than 64 octets accomplishes this +@@ -1569,47 +1658,14 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) + max_sdu); + } + +- /* ocelot_write_rix is a macro that concatenates +- * QSYS_MAXSDU_CFG_* with _RSZ, so we need to spell out +- * the writes to each traffic class +- */ +- switch (tc) { +- case 0: +- ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_0, +- port); +- break; +- case 1: +- ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_1, +- port); +- break; +- case 2: +- ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_2, +- port); +- break; +- case 3: +- ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_3, +- port); +- break; +- case 4: +- ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_4, +- port); +- break; +- case 5: +- ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_5, +- port); +- break; +- case 6: +- ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_6, +- port); +- break; +- case 7: +- ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_7, +- port); +- break; +- } ++ vsc9959_port_qmaxsdu_set(ocelot, port, tc, max_sdu); + } + + ocelot_write_rix(ocelot, maxlen, QSYS_PORT_MAX_SDU, port); ++ ++ ocelot->ops->cut_through_fwd(ocelot); ++ ++ mutex_unlock(&ocelot->fwd_domain_lock); + } + + static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port, +@@ -1636,13 +1692,13 @@ static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port, + break; + } + ++ mutex_lock(&ocelot->tas_lock); ++ + ocelot_rmw_rix(ocelot, + QSYS_TAG_CONFIG_LINK_SPEED(tas_speed), + QSYS_TAG_CONFIG_LINK_SPEED_M, + QSYS_TAG_CONFIG, port); + +- mutex_lock(&ocelot->tas_lock); +- + if (ocelot_port->taprio) + vsc9959_tas_guard_bands_update(ocelot, port); + +@@ -2709,7 +2765,7 @@ static void vsc9959_cut_through_fwd(struct ocelot *ocelot) + { + struct felix *felix = ocelot_to_felix(ocelot); + struct dsa_switch *ds = felix->ds; +- int port, other_port; ++ int tc, port, other_port; + + lockdep_assert_held(&ocelot->fwd_domain_lock); + +@@ -2753,19 +2809,27 @@ 
static void vsc9959_cut_through_fwd(struct ocelot *ocelot) + min_speed = other_ocelot_port->speed; + } + +- /* Enable cut-through forwarding for all traffic classes. */ +- if (ocelot_port->speed == min_speed) ++ /* Enable cut-through forwarding for all traffic classes that ++ * don't have oversized dropping enabled, since this check is ++ * bypassed in cut-through mode. ++ */ ++ if (ocelot_port->speed == min_speed) { + val = GENMASK(7, 0); + ++ for (tc = 0; tc < OCELOT_NUM_TC; tc++) ++ if (vsc9959_port_qmaxsdu_get(ocelot, port, tc)) ++ val &= ~BIT(tc); ++ } ++ + set: + tmp = ocelot_read_rix(ocelot, ANA_CUT_THRU_CFG, port); + if (tmp == val) + continue; + + dev_dbg(ocelot->dev, +- "port %d fwd mask 0x%lx speed %d min_speed %d, %s cut-through forwarding\n", ++ "port %d fwd mask 0x%lx speed %d min_speed %d, %s cut-through forwarding on TC mask 0x%x\n", + port, mask, ocelot_port->speed, min_speed, +- val ? "enabling" : "disabling"); ++ val ? "enabling" : "disabling", val); + + ocelot_write_rix(ocelot, val, ANA_CUT_THRU_CFG, port); + } +diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h +index 407fe8f340a06..c5b61bc80f783 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e.h ++++ b/drivers/net/ethernet/intel/i40e/i40e.h +@@ -1291,4 +1291,18 @@ int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, + int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add); ++ ++/** ++ * i40e_is_tc_mqprio_enabled - check if TC MQPRIO is enabled on PF ++ * @pf: pointer to a pf. ++ * ++ * Check and return value of flag I40E_FLAG_TC_MQPRIO. ++ * ++ * Return: I40E_FLAG_TC_MQPRIO set state. ++ **/ ++static inline u32 i40e_is_tc_mqprio_enabled(struct i40e_pf *pf) ++{ ++ return pf->flags & I40E_FLAG_TC_MQPRIO; ++} ++ + #endif /* _I40E_H_ */ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c +index ea2bb0140a6eb..10d7a982a5b9b 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_client.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_client.c +@@ -177,6 +177,10 @@ void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset) + "Cannot locate client instance close routine\n"); + return; + } ++ if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) { ++ dev_dbg(&pf->pdev->dev, "Client is not open, abort close\n"); ++ return; ++ } + cdev->client->ops->close(&cdev->lan_info, cdev->client, reset); + clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state); + i40e_client_release_qvlist(&cdev->lan_info); +@@ -429,7 +433,6 @@ void i40e_client_subtask(struct i40e_pf *pf) + /* Remove failed client instance */ + clear_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state); +- i40e_client_del_instance(pf); + return; + } + } +diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +index 22a61802a4027..ed9984f1e1b9f 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +@@ -4931,7 +4931,7 @@ static int i40e_set_channels(struct net_device *dev, + /* We do not support setting channels via ethtool when TCs are + * configured through mqprio + */ +- if (pf->flags & I40E_FLAG_TC_MQPRIO) ++ if (i40e_is_tc_mqprio_enabled(pf)) + return -EINVAL; + + /* verify they are not requesting separate vectors */ +diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c +index 71a8e1698ed48..1aaf0c5ddf6cf 100644 +--- 
a/drivers/net/ethernet/intel/i40e/i40e_main.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c +@@ -5339,7 +5339,7 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) + u8 num_tc = 0; + struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config; + +- if (pf->flags & I40E_FLAG_TC_MQPRIO) ++ if (i40e_is_tc_mqprio_enabled(pf)) + return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc; + + /* If neither MQPRIO nor DCB is enabled, then always use single TC */ +@@ -5371,7 +5371,7 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) + **/ + static u8 i40e_pf_get_tc_map(struct i40e_pf *pf) + { +- if (pf->flags & I40E_FLAG_TC_MQPRIO) ++ if (i40e_is_tc_mqprio_enabled(pf)) + return i40e_mqprio_get_enabled_tc(pf); + + /* If neither MQPRIO nor DCB is enabled for this PF then just return +@@ -5468,7 +5468,7 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, + int i; + + /* There is no need to reset BW when mqprio mode is on. */ +- if (pf->flags & I40E_FLAG_TC_MQPRIO) ++ if (i40e_is_tc_mqprio_enabled(pf)) + return 0; + if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) { + ret = i40e_set_bw_limit(vsi, vsi->seid, 0); +@@ -5540,7 +5540,7 @@ static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc) + vsi->tc_config.tc_info[i].qoffset); + } + +- if (pf->flags & I40E_FLAG_TC_MQPRIO) ++ if (i40e_is_tc_mqprio_enabled(pf)) + return; + + /* Assign UP2TC map for the VSI */ +@@ -5701,7 +5701,7 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc) + ctxt.vf_num = 0; + ctxt.uplink_seid = vsi->uplink_seid; + ctxt.info = vsi->info; +- if (vsi->back->flags & I40E_FLAG_TC_MQPRIO) { ++ if (i40e_is_tc_mqprio_enabled(pf)) { + ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc); + if (ret) + goto out; +@@ -6425,7 +6425,7 @@ int i40e_create_queue_channel(struct i40e_vsi *vsi, + pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + + if (vsi->type == I40E_VSI_MAIN) { +- if (pf->flags & I40E_FLAG_TC_MQPRIO) ++ if (i40e_is_tc_mqprio_enabled(pf)) + i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); + else + i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); +@@ -6536,6 +6536,9 @@ static int i40e_configure_queue_channels(struct i40e_vsi *vsi) + vsi->tc_seid_map[i] = ch->seid; + } + } ++ ++ /* reset to reconfigure TX queue contexts */ ++ i40e_do_reset(vsi->back, I40E_PF_RESET_FLAG, true); + return ret; + + err_free: +@@ -7819,7 +7822,7 @@ static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev) + netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n"); + return ERR_PTR(-EINVAL); + } +- if ((pf->flags & I40E_FLAG_TC_MQPRIO)) { ++ if (i40e_is_tc_mqprio_enabled(pf)) { + netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n"); + return ERR_PTR(-EINVAL); + } +@@ -8072,7 +8075,7 @@ config_tc: + /* Quiesce VSI queues */ + i40e_quiesce_vsi(vsi); + +- if (!hw && !(pf->flags & I40E_FLAG_TC_MQPRIO)) ++ if (!hw && !i40e_is_tc_mqprio_enabled(pf)) + i40e_remove_queue_channels(vsi); + + /* Configure VSI for enabled TCs */ +@@ -8096,7 +8099,7 @@ config_tc: + "Setup channel (id:%u) utilizing num_queues %d\n", + vsi->seid, vsi->tc_config.tc_info[0].qcount); + +- if (pf->flags & I40E_FLAG_TC_MQPRIO) { ++ if (i40e_is_tc_mqprio_enabled(pf)) { + if (vsi->mqprio_qopt.max_rate[0]) { + u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; + +@@ -10750,7 +10753,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) + * unless I40E_FLAG_TC_MQPRIO was enabled or DCB + * is not supported with new link speed + */ +- if 
(pf->flags & I40E_FLAG_TC_MQPRIO) { ++ if (i40e_is_tc_mqprio_enabled(pf)) { + i40e_aq_set_dcb_parameters(hw, false, NULL); + } else { + if (I40E_IS_X710TL_DEVICE(hw->device_id) && +diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c +index af69ccc6e8d2f..07f1e209d524d 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c +@@ -3689,7 +3689,8 @@ u16 i40e_lan_select_queue(struct net_device *netdev, + u8 prio; + + /* is DCB enabled at all? */ +- if (vsi->tc_config.numtc == 1) ++ if (vsi->tc_config.numtc == 1 || ++ i40e_is_tc_mqprio_enabled(vsi->back)) + return netdev_pick_tx(netdev, skb, sb_dev); + + prio = skb->priority; +diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c +index 6d159334da9ec..981c43b204ff4 100644 +--- a/drivers/net/ethernet/intel/iavf/iavf_main.c ++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c +@@ -2789,6 +2789,11 @@ static void iavf_reset_task(struct work_struct *work) + int i = 0, err; + bool running; + ++ /* Detach interface to avoid subsequent NDO callbacks */ ++ rtnl_lock(); ++ netif_device_detach(netdev); ++ rtnl_unlock(); ++ + /* When device is being removed it doesn't make sense to run the reset + * task, just return in such a case. + */ +@@ -2796,7 +2801,7 @@ static void iavf_reset_task(struct work_struct *work) + if (adapter->state != __IAVF_REMOVE) + queue_work(iavf_wq, &adapter->reset_task); + +- return; ++ goto reset_finish; + } + + while (!mutex_trylock(&adapter->client_lock)) +@@ -2866,7 +2871,6 @@ continue_reset: + + if (running) { + netif_carrier_off(netdev); +- netif_tx_stop_all_queues(netdev); + adapter->link_up = false; + iavf_napi_disable_all(adapter); + } +@@ -2996,7 +3000,7 @@ continue_reset: + mutex_unlock(&adapter->client_lock); + mutex_unlock(&adapter->crit_lock); + +- return; ++ goto reset_finish; + reset_err: + if (running) { + set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); +@@ -3007,6 +3011,10 @@ reset_err: + mutex_unlock(&adapter->client_lock); + mutex_unlock(&adapter->crit_lock); + dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); ++reset_finish: ++ rtnl_lock(); ++ netif_device_attach(netdev); ++ rtnl_unlock(); + } + + /** +diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c +index 136d7911adb48..1e32438081780 100644 +--- a/drivers/net/ethernet/intel/ice/ice_base.c ++++ b/drivers/net/ethernet/intel/ice/ice_base.c +@@ -7,18 +7,6 @@ + #include "ice_dcb_lib.h" + #include "ice_sriov.h" + +-static bool ice_alloc_rx_buf_zc(struct ice_rx_ring *rx_ring) +-{ +- rx_ring->xdp_buf = kcalloc(rx_ring->count, sizeof(*rx_ring->xdp_buf), GFP_KERNEL); +- return !!rx_ring->xdp_buf; +-} +- +-static bool ice_alloc_rx_buf(struct ice_rx_ring *rx_ring) +-{ +- rx_ring->rx_buf = kcalloc(rx_ring->count, sizeof(*rx_ring->rx_buf), GFP_KERNEL); +- return !!rx_ring->rx_buf; +-} +- + /** + * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI + * @qs_cfg: gathered variables needed for PF->VSI queues assignment +@@ -519,11 +507,8 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) + xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev, + ring->q_index, ring->q_vector->napi.napi_id); + +- kfree(ring->rx_buf); + ring->xsk_pool = ice_xsk_pool(ring); + if (ring->xsk_pool) { +- if (!ice_alloc_rx_buf_zc(ring)) +- return -ENOMEM; + xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq); + + ring->rx_buf_len = +@@ -538,8 +523,6 @@ int ice_vsi_cfg_rxq(struct 
ice_rx_ring *ring) + dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n", + ring->q_index); + } else { +- if (!ice_alloc_rx_buf(ring)) +- return -ENOMEM; + if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) + /* coverity[check_return] */ + xdp_rxq_info_reg(&ring->xdp_rxq, +diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c +index 3d45e075204e3..4c6bb7482b362 100644 +--- a/drivers/net/ethernet/intel/ice/ice_main.c ++++ b/drivers/net/ethernet/intel/ice/ice_main.c +@@ -2898,10 +2898,18 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, + if (xdp_ring_err) + NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); + } ++ /* reallocate Rx queues that are used for zero-copy */ ++ xdp_ring_err = ice_realloc_zc_buf(vsi, true); ++ if (xdp_ring_err) ++ NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed"); + } else if (ice_is_xdp_ena_vsi(vsi) && !prog) { + xdp_ring_err = ice_destroy_xdp_rings(vsi); + if (xdp_ring_err) + NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed"); ++ /* reallocate Rx queues that were used for zero-copy */ ++ xdp_ring_err = ice_realloc_zc_buf(vsi, false); ++ if (xdp_ring_err) ++ NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed"); + } else { + /* safe to call even when prog == vsi->xdp_prog as + * dev_xdp_install in net/core/dev.c incremented prog's +@@ -3904,7 +3912,7 @@ static int ice_init_pf(struct ice_pf *pf) + + pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL); + if (!pf->avail_rxqs) { +- devm_kfree(ice_pf_to_dev(pf), pf->avail_txqs); ++ bitmap_free(pf->avail_txqs); + pf->avail_txqs = NULL; + return -ENOMEM; + } +diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c +index e48e29258450f..03ce85f6e6df8 100644 +--- a/drivers/net/ethernet/intel/ice/ice_xsk.c ++++ b/drivers/net/ethernet/intel/ice/ice_xsk.c +@@ -192,6 +192,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) + err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); + if (err) + return err; ++ ice_clean_rx_ring(rx_ring); + + ice_qvec_toggle_napi(vsi, q_vector, false); + ice_qp_clean_rings(vsi, q_idx); +@@ -316,6 +317,62 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) + return 0; + } + ++/** ++ * ice_realloc_rx_xdp_bufs - reallocate for either XSK or normal buffer ++ * @rx_ring: Rx ring ++ * @pool_present: is pool for XSK present ++ * ++ * Try allocating memory and return ENOMEM, if failed to allocate. ++ * If allocation was successful, substitute buffer with allocated one. ++ * Returns 0 on success, negative on failure ++ */ ++static int ++ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present) ++{ ++ size_t elem_size = pool_present ? sizeof(*rx_ring->xdp_buf) : ++ sizeof(*rx_ring->rx_buf); ++ void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL); ++ ++ if (!sw_ring) ++ return -ENOMEM; ++ ++ if (pool_present) { ++ kfree(rx_ring->rx_buf); ++ rx_ring->rx_buf = NULL; ++ rx_ring->xdp_buf = sw_ring; ++ } else { ++ kfree(rx_ring->xdp_buf); ++ rx_ring->xdp_buf = NULL; ++ rx_ring->rx_buf = sw_ring; ++ } ++ ++ return 0; ++} ++ ++/** ++ * ice_realloc_zc_buf - reallocate XDP ZC queue pairs ++ * @vsi: Current VSI ++ * @zc: is zero copy set ++ * ++ * Reallocate buffer for rx_rings that might be used by XSK. ++ * XDP requires more memory, than rx_buf provides. 
++ * Returns 0 on success, negative on failure ++ */ ++int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc) ++{ ++ struct ice_rx_ring *rx_ring; ++ unsigned long q; ++ ++ for_each_set_bit(q, vsi->af_xdp_zc_qps, ++ max_t(int, vsi->alloc_txq, vsi->alloc_rxq)) { ++ rx_ring = vsi->rx_rings[q]; ++ if (ice_realloc_rx_xdp_bufs(rx_ring, zc)) ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ + /** + * ice_xsk_pool_setup - enable/disable a buffer pool region depending on its state + * @vsi: Current VSI +@@ -345,11 +402,17 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) + if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi); + + if (if_running) { ++ struct ice_rx_ring *rx_ring = vsi->rx_rings[qid]; ++ + ret = ice_qp_dis(vsi, qid); + if (ret) { + netdev_err(vsi->netdev, "ice_qp_dis error = %d\n", ret); + goto xsk_pool_if_up; + } ++ ++ ret = ice_realloc_rx_xdp_bufs(rx_ring, pool_present); ++ if (ret) ++ goto xsk_pool_if_up; + } + + pool_failure = pool_present ? ice_xsk_pool_enable(vsi, pool, qid) : +diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h +index 21faec8e97db1..4edbe81eb6460 100644 +--- a/drivers/net/ethernet/intel/ice/ice_xsk.h ++++ b/drivers/net/ethernet/intel/ice/ice_xsk.h +@@ -27,6 +27,7 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); + void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); + void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); + bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, u32 budget, int napi_budget); ++int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc); + #else + static inline bool + ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring, +@@ -72,5 +73,12 @@ ice_xsk_wakeup(struct net_device __always_unused *netdev, + + static inline void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) { } + static inline void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring) { } ++ ++static inline int ++ice_realloc_zc_buf(struct ice_vsi __always_unused *vsi, ++ bool __always_unused zc) ++{ ++ return 0; ++} + #endif /* CONFIG_XDP_SOCKETS */ + #endif /* !_ICE_XSK_H_ */ +diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c +index dab8f3f771f84..cfe804bc8d205 100644 +--- a/drivers/net/ethernet/mediatek/mtk_ppe.c ++++ b/drivers/net/ethernet/mediatek/mtk_ppe.c +@@ -412,7 +412,7 @@ __mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry) + if (entry->hash != 0xffff) { + ppe->foe_table[entry->hash].ib1 &= ~MTK_FOE_IB1_STATE; + ppe->foe_table[entry->hash].ib1 |= FIELD_PREP(MTK_FOE_IB1_STATE, +- MTK_FOE_STATE_BIND); ++ MTK_FOE_STATE_UNBIND); + dma_wmb(); + } + entry->hash = 0xffff; +diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h +index 1f5cf1c9a9475..69ffce04d6306 100644 +--- a/drivers/net/ethernet/mediatek/mtk_ppe.h ++++ b/drivers/net/ethernet/mediatek/mtk_ppe.h +@@ -293,6 +293,9 @@ mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash) + if (!ppe) + return; + ++ if (hash > MTK_PPE_HASH_MASK) ++ return; ++ + now = (u16)jiffies; + diff = now - ppe->foe_check_time[hash]; + if (diff < HZ / 10) +diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c +index 73f7962a37d33..c49062ad72c6c 100644 +--- a/drivers/net/phy/meson-gxl.c ++++ b/drivers/net/phy/meson-gxl.c +@@ -243,13 +243,7 @@ static irqreturn_t meson_gxl_handle_interrupt(struct phy_device *phydev) + irq_status == INTSRC_ENERGY_DETECT) + return IRQ_HANDLED; + +- /* Give PHY some time before 
MAC starts sending data. This works +- * around an issue where network doesn't come up properly. +- */ +- if (!(irq_status & INTSRC_LINK_DOWN)) +- phy_queue_state_machine(phydev, msecs_to_jiffies(100)); +- else +- phy_trigger_machine(phydev); ++ phy_trigger_machine(phydev); + + return IRQ_HANDLED; + } +diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c +index d4c93d59bc539..8569a545e0a3f 100644 +--- a/drivers/net/phy/microchip_t1.c ++++ b/drivers/net/phy/microchip_t1.c +@@ -28,12 +28,16 @@ + + /* Interrupt Source Register */ + #define LAN87XX_INTERRUPT_SOURCE (0x18) ++#define LAN87XX_INTERRUPT_SOURCE_2 (0x08) + + /* Interrupt Mask Register */ + #define LAN87XX_INTERRUPT_MASK (0x19) + #define LAN87XX_MASK_LINK_UP (0x0004) + #define LAN87XX_MASK_LINK_DOWN (0x0002) + ++#define LAN87XX_INTERRUPT_MASK_2 (0x09) ++#define LAN87XX_MASK_COMM_RDY BIT(10) ++ + /* MISC Control 1 Register */ + #define LAN87XX_CTRL_1 (0x11) + #define LAN87XX_MASK_RGMII_TXC_DLY_EN (0x4000) +@@ -424,17 +428,55 @@ static int lan87xx_phy_config_intr(struct phy_device *phydev) + int rc, val = 0; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { +- /* unmask all source and clear them before enable */ +- rc = phy_write(phydev, LAN87XX_INTERRUPT_MASK, 0x7FFF); ++ /* clear all interrupt */ ++ rc = phy_write(phydev, LAN87XX_INTERRUPT_MASK, val); ++ if (rc < 0) ++ return rc; ++ + rc = phy_read(phydev, LAN87XX_INTERRUPT_SOURCE); +- val = LAN87XX_MASK_LINK_UP | LAN87XX_MASK_LINK_DOWN; ++ if (rc < 0) ++ return rc; ++ ++ rc = access_ereg(phydev, PHYACC_ATTR_MODE_WRITE, ++ PHYACC_ATTR_BANK_MISC, ++ LAN87XX_INTERRUPT_MASK_2, val); ++ if (rc < 0) ++ return rc; ++ ++ rc = access_ereg(phydev, PHYACC_ATTR_MODE_READ, ++ PHYACC_ATTR_BANK_MISC, ++ LAN87XX_INTERRUPT_SOURCE_2, 0); ++ if (rc < 0) ++ return rc; ++ ++ /* enable link down and comm ready interrupt */ ++ val = LAN87XX_MASK_LINK_DOWN; + rc = phy_write(phydev, LAN87XX_INTERRUPT_MASK, val); ++ if (rc < 0) ++ return rc; ++ ++ val = LAN87XX_MASK_COMM_RDY; ++ rc = access_ereg(phydev, PHYACC_ATTR_MODE_WRITE, ++ PHYACC_ATTR_BANK_MISC, ++ LAN87XX_INTERRUPT_MASK_2, val); + } else { + rc = phy_write(phydev, LAN87XX_INTERRUPT_MASK, val); +- if (rc) ++ if (rc < 0) + return rc; + + rc = phy_read(phydev, LAN87XX_INTERRUPT_SOURCE); ++ if (rc < 0) ++ return rc; ++ ++ rc = access_ereg(phydev, PHYACC_ATTR_MODE_WRITE, ++ PHYACC_ATTR_BANK_MISC, ++ LAN87XX_INTERRUPT_MASK_2, val); ++ if (rc < 0) ++ return rc; ++ ++ rc = access_ereg(phydev, PHYACC_ATTR_MODE_READ, ++ PHYACC_ATTR_BANK_MISC, ++ LAN87XX_INTERRUPT_SOURCE_2, 0); + } + + return rc < 0 ? rc : 0; +@@ -444,6 +486,14 @@ static irqreturn_t lan87xx_handle_interrupt(struct phy_device *phydev) + { + int irq_status; + ++ irq_status = access_ereg(phydev, PHYACC_ATTR_MODE_READ, ++ PHYACC_ATTR_BANK_MISC, ++ LAN87XX_INTERRUPT_SOURCE_2, 0); ++ if (irq_status < 0) { ++ phy_error(phydev); ++ return IRQ_NONE; ++ } ++ + irq_status = phy_read(phydev, LAN87XX_INTERRUPT_SOURCE); + if (irq_status < 0) { + phy_error(phydev); +diff --git a/drivers/net/wireless/intel/iwlegacy/4965-rs.c b/drivers/net/wireless/intel/iwlegacy/4965-rs.c +index c62f299b9e0a8..d8a5dbf89a021 100644 +--- a/drivers/net/wireless/intel/iwlegacy/4965-rs.c ++++ b/drivers/net/wireless/intel/iwlegacy/4965-rs.c +@@ -2403,7 +2403,7 @@ il4965_rs_fill_link_cmd(struct il_priv *il, struct il_lq_sta *lq_sta, + /* Repeat initial/next rate. + * For legacy IL_NUMBER_TRY == 1, this loop will not execute. + * For HT IL_HT_NUMBER_TRY == 3, this executes twice. 
*/ +- while (repeat_rate > 0) { ++ while (repeat_rate > 0 && idx < (LINK_QUAL_MAX_RETRY_NUM - 1)) { + if (is_legacy(tbl_type.lq_type)) { + if (ant_toggle_cnt < NUM_TRY_BEFORE_ANT_TOGGLE) + ant_toggle_cnt++; +@@ -2422,8 +2422,6 @@ il4965_rs_fill_link_cmd(struct il_priv *il, struct il_lq_sta *lq_sta, + cpu_to_le32(new_rate); + repeat_rate--; + idx++; +- if (idx >= LINK_QUAL_MAX_RETRY_NUM) +- goto out; + } + + il4965_rs_get_tbl_info_from_mcs(new_rate, lq_sta->band, +@@ -2468,7 +2466,6 @@ il4965_rs_fill_link_cmd(struct il_priv *il, struct il_lq_sta *lq_sta, + repeat_rate--; + } + +-out: + lq_cmd->agg_params.agg_frame_cnt_limit = LINK_QUAL_AGG_FRAME_LIMIT_DEF; + lq_cmd->agg_params.agg_dis_start_th = LINK_QUAL_AGG_DISABLE_START_DEF; + +diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c +index b0f58bcf70cb0..106c88b723b90 100644 +--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c ++++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c +@@ -345,7 +345,7 @@ int mt7921e_mac_reset(struct mt7921_dev *dev) + + err = mt7921e_driver_own(dev); + if (err) +- return err; ++ goto out; + + err = mt7921_run_firmware(dev); + if (err) +diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.h b/drivers/net/wireless/microchip/wilc1000/netdev.h +index a067274c20144..bf001e9def6aa 100644 +--- a/drivers/net/wireless/microchip/wilc1000/netdev.h ++++ b/drivers/net/wireless/microchip/wilc1000/netdev.h +@@ -254,6 +254,7 @@ struct wilc { + u8 *rx_buffer; + u32 rx_buffer_offset; + u8 *tx_buffer; ++ u32 *vmm_table; + + struct txq_handle txq[NQUEUES]; + int txq_entries; +diff --git a/drivers/net/wireless/microchip/wilc1000/sdio.c b/drivers/net/wireless/microchip/wilc1000/sdio.c +index 7962c11cfe848..56f924a31bc66 100644 +--- a/drivers/net/wireless/microchip/wilc1000/sdio.c ++++ b/drivers/net/wireless/microchip/wilc1000/sdio.c +@@ -27,6 +27,7 @@ struct wilc_sdio { + bool irq_gpio; + u32 block_size; + int has_thrpt_enh3; ++ u8 *cmd53_buf; + }; + + struct sdio_cmd52 { +@@ -46,6 +47,7 @@ struct sdio_cmd53 { + u32 count: 9; + u8 *buffer; + u32 block_size; ++ bool use_global_buf; + }; + + static const struct wilc_hif_func wilc_hif_sdio; +@@ -90,6 +92,8 @@ static int wilc_sdio_cmd53(struct wilc *wilc, struct sdio_cmd53 *cmd) + { + struct sdio_func *func = container_of(wilc->dev, struct sdio_func, dev); + int size, ret; ++ struct wilc_sdio *sdio_priv = wilc->bus_data; ++ u8 *buf = cmd->buffer; + + sdio_claim_host(func); + +@@ -100,12 +104,23 @@ static int wilc_sdio_cmd53(struct wilc *wilc, struct sdio_cmd53 *cmd) + else + size = cmd->count; + ++ if (cmd->use_global_buf) { ++ if (size > sizeof(u32)) ++ return -EINVAL; ++ ++ buf = sdio_priv->cmd53_buf; ++ } ++ + if (cmd->read_write) { /* write */ +- ret = sdio_memcpy_toio(func, cmd->address, +- (void *)cmd->buffer, size); ++ if (cmd->use_global_buf) ++ memcpy(buf, cmd->buffer, size); ++ ++ ret = sdio_memcpy_toio(func, cmd->address, buf, size); + } else { /* read */ +- ret = sdio_memcpy_fromio(func, (void *)cmd->buffer, +- cmd->address, size); ++ ret = sdio_memcpy_fromio(func, buf, cmd->address, size); ++ ++ if (cmd->use_global_buf) ++ memcpy(cmd->buffer, buf, size); + } + + sdio_release_host(func); +@@ -127,6 +142,12 @@ static int wilc_sdio_probe(struct sdio_func *func, + if (!sdio_priv) + return -ENOMEM; + ++ sdio_priv->cmd53_buf = kzalloc(sizeof(u32), GFP_KERNEL); ++ if (!sdio_priv->cmd53_buf) { ++ ret = -ENOMEM; ++ goto free; ++ } ++ + ret = wilc_cfg80211_init(&wilc, &func->dev, WILC_HIF_SDIO, + 
&wilc_hif_sdio); + if (ret) +@@ -160,6 +181,7 @@ dispose_irq: + irq_dispose_mapping(wilc->dev_irq_num); + wilc_netdev_cleanup(wilc); + free: ++ kfree(sdio_priv->cmd53_buf); + kfree(sdio_priv); + return ret; + } +@@ -171,6 +193,7 @@ static void wilc_sdio_remove(struct sdio_func *func) + + clk_disable_unprepare(wilc->rtc_clk); + wilc_netdev_cleanup(wilc); ++ kfree(sdio_priv->cmd53_buf); + kfree(sdio_priv); + } + +@@ -367,8 +390,9 @@ static int wilc_sdio_write_reg(struct wilc *wilc, u32 addr, u32 data) + cmd.address = WILC_SDIO_FBR_DATA_REG; + cmd.block_mode = 0; + cmd.increment = 1; +- cmd.count = 4; ++ cmd.count = sizeof(u32); + cmd.buffer = (u8 *)&data; ++ cmd.use_global_buf = true; + cmd.block_size = sdio_priv->block_size; + ret = wilc_sdio_cmd53(wilc, &cmd); + if (ret) +@@ -406,6 +430,7 @@ static int wilc_sdio_write(struct wilc *wilc, u32 addr, u8 *buf, u32 size) + nblk = size / block_size; + nleft = size % block_size; + ++ cmd.use_global_buf = false; + if (nblk > 0) { + cmd.block_mode = 1; + cmd.increment = 1; +@@ -484,8 +509,9 @@ static int wilc_sdio_read_reg(struct wilc *wilc, u32 addr, u32 *data) + cmd.address = WILC_SDIO_FBR_DATA_REG; + cmd.block_mode = 0; + cmd.increment = 1; +- cmd.count = 4; ++ cmd.count = sizeof(u32); + cmd.buffer = (u8 *)data; ++ cmd.use_global_buf = true; + + cmd.block_size = sdio_priv->block_size; + ret = wilc_sdio_cmd53(wilc, &cmd); +@@ -527,6 +553,7 @@ static int wilc_sdio_read(struct wilc *wilc, u32 addr, u8 *buf, u32 size) + nblk = size / block_size; + nleft = size % block_size; + ++ cmd.use_global_buf = false; + if (nblk > 0) { + cmd.block_mode = 1; + cmd.increment = 1; +diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.c b/drivers/net/wireless/microchip/wilc1000/wlan.c +index 48441f0389ca1..0c8a571486d25 100644 +--- a/drivers/net/wireless/microchip/wilc1000/wlan.c ++++ b/drivers/net/wireless/microchip/wilc1000/wlan.c +@@ -714,7 +714,7 @@ int wilc_wlan_handle_txq(struct wilc *wilc, u32 *txq_count) + int ret = 0; + int counter; + int timeout; +- u32 vmm_table[WILC_VMM_TBL_SIZE]; ++ u32 *vmm_table = wilc->vmm_table; + u8 ac_pkt_num_to_chip[NQUEUES] = {0, 0, 0, 0}; + const struct wilc_hif_func *func; + int srcu_idx; +@@ -1251,6 +1251,8 @@ void wilc_wlan_cleanup(struct net_device *dev) + while ((rqe = wilc_wlan_rxq_remove(wilc))) + kfree(rqe); + ++ kfree(wilc->vmm_table); ++ wilc->vmm_table = NULL; + kfree(wilc->rx_buffer); + wilc->rx_buffer = NULL; + kfree(wilc->tx_buffer); +@@ -1485,6 +1487,14 @@ int wilc_wlan_init(struct net_device *dev) + goto fail; + } + ++ if (!wilc->vmm_table) ++ wilc->vmm_table = kzalloc(WILC_VMM_TBL_SIZE, GFP_KERNEL); ++ ++ if (!wilc->vmm_table) { ++ ret = -ENOBUFS; ++ goto fail; ++ } ++ + if (!wilc->tx_buffer) + wilc->tx_buffer = kmalloc(WILC_TX_BUFF_SIZE, GFP_KERNEL); + +@@ -1509,7 +1519,8 @@ int wilc_wlan_init(struct net_device *dev) + return 0; + + fail: +- ++ kfree(wilc->vmm_table); ++ wilc->vmm_table = NULL; + kfree(wilc->rx_buffer); + wilc->rx_buffer = NULL; + kfree(wilc->tx_buffer); +diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c +index 990360d75cb64..e85b3c5d4acce 100644 +--- a/drivers/net/xen-netback/xenbus.c ++++ b/drivers/net/xen-netback/xenbus.c +@@ -256,7 +256,6 @@ static void backend_disconnect(struct backend_info *be) + unsigned int queue_index; + + xen_unregister_watchers(vif); +- xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status"); + #ifdef CONFIG_DEBUG_FS + xenvif_debugfs_delif(vif); + #endif /* CONFIG_DEBUG_FS */ +@@ -984,6 +983,7 @@ static int netback_remove(struct 
xenbus_device *dev) + struct backend_info *be = dev_get_drvdata(&dev->dev); + + unregister_hotplug_status_watch(be); ++ xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status"); + if (be->vif) { + kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE); + backend_disconnect(be); +diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c +index 7a9e6ffa23429..daa0e160e1212 100644 +--- a/drivers/nvme/host/tcp.c ++++ b/drivers/nvme/host/tcp.c +@@ -121,7 +121,6 @@ struct nvme_tcp_queue { + struct mutex send_mutex; + struct llist_head req_list; + struct list_head send_list; +- bool more_requests; + + /* recv state */ + void *pdu; +@@ -318,7 +317,7 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue) + static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) + { + return !list_empty(&queue->send_list) || +- !llist_empty(&queue->req_list) || queue->more_requests; ++ !llist_empty(&queue->req_list); + } + + static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, +@@ -337,9 +336,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, + */ + if (queue->io_cpu == raw_smp_processor_id() && + sync && empty && mutex_trylock(&queue->send_mutex)) { +- queue->more_requests = !last; + nvme_tcp_send_all(queue); +- queue->more_requests = false; + mutex_unlock(&queue->send_mutex); + } + +@@ -1227,7 +1224,7 @@ static void nvme_tcp_io_work(struct work_struct *w) + else if (unlikely(result < 0)) + return; + +- if (!pending) ++ if (!pending || !queue->rd_enabled) + return; + + } while (!time_after(jiffies, deadline)); /* quota is exhausted */ +diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c +index c27660a660d9a..a339719100051 100644 +--- a/drivers/nvme/target/core.c ++++ b/drivers/nvme/target/core.c +@@ -735,6 +735,8 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status) + + static void __nvmet_req_complete(struct nvmet_req *req, u16 status) + { ++ struct nvmet_ns *ns = req->ns; ++ + if (!req->sq->sqhd_disabled) + nvmet_update_sq_head(req); + req->cqe->sq_id = cpu_to_le16(req->sq->qid); +@@ -745,9 +747,9 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status) + + trace_nvmet_req_complete(req); + +- if (req->ns) +- nvmet_put_namespace(req->ns); + req->ops->queue_response(req); ++ if (ns) ++ nvmet_put_namespace(ns); + } + + void nvmet_req_complete(struct nvmet_req *req, u16 status) +diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c +index 82b61acf7a72b..1956be87ac5ff 100644 +--- a/drivers/nvme/target/zns.c ++++ b/drivers/nvme/target/zns.c +@@ -100,6 +100,7 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req) + struct nvme_id_ns_zns *id_zns; + u64 zsze; + u16 status; ++ u32 mar, mor; + + if (le32_to_cpu(req->cmd->identify.nsid) == NVME_NSID_ALL) { + req->error_loc = offsetof(struct nvme_identify, nsid); +@@ -130,8 +131,20 @@ void nvmet_execute_identify_cns_cs_ns(struct nvmet_req *req) + zsze = (bdev_zone_sectors(req->ns->bdev) << 9) >> + req->ns->blksize_shift; + id_zns->lbafe[0].zsze = cpu_to_le64(zsze); +- id_zns->mor = cpu_to_le32(bdev_max_open_zones(req->ns->bdev)); +- id_zns->mar = cpu_to_le32(bdev_max_active_zones(req->ns->bdev)); ++ ++ mor = bdev_max_open_zones(req->ns->bdev); ++ if (!mor) ++ mor = U32_MAX; ++ else ++ mor--; ++ id_zns->mor = cpu_to_le32(mor); ++ ++ mar = bdev_max_active_zones(req->ns->bdev); ++ if (!mar) ++ mar = U32_MAX; ++ else ++ mar--; ++ id_zns->mar = cpu_to_le32(mar); + + done: + status = nvmet_copy_to_sgl(req, 0, id_zns, sizeof(*id_zns)); +diff --git 
a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c +index 9be007c9420f9..f69ab90b5e22d 100644 +--- a/drivers/parisc/ccio-dma.c ++++ b/drivers/parisc/ccio-dma.c +@@ -1380,15 +1380,17 @@ ccio_init_resource(struct resource *res, char *name, void __iomem *ioaddr) + } + } + +-static void __init ccio_init_resources(struct ioc *ioc) ++static int __init ccio_init_resources(struct ioc *ioc) + { + struct resource *res = ioc->mmio_region; + char *name = kmalloc(14, GFP_KERNEL); +- ++ if (unlikely(!name)) ++ return -ENOMEM; + snprintf(name, 14, "GSC Bus [%d/]", ioc->hw_path); + + ccio_init_resource(res, name, &ioc->ioc_regs->io_io_low); + ccio_init_resource(res + 1, name, &ioc->ioc_regs->io_io_low_hv); ++ return 0; + } + + static int new_ioc_area(struct resource *res, unsigned long size, +@@ -1543,7 +1545,10 @@ static int __init ccio_probe(struct parisc_device *dev) + return -ENOMEM; + } + ccio_ioc_init(ioc); +- ccio_init_resources(ioc); ++ if (ccio_init_resources(ioc)) { ++ kfree(ioc); ++ return -ENOMEM; ++ } + hppa_dma_ops = &ccio_ops; + + hba = kzalloc(sizeof(*hba), GFP_KERNEL); +diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c +index 231d86d3949c0..1ec5baa673f92 100644 +--- a/drivers/perf/riscv_pmu_sbi.c ++++ b/drivers/perf/riscv_pmu_sbi.c +@@ -467,7 +467,7 @@ static int pmu_sbi_get_ctrinfo(int nctr) + if (!pmu_ctr_list) + return -ENOMEM; + +- for (i = 0; i <= nctr; i++) { ++ for (i = 0; i < nctr; i++) { + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0); + if (ret.error) + /* The logical counter ids are not expected to be contiguous */ +diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c +index 1e54a833f2cf0..a9daaf4d5aaab 100644 +--- a/drivers/regulator/core.c ++++ b/drivers/regulator/core.c +@@ -2732,13 +2732,18 @@ static int _regulator_do_enable(struct regulator_dev *rdev) + */ + static int _regulator_handle_consumer_enable(struct regulator *regulator) + { ++ int ret; + struct regulator_dev *rdev = regulator->rdev; + + lockdep_assert_held_once(&rdev->mutex.base); + + regulator->enable_count++; +- if (regulator->uA_load && regulator->enable_count == 1) +- return drms_uA_update(rdev); ++ if (regulator->uA_load && regulator->enable_count == 1) { ++ ret = drms_uA_update(rdev); ++ if (ret) ++ regulator->enable_count--; ++ return ret; ++ } + + return 0; + } +diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c +index 750dd1e9f2cc7..2ddc431cbd337 100644 +--- a/drivers/scsi/lpfc/lpfc_init.c ++++ b/drivers/scsi/lpfc/lpfc_init.c +@@ -8061,7 +8061,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) + /* Allocate device driver memory */ + rc = lpfc_mem_alloc(phba, SGL_ALIGN_SZ); + if (rc) +- return -ENOMEM; ++ goto out_destroy_workqueue; + + /* IF Type 2 ports get initialized now. 
*/ + if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >= +@@ -8489,6 +8489,9 @@ out_free_bsmbx: + lpfc_destroy_bootstrap_mbox(phba); + out_free_mem: + lpfc_mem_free(phba); ++out_destroy_workqueue: ++ destroy_workqueue(phba->wq); ++ phba->wq = NULL; + return rc; + } + +diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c +index 5b5885d9732b6..3e9b2b0099c7a 100644 +--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c ++++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c +@@ -5311,7 +5311,6 @@ megasas_alloc_fusion_context(struct megasas_instance *instance) + if (!fusion->log_to_span) { + dev_err(&instance->pdev->dev, "Failed from %s %d\n", + __func__, __LINE__); +- kfree(instance->ctrl_context); + return -ENOMEM; + } + } +diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c +index 5e8887fa02c8a..e3b7ebf464244 100644 +--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c ++++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c +@@ -3670,6 +3670,7 @@ static struct fw_event_work *dequeue_next_fw_event(struct MPT3SAS_ADAPTER *ioc) + fw_event = list_first_entry(&ioc->fw_event_list, + struct fw_event_work, list); + list_del_init(&fw_event->list); ++ fw_event_work_put(fw_event); + } + spin_unlock_irqrestore(&ioc->fw_event_lock, flags); + +@@ -3751,7 +3752,6 @@ _scsih_fw_event_cleanup_queue(struct MPT3SAS_ADAPTER *ioc) + if (cancel_work_sync(&fw_event->work)) + fw_event_work_put(fw_event); + +- fw_event_work_put(fw_event); + } + ioc->fw_events_cleanup = 0; + } +diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c +index 2b2f682883752..62666df1a59eb 100644 +--- a/drivers/scsi/qla2xxx/qla_target.c ++++ b/drivers/scsi/qla2xxx/qla_target.c +@@ -6935,14 +6935,8 @@ qlt_24xx_config_rings(struct scsi_qla_host *vha) + + if (ha->flags.msix_enabled) { + if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { +- if (IS_QLA2071(ha)) { +- /* 4 ports Baker: Enable Interrupt Handshake */ +- icb->msix_atio = 0; +- icb->firmware_options_2 |= cpu_to_le32(BIT_26); +- } else { +- icb->msix_atio = cpu_to_le16(msix->entry); +- icb->firmware_options_2 &= cpu_to_le32(~BIT_26); +- } ++ icb->msix_atio = cpu_to_le16(msix->entry); ++ icb->firmware_options_2 &= cpu_to_le32(~BIT_26); + ql_dbg(ql_dbg_init, vha, 0xf072, + "Registering ICB vector 0x%x for atio que.\n", + msix->entry); +diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c +index 78edb1ea4748d..f5c876d03c1ad 100644 +--- a/drivers/scsi/scsi_lib.c ++++ b/drivers/scsi/scsi_lib.c +@@ -118,7 +118,7 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int reason) + } + } + +-static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd) ++static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd, unsigned long msecs) + { + struct request *rq = scsi_cmd_to_rq(cmd); + +@@ -128,7 +128,12 @@ static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd) + } else { + WARN_ON_ONCE(true); + } +- blk_mq_requeue_request(rq, true); ++ ++ if (msecs) { ++ blk_mq_requeue_request(rq, false); ++ blk_mq_delay_kick_requeue_list(rq->q, msecs); ++ } else ++ blk_mq_requeue_request(rq, true); + } + + /** +@@ -658,14 +663,6 @@ static unsigned int scsi_rq_err_bytes(const struct request *rq) + return bytes; + } + +-/* Helper for scsi_io_completion() when "reprep" action required. */ +-static void scsi_io_completion_reprep(struct scsi_cmnd *cmd, +- struct request_queue *q) +-{ +- /* A new command will be prepared and issued. 
*/ +- scsi_mq_requeue_cmd(cmd); +-} +- + static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd) + { + struct request *req = scsi_cmd_to_rq(cmd); +@@ -683,14 +680,21 @@ static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd) + return false; + } + ++/* ++ * When ALUA transition state is returned, reprep the cmd to ++ * use the ALUA handler's transition timeout. Delay the reprep ++ * 1 sec to avoid aggressive retries of the target in that ++ * state. ++ */ ++#define ALUA_TRANSITION_REPREP_DELAY 1000 ++ + /* Helper for scsi_io_completion() when special action required. */ + static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) + { +- struct request_queue *q = cmd->device->request_queue; + struct request *req = scsi_cmd_to_rq(cmd); + int level = 0; +- enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY, +- ACTION_DELAYED_RETRY} action; ++ enum {ACTION_FAIL, ACTION_REPREP, ACTION_DELAYED_REPREP, ++ ACTION_RETRY, ACTION_DELAYED_RETRY} action; + struct scsi_sense_hdr sshdr; + bool sense_valid; + bool sense_current = true; /* false implies "deferred sense" */ +@@ -779,8 +783,8 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) + action = ACTION_DELAYED_RETRY; + break; + case 0x0a: /* ALUA state transition */ +- blk_stat = BLK_STS_TRANSPORT; +- fallthrough; ++ action = ACTION_DELAYED_REPREP; ++ break; + default: + action = ACTION_FAIL; + break; +@@ -839,7 +843,10 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) + return; + fallthrough; + case ACTION_REPREP: +- scsi_io_completion_reprep(cmd, q); ++ scsi_mq_requeue_cmd(cmd, 0); ++ break; ++ case ACTION_DELAYED_REPREP: ++ scsi_mq_requeue_cmd(cmd, ALUA_TRANSITION_REPREP_DELAY); + break; + case ACTION_RETRY: + /* Retry the same command immediately */ +@@ -933,7 +940,7 @@ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result, + * command block will be released and the queue function will be goosed. If we + * are not done then we have to figure out what to do next: + * +- * a) We can call scsi_io_completion_reprep(). The request will be ++ * a) We can call scsi_mq_requeue_cmd(). The request will be + * unprepared and put back on the queue. Then a new command will + * be created for it. This should be used if we made forward + * progress, or if we want to switch from READ(10) to READ(6) for +@@ -949,7 +956,6 @@ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result, + void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) + { + int result = cmd->result; +- struct request_queue *q = cmd->device->request_queue; + struct request *req = scsi_cmd_to_rq(cmd); + blk_status_t blk_stat = BLK_STS_OK; + +@@ -986,7 +992,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) + * request just queue the command up again. 
+ */ + if (likely(result == 0)) +- scsi_io_completion_reprep(cmd, q); ++ scsi_mq_requeue_cmd(cmd, 0); + else + scsi_io_completion_action(cmd, result); + } +diff --git a/drivers/soc/bcm/brcmstb/pm/pm-arm.c b/drivers/soc/bcm/brcmstb/pm/pm-arm.c +index 70ad0f3dce283..286f5d57c0cab 100644 +--- a/drivers/soc/bcm/brcmstb/pm/pm-arm.c ++++ b/drivers/soc/bcm/brcmstb/pm/pm-arm.c +@@ -684,13 +684,14 @@ static int brcmstb_pm_probe(struct platform_device *pdev) + const struct of_device_id *of_id = NULL; + struct device_node *dn; + void __iomem *base; +- int ret, i; ++ int ret, i, s; + + /* AON ctrl registers */ + base = brcmstb_ioremap_match(aon_ctrl_dt_ids, 0, NULL); + if (IS_ERR(base)) { + pr_err("error mapping AON_CTRL\n"); +- return PTR_ERR(base); ++ ret = PTR_ERR(base); ++ goto aon_err; + } + ctrl.aon_ctrl_base = base; + +@@ -700,8 +701,10 @@ static int brcmstb_pm_probe(struct platform_device *pdev) + /* Assume standard offset */ + ctrl.aon_sram = ctrl.aon_ctrl_base + + AON_CTRL_SYSTEM_DATA_RAM_OFS; ++ s = 0; + } else { + ctrl.aon_sram = base; ++ s = 1; + } + + writel_relaxed(0, ctrl.aon_sram + AON_REG_PANIC); +@@ -711,7 +714,8 @@ static int brcmstb_pm_probe(struct platform_device *pdev) + (const void **)&ddr_phy_data); + if (IS_ERR(base)) { + pr_err("error mapping DDR PHY\n"); +- return PTR_ERR(base); ++ ret = PTR_ERR(base); ++ goto ddr_phy_err; + } + ctrl.support_warm_boot = ddr_phy_data->supports_warm_boot; + ctrl.pll_status_offset = ddr_phy_data->pll_status_offset; +@@ -731,17 +735,20 @@ static int brcmstb_pm_probe(struct platform_device *pdev) + for_each_matching_node(dn, ddr_shimphy_dt_ids) { + i = ctrl.num_memc; + if (i >= MAX_NUM_MEMC) { ++ of_node_put(dn); + pr_warn("too many MEMCs (max %d)\n", MAX_NUM_MEMC); + break; + } + + base = of_io_request_and_map(dn, 0, dn->full_name); + if (IS_ERR(base)) { ++ of_node_put(dn); + if (!ctrl.support_warm_boot) + break; + + pr_err("error mapping DDR SHIMPHY %d\n", i); +- return PTR_ERR(base); ++ ret = PTR_ERR(base); ++ goto ddr_shimphy_err; + } + ctrl.memcs[i].ddr_shimphy_base = base; + ctrl.num_memc++; +@@ -752,14 +759,18 @@ static int brcmstb_pm_probe(struct platform_device *pdev) + for_each_matching_node(dn, brcmstb_memc_of_match) { + base = of_iomap(dn, 0); + if (!base) { ++ of_node_put(dn); + pr_err("error mapping DDR Sequencer %d\n", i); +- return -ENOMEM; ++ ret = -ENOMEM; ++ goto brcmstb_memc_err; + } + + of_id = of_match_node(brcmstb_memc_of_match, dn); + if (!of_id) { + iounmap(base); +- return -EINVAL; ++ of_node_put(dn); ++ ret = -EINVAL; ++ goto brcmstb_memc_err; + } + + ddr_seq_data = of_id->data; +@@ -779,21 +790,24 @@ static int brcmstb_pm_probe(struct platform_device *pdev) + dn = of_find_matching_node(NULL, sram_dt_ids); + if (!dn) { + pr_err("SRAM not found\n"); +- return -EINVAL; ++ ret = -EINVAL; ++ goto brcmstb_memc_err; + } + + ret = brcmstb_init_sram(dn); + of_node_put(dn); + if (ret) { + pr_err("error setting up SRAM for PM\n"); +- return ret; ++ goto brcmstb_memc_err; + } + + ctrl.pdev = pdev; + + ctrl.s3_params = kmalloc(sizeof(*ctrl.s3_params), GFP_KERNEL); +- if (!ctrl.s3_params) +- return -ENOMEM; ++ if (!ctrl.s3_params) { ++ ret = -ENOMEM; ++ goto s3_params_err; ++ } + ctrl.s3_params_pa = dma_map_single(&pdev->dev, ctrl.s3_params, + sizeof(*ctrl.s3_params), + DMA_TO_DEVICE); +@@ -813,7 +827,21 @@ static int brcmstb_pm_probe(struct platform_device *pdev) + + out: + kfree(ctrl.s3_params); +- ++s3_params_err: ++ iounmap(ctrl.boot_sram); ++brcmstb_memc_err: ++ for (i--; i >= 0; i--) ++ iounmap(ctrl.memcs[i].ddr_ctrl); 
++ddr_shimphy_err: ++ for (i = 0; i < ctrl.num_memc; i++) ++ iounmap(ctrl.memcs[i].ddr_shimphy_base); ++ ++ iounmap(ctrl.memcs[0].ddr_phy_base); ++ddr_phy_err: ++ iounmap(ctrl.aon_ctrl_base); ++ if (s) ++ iounmap(ctrl.aon_sram); ++aon_err: + pr_warn("PM: initialization failed with code %d\n", ret); + + return ret; +diff --git a/drivers/soc/fsl/Kconfig b/drivers/soc/fsl/Kconfig +index 07d52cafbb313..fcec6ed83d5e2 100644 +--- a/drivers/soc/fsl/Kconfig ++++ b/drivers/soc/fsl/Kconfig +@@ -24,6 +24,7 @@ config FSL_MC_DPIO + tristate "QorIQ DPAA2 DPIO driver" + depends on FSL_MC_BUS + select SOC_BUS ++ select FSL_GUTS + select DIMLIB + help + Driver for the DPAA2 DPIO object. A DPIO provides queue and +diff --git a/drivers/soc/imx/gpcv2.c b/drivers/soc/imx/gpcv2.c +index 85aa86e1338af..5a3809f6a698f 100644 +--- a/drivers/soc/imx/gpcv2.c ++++ b/drivers/soc/imx/gpcv2.c +@@ -333,6 +333,8 @@ static int imx_pgc_power_up(struct generic_pm_domain *genpd) + } + } + ++ reset_control_assert(domain->reset); ++ + /* Enable reset clocks for all devices in the domain */ + ret = clk_bulk_prepare_enable(domain->num_clks, domain->clks); + if (ret) { +@@ -340,7 +342,8 @@ static int imx_pgc_power_up(struct generic_pm_domain *genpd) + goto out_regulator_disable; + } + +- reset_control_assert(domain->reset); ++ /* delays for reset to propagate */ ++ udelay(5); + + if (domain->bits.pxx) { + /* request the domain to power up */ +diff --git a/drivers/soc/imx/imx8m-blk-ctrl.c b/drivers/soc/imx/imx8m-blk-ctrl.c +index 7ebc28709e945..2782a7e0a8719 100644 +--- a/drivers/soc/imx/imx8m-blk-ctrl.c ++++ b/drivers/soc/imx/imx8m-blk-ctrl.c +@@ -242,7 +242,6 @@ static int imx8m_blk_ctrl_probe(struct platform_device *pdev) + ret = PTR_ERR(domain->power_dev); + goto cleanup_pds; + } +- dev_set_name(domain->power_dev, "%s", data->name); + + domain->genpd.name = data->name; + domain->genpd.power_on = imx8m_blk_ctrl_power_on; +diff --git a/drivers/spi/spi-bitbang-txrx.h b/drivers/spi/spi-bitbang-txrx.h +index 267342dfa7388..2dcbe166df63e 100644 +--- a/drivers/spi/spi-bitbang-txrx.h ++++ b/drivers/spi/spi-bitbang-txrx.h +@@ -116,6 +116,7 @@ bitbang_txrx_le_cpha0(struct spi_device *spi, + { + /* if (cpol == 0) this is SPI_MODE_0; else this is SPI_MODE_2 */ + ++ u8 rxbit = bits - 1; + u32 oldbit = !(word & 1); + /* clock starts at inactive polarity */ + for (; likely(bits); bits--) { +@@ -135,7 +136,7 @@ bitbang_txrx_le_cpha0(struct spi_device *spi, + /* sample LSB (from slave) on leading edge */ + word >>= 1; + if ((flags & SPI_MASTER_NO_RX) == 0) +- word |= getmiso(spi) << (bits - 1); ++ word |= getmiso(spi) << rxbit; + setsck(spi, cpol); + } + return word; +@@ -148,6 +149,7 @@ bitbang_txrx_le_cpha1(struct spi_device *spi, + { + /* if (cpol == 0) this is SPI_MODE_1; else this is SPI_MODE_3 */ + ++ u8 rxbit = bits - 1; + u32 oldbit = !(word & 1); + /* clock starts at inactive polarity */ + for (; likely(bits); bits--) { +@@ -168,7 +170,7 @@ bitbang_txrx_le_cpha1(struct spi_device *spi, + /* sample LSB (from slave) on trailing edge */ + word >>= 1; + if ((flags & SPI_MASTER_NO_RX) == 0) +- word |= getmiso(spi) << (bits - 1); ++ word |= getmiso(spi) << rxbit; + } + return word; + } +diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c +index 1175f3a46859f..27295bda3e0bd 100644 +--- a/drivers/tee/tee_shm.c ++++ b/drivers/tee/tee_shm.c +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + #include + #include "tee_private.h" + +diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c 
b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +index 80d4e0676083a..365489bf4b8c1 100644 +--- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c ++++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +@@ -527,7 +527,7 @@ static void int3400_setup_gddv(struct int3400_thermal_priv *priv) + priv->data_vault = kmemdup(obj->package.elements[0].buffer.pointer, + obj->package.elements[0].buffer.length, + GFP_KERNEL); +- if (!priv->data_vault) ++ if (ZERO_OR_NULL_PTR(priv->data_vault)) + goto out_free; + + bin_attr_data_vault.private = priv->data_vault; +@@ -597,7 +597,7 @@ static int int3400_thermal_probe(struct platform_device *pdev) + goto free_imok; + } + +- if (priv->data_vault) { ++ if (!ZERO_OR_NULL_PTR(priv->data_vault)) { + result = sysfs_create_group(&pdev->dev.kobj, + &data_attribute_group); + if (result) +@@ -615,7 +615,8 @@ static int int3400_thermal_probe(struct platform_device *pdev) + free_sysfs: + cleanup_odvp(priv); + if (priv->data_vault) { +- sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group); ++ if (!ZERO_OR_NULL_PTR(priv->data_vault)) ++ sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group); + kfree(priv->data_vault); + } + free_uuid: +@@ -647,7 +648,7 @@ static int int3400_thermal_remove(struct platform_device *pdev) + if (!priv->rel_misc_dev_res) + acpi_thermal_rel_misc_device_remove(priv->adev->handle); + +- if (priv->data_vault) ++ if (!ZERO_OR_NULL_PTR(priv->data_vault)) + sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group); + sysfs_remove_group(&pdev->dev.kobj, &uuid_attribute_group); + sysfs_remove_group(&pdev->dev.kobj, &imok_attribute_group); +diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c +index a51ca56a0ebe7..829da9cb14a86 100644 +--- a/drivers/ufs/core/ufshcd.c ++++ b/drivers/ufs/core/ufshcd.c +@@ -8723,6 +8723,8 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, + struct scsi_device *sdp; + unsigned long flags; + int ret, retries; ++ unsigned long deadline; ++ int32_t remaining; + + spin_lock_irqsave(hba->host->host_lock, flags); + sdp = hba->ufs_device_wlun; +@@ -8755,9 +8757,14 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, + * callbacks hence set the RQF_PM flag so that it doesn't resume the + * already suspended childs. + */ ++ deadline = jiffies + 10 * HZ; + for (retries = 3; retries > 0; --retries) { ++ ret = -ETIMEDOUT; ++ remaining = deadline - jiffies; ++ if (remaining <= 0) ++ break; + ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, +- START_STOP_TIMEOUT, 0, 0, RQF_PM, NULL); ++ remaining / HZ, 0, 0, RQF_PM, NULL); + if (!scsi_status_is_check_condition(ret) || + !scsi_sense_valid(&sshdr) || + sshdr.sense_key != UNIT_ATTENTION) +diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c +index c13b9290e3575..d0057d18d2f4a 100644 +--- a/drivers/vfio/vfio_iommu_type1.c ++++ b/drivers/vfio/vfio_iommu_type1.c +@@ -557,6 +557,18 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr, + ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM, + pages, NULL, NULL); + if (ret > 0) { ++ int i; ++ ++ /* ++ * The zero page is always resident, we don't need to pin it ++ * and it falls into our invalid/reserved test so we don't ++ * unpin in put_pfn(). Unpin all zero pages in the batch here. 
++ */ ++ for (i = 0 ; i < ret; i++) { ++ if (unlikely(is_zero_pfn(page_to_pfn(pages[i])))) ++ unpin_user_page(pages[i]); ++ } ++ + *pfn = page_to_pfn(pages[0]); + goto done; + } +diff --git a/drivers/video/fbdev/chipsfb.c b/drivers/video/fbdev/chipsfb.c +index 393894af26f84..2b00a9d554fc0 100644 +--- a/drivers/video/fbdev/chipsfb.c ++++ b/drivers/video/fbdev/chipsfb.c +@@ -430,6 +430,7 @@ static int chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent) + err_release_fb: + framebuffer_release(p); + err_disable: ++ pci_disable_device(dp); + err_out: + return rc; + } +diff --git a/drivers/video/fbdev/core/fbsysfs.c b/drivers/video/fbdev/core/fbsysfs.c +index c2a60b187467e..4d7f63892dcc4 100644 +--- a/drivers/video/fbdev/core/fbsysfs.c ++++ b/drivers/video/fbdev/core/fbsysfs.c +@@ -84,6 +84,10 @@ void framebuffer_release(struct fb_info *info) + if (WARN_ON(refcount_read(&info->count))) + return; + ++#if IS_ENABLED(CONFIG_FB_BACKLIGHT) ++ mutex_destroy(&info->bl_curve_mutex); ++#endif ++ + kfree(info->apertures); + kfree(info); + } +diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c +index 292fcb0a24fc9..6ff237cee7f87 100644 +--- a/drivers/video/fbdev/omap/omapfb_main.c ++++ b/drivers/video/fbdev/omap/omapfb_main.c +@@ -1643,14 +1643,14 @@ static int omapfb_do_probe(struct platform_device *pdev, + goto cleanup; + } + fbdev->int_irq = platform_get_irq(pdev, 0); +- if (!fbdev->int_irq) { ++ if (fbdev->int_irq < 0) { + dev_err(&pdev->dev, "unable to get irq\n"); + r = ENXIO; + goto cleanup; + } + + fbdev->ext_irq = platform_get_irq(pdev, 1); +- if (!fbdev->ext_irq) { ++ if (fbdev->ext_irq < 0) { + dev_err(&pdev->dev, "unable to get irq\n"); + r = ENXIO; + goto cleanup; +diff --git a/fs/afs/flock.c b/fs/afs/flock.c +index c4210a3964d8b..bbcc5afd15760 100644 +--- a/fs/afs/flock.c ++++ b/fs/afs/flock.c +@@ -76,7 +76,7 @@ void afs_lock_op_done(struct afs_call *call) + if (call->error == 0) { + spin_lock(&vnode->lock); + trace_afs_flock_ev(vnode, NULL, afs_flock_timestamp, 0); +- vnode->locked_at = call->reply_time; ++ vnode->locked_at = call->issue_time; + afs_schedule_lock_extension(vnode); + spin_unlock(&vnode->lock); + } +diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c +index 4943413d9c5f7..7d37f63ef0f09 100644 +--- a/fs/afs/fsclient.c ++++ b/fs/afs/fsclient.c +@@ -131,7 +131,7 @@ bad: + + static time64_t xdr_decode_expiry(struct afs_call *call, u32 expiry) + { +- return ktime_divns(call->reply_time, NSEC_PER_SEC) + expiry; ++ return ktime_divns(call->issue_time, NSEC_PER_SEC) + expiry; + } + + static void xdr_decode_AFSCallBack(const __be32 **_bp, +diff --git a/fs/afs/internal.h b/fs/afs/internal.h +index a6f25d9e75b52..28bdd0387e5ea 100644 +--- a/fs/afs/internal.h ++++ b/fs/afs/internal.h +@@ -137,7 +137,6 @@ struct afs_call { + bool need_attention; /* T if RxRPC poked us */ + bool async; /* T if asynchronous */ + bool upgrade; /* T to request service upgrade */ +- bool have_reply_time; /* T if have got reply_time */ + bool intr; /* T if interruptible */ + bool unmarshalling_error; /* T if an unmarshalling error occurred */ + u16 service_id; /* Actual service ID (after upgrade) */ +@@ -151,7 +150,7 @@ struct afs_call { + } __attribute__((packed)); + __be64 tmp64; + }; +- ktime_t reply_time; /* Time of first reply packet */ ++ ktime_t issue_time; /* Time of issue of operation */ + }; + + struct afs_call_type { +diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c +index a5434f3e57c68..e3de7fea36435 100644 +--- a/fs/afs/rxrpc.c ++++ 
b/fs/afs/rxrpc.c +@@ -347,6 +347,7 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) + if (call->max_lifespan) + rxrpc_kernel_set_max_life(call->net->socket, rxcall, + call->max_lifespan); ++ call->issue_time = ktime_get_real(); + + /* send the request */ + iov[0].iov_base = call->request; +@@ -497,12 +498,6 @@ static void afs_deliver_to_call(struct afs_call *call) + return; + } + +- if (!call->have_reply_time && +- rxrpc_kernel_get_reply_time(call->net->socket, +- call->rxcall, +- &call->reply_time)) +- call->have_reply_time = true; +- + ret = call->type->deliver(call); + state = READ_ONCE(call->state); + if (ret == 0 && call->unmarshalling_error) +diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c +index fdc7d675b4b0c..11571cca86c19 100644 +--- a/fs/afs/yfsclient.c ++++ b/fs/afs/yfsclient.c +@@ -232,8 +232,7 @@ static void xdr_decode_YFSCallBack(const __be32 **_bp, + struct afs_callback *cb = &scb->callback; + ktime_t cb_expiry; + +- cb_expiry = call->reply_time; +- cb_expiry = ktime_add(cb_expiry, xdr_to_u64(x->expiration_time) * 100); ++ cb_expiry = ktime_add(call->issue_time, xdr_to_u64(x->expiration_time) * 100); + cb->expires_at = ktime_divns(cb_expiry, NSEC_PER_SEC); + scb->have_cb = true; + *_bp += xdr_size(x); +diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h +index 4d8acd7e63eb5..1bbc810574f22 100644 +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -1065,8 +1065,6 @@ struct btrfs_fs_info { + + spinlock_t zone_active_bgs_lock; + struct list_head zone_active_bgs; +- /* Waiters when BTRFS_FS_NEED_ZONE_FINISH is set */ +- wait_queue_head_t zone_finish_wait; + + #ifdef CONFIG_BTRFS_FS_REF_VERIFY + spinlock_t ref_verify_lock; +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index a2505cfc6bc10..781952c5a5c23 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -3173,7 +3173,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) + init_waitqueue_head(&fs_info->transaction_blocked_wait); + init_waitqueue_head(&fs_info->async_submit_wait); + init_waitqueue_head(&fs_info->delayed_iputs_wait); +- init_waitqueue_head(&fs_info->zone_finish_wait); + + /* Usable values until the real ones are cached from the superblock */ + fs_info->nodesize = 4096; +diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c +index 61496ecb1e201..f79f8d7cffcf2 100644 +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -1643,10 +1643,9 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode, + done_offset = end; + + if (done_offset == start) { +- struct btrfs_fs_info *info = inode->root->fs_info; +- +- wait_var_event(&info->zone_finish_wait, +- !test_bit(BTRFS_FS_NEED_ZONE_FINISH, &info->flags)); ++ wait_on_bit_io(&inode->root->fs_info->flags, ++ BTRFS_FS_NEED_ZONE_FINISH, ++ TASK_UNINTERRUPTIBLE); + continue; + } + +diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c +index b0c5b4738b1f7..17623e6410c5d 100644 +--- a/fs/btrfs/space-info.c ++++ b/fs/btrfs/space-info.c +@@ -199,7 +199,7 @@ static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags) + ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK); + + if (flags & BTRFS_BLOCK_GROUP_DATA) +- return SZ_1G; ++ return BTRFS_MAX_DATA_CHUNK_SIZE; + else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) + return SZ_32M; + +diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c +index 3460fd6743807..16e01fbdcec83 100644 +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -5266,6 +5266,9 @@ static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl, + ctl->stripe_size); + } + ++ /* Stripe size 
should not go beyond 1G. */ ++ ctl->stripe_size = min_t(u64, ctl->stripe_size, SZ_1G); ++ + /* Align to BTRFS_STRIPE_LEN */ + ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN); + ctl->chunk_size = ctl->stripe_size * data_stripes; +diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c +index 31cb11daa8e82..1386362fad3b8 100644 +--- a/fs/btrfs/zoned.c ++++ b/fs/btrfs/zoned.c +@@ -421,10 +421,19 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) + * since btrfs adds the pages one by one to a bio, and btrfs cannot + * increase the metadata reservation even if it increases the number of + * extents, it is safe to stick with the limit. ++ * ++ * With the zoned emulation, we can have non-zoned device on the zoned ++ * mode. In this case, we don't have a valid max zone append size. So, ++ * use max_segments * PAGE_SIZE as the pseudo max_zone_append_size. + */ +- zone_info->max_zone_append_size = +- min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT, +- (u64)bdev_max_segments(bdev) << PAGE_SHIFT); ++ if (bdev_is_zoned(bdev)) { ++ zone_info->max_zone_append_size = min_t(u64, ++ (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT, ++ (u64)bdev_max_segments(bdev) << PAGE_SHIFT); ++ } else { ++ zone_info->max_zone_append_size = ++ (u64)bdev_max_segments(bdev) << PAGE_SHIFT; ++ } + if (!IS_ALIGNED(nr_sectors, zone_sectors)) + zone_info->nr_zones++; + +@@ -1178,7 +1187,7 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size) + * offset. + */ + static int calculate_alloc_pointer(struct btrfs_block_group *cache, +- u64 *offset_ret) ++ u64 *offset_ret, bool new) + { + struct btrfs_fs_info *fs_info = cache->fs_info; + struct btrfs_root *root; +@@ -1188,6 +1197,21 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache, + int ret; + u64 length; + ++ /* ++ * Avoid tree lookups for a new block group, there's no use for it. ++ * It must always be 0. ++ * ++ * Also, we have a lock chain of extent buffer lock -> chunk mutex. ++ * For new a block group, this function is called from ++ * btrfs_make_block_group() which is already taking the chunk mutex. ++ * Thus, we cannot call calculate_alloc_pointer() which takes extent ++ * buffer locks to avoid deadlock. ++ */ ++ if (new) { ++ *offset_ret = 0; ++ return 0; ++ } ++ + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; +@@ -1323,6 +1347,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) + else + num_conventional++; + ++ /* ++ * Consider a zone as active if we can allow any number of ++ * active zones. ++ */ ++ if (!device->zone_info->max_active_zones) ++ __set_bit(i, active); ++ + if (!is_sequential) { + alloc_offsets[i] = WP_CONVENTIONAL; + continue; +@@ -1389,45 +1420,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) + __set_bit(i, active); + break; + } +- +- /* +- * Consider a zone as active if we can allow any number of +- * active zones. +- */ +- if (!device->zone_info->max_active_zones) +- __set_bit(i, active); + } + + if (num_sequential > 0) + cache->seq_zone = true; + + if (num_conventional > 0) { +- /* +- * Avoid calling calculate_alloc_pointer() for new BG. It +- * is no use for new BG. It must be always 0. +- * +- * Also, we have a lock chain of extent buffer lock -> +- * chunk mutex. For new BG, this function is called from +- * btrfs_make_block_group() which is already taking the +- * chunk mutex. 
Thus, we cannot call +- * calculate_alloc_pointer() which takes extent buffer +- * locks to avoid deadlock. +- */ +- + /* Zone capacity is always zone size in emulation */ + cache->zone_capacity = cache->length; +- if (new) { +- cache->alloc_offset = 0; +- goto out; +- } +- ret = calculate_alloc_pointer(cache, &last_alloc); +- if (ret || map->num_stripes == num_conventional) { +- if (!ret) +- cache->alloc_offset = last_alloc; +- else +- btrfs_err(fs_info, ++ ret = calculate_alloc_pointer(cache, &last_alloc, new); ++ if (ret) { ++ btrfs_err(fs_info, + "zoned: failed to determine allocation offset of bg %llu", +- cache->start); ++ cache->start); ++ goto out; ++ } else if (map->num_stripes == num_conventional) { ++ cache->alloc_offset = last_alloc; ++ cache->zone_is_active = 1; + goto out; + } + } +@@ -1495,13 +1504,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) + goto out; + } + +- if (cache->zone_is_active) { +- btrfs_get_block_group(cache); +- spin_lock(&fs_info->zone_active_bgs_lock); +- list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs); +- spin_unlock(&fs_info->zone_active_bgs_lock); +- } +- + out: + if (cache->alloc_offset > fs_info->zone_size) { + btrfs_err(fs_info, +@@ -1526,10 +1528,16 @@ out: + ret = -EIO; + } + +- if (!ret) ++ if (!ret) { + cache->meta_write_pointer = cache->alloc_offset + cache->start; +- +- if (ret) { ++ if (cache->zone_is_active) { ++ btrfs_get_block_group(cache); ++ spin_lock(&fs_info->zone_active_bgs_lock); ++ list_add_tail(&cache->active_bg_list, ++ &fs_info->zone_active_bgs); ++ spin_unlock(&fs_info->zone_active_bgs_lock); ++ } ++ } else { + kfree(cache->physical_map); + cache->physical_map = NULL; + } +@@ -2007,8 +2015,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ + /* For active_bg_list */ + btrfs_put_block_group(block_group); + +- clear_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); +- wake_up_all(&fs_info->zone_finish_wait); ++ clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); + + return 0; + } +diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c +index f5dcc4940b6da..9dfd2dd612c25 100644 +--- a/fs/cifs/smb2file.c ++++ b/fs/cifs/smb2file.c +@@ -61,7 +61,6 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, + nr_ioctl_req.Reserved = 0; + rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid, + fid->volatile_fid, FSCTL_LMR_REQUEST_RESILIENCY, +- true /* is_fsctl */, + (char *)&nr_ioctl_req, sizeof(nr_ioctl_req), + CIFSMaxBufSize, NULL, NULL /* no return info */); + if (rc == -EOPNOTSUPP) { +diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c +index 3898ec2632dc4..e8a8daa82ed76 100644 +--- a/fs/cifs/smb2ops.c ++++ b/fs/cifs/smb2ops.c +@@ -680,7 +680,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) + struct cifs_ses *ses = tcon->ses; + + rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, +- FSCTL_QUERY_NETWORK_INTERFACE_INFO, true /* is_fsctl */, ++ FSCTL_QUERY_NETWORK_INTERFACE_INFO, + NULL /* no data input */, 0 /* no data input */, + CIFSMaxBufSize, (char **)&out_buf, &ret_data_len); + if (rc == -EOPNOTSUPP) { +@@ -1609,9 +1609,8 @@ SMB2_request_res_key(const unsigned int xid, struct cifs_tcon *tcon, + struct resume_key_req *res_key; + + rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, +- FSCTL_SRV_REQUEST_RESUME_KEY, true /* is_fsctl */, +- NULL, 0 /* no input */, CIFSMaxBufSize, +- (char **)&res_key, &ret_data_len); ++ FSCTL_SRV_REQUEST_RESUME_KEY, NULL, 0 /* no input */, ++ 
CIFSMaxBufSize, (char **)&res_key, &ret_data_len); + + if (rc == -EOPNOTSUPP) { + pr_warn_once("Server share %s does not support copy range\n", tcon->treeName); +@@ -1753,7 +1752,7 @@ smb2_ioctl_query_info(const unsigned int xid, + rqst[1].rq_nvec = SMB2_IOCTL_IOV_SIZE; + + rc = SMB2_ioctl_init(tcon, server, &rqst[1], COMPOUND_FID, COMPOUND_FID, +- qi.info_type, true, buffer, qi.output_buffer_length, ++ qi.info_type, buffer, qi.output_buffer_length, + CIFSMaxBufSize - MAX_SMB2_CREATE_RESPONSE_SIZE - + MAX_SMB2_CLOSE_RESPONSE_SIZE); + free_req1_func = SMB2_ioctl_free; +@@ -1929,9 +1928,8 @@ smb2_copychunk_range(const unsigned int xid, + retbuf = NULL; + rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, + trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, +- true /* is_fsctl */, (char *)pcchunk, +- sizeof(struct copychunk_ioctl), CIFSMaxBufSize, +- (char **)&retbuf, &ret_data_len); ++ (char *)pcchunk, sizeof(struct copychunk_ioctl), ++ CIFSMaxBufSize, (char **)&retbuf, &ret_data_len); + if (rc == 0) { + if (ret_data_len != + sizeof(struct copychunk_ioctl_rsp)) { +@@ -2091,7 +2089,6 @@ static bool smb2_set_sparse(const unsigned int xid, struct cifs_tcon *tcon, + + rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, FSCTL_SET_SPARSE, +- true /* is_fctl */, + &setsparse, 1, CIFSMaxBufSize, NULL, NULL); + if (rc) { + tcon->broken_sparse_sup = true; +@@ -2174,7 +2171,6 @@ smb2_duplicate_extents(const unsigned int xid, + rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, + trgtfile->fid.volatile_fid, + FSCTL_DUPLICATE_EXTENTS_TO_FILE, +- true /* is_fsctl */, + (char *)&dup_ext_buf, + sizeof(struct duplicate_extents_to_file), + CIFSMaxBufSize, NULL, +@@ -2209,7 +2205,6 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon, + return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, + FSCTL_SET_INTEGRITY_INFORMATION, +- true /* is_fsctl */, + (char *)&integr_info, + sizeof(struct fsctl_set_integrity_information_req), + CIFSMaxBufSize, NULL, +@@ -2262,7 +2257,6 @@ smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon, + rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, + FSCTL_SRV_ENUMERATE_SNAPSHOTS, +- true /* is_fsctl */, + NULL, 0 /* no input data */, max_response_size, + (char **)&retbuf, + &ret_data_len); +@@ -2982,7 +2976,6 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, + do { + rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, + FSCTL_DFS_GET_REFERRALS, +- true /* is_fsctl */, + (char *)dfs_req, dfs_req_size, CIFSMaxBufSize, + (char **)&dfs_rsp, &dfs_rsp_size); + if (!is_retryable_error(rc)) +@@ -3189,8 +3182,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, + + rc = SMB2_ioctl_init(tcon, server, + &rqst[1], fid.persistent_fid, +- fid.volatile_fid, FSCTL_GET_REPARSE_POINT, +- true /* is_fctl */, NULL, 0, ++ fid.volatile_fid, FSCTL_GET_REPARSE_POINT, NULL, 0, + CIFSMaxBufSize - + MAX_SMB2_CREATE_RESPONSE_SIZE - + MAX_SMB2_CLOSE_RESPONSE_SIZE); +@@ -3370,8 +3362,7 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon, + + rc = SMB2_ioctl_init(tcon, server, + &rqst[1], COMPOUND_FID, +- COMPOUND_FID, FSCTL_GET_REPARSE_POINT, +- true /* is_fctl */, NULL, 0, ++ COMPOUND_FID, FSCTL_GET_REPARSE_POINT, NULL, 0, + CIFSMaxBufSize - + MAX_SMB2_CREATE_RESPONSE_SIZE - + MAX_SMB2_CLOSE_RESPONSE_SIZE); +@@ -3599,26 +3590,43 @@ get_smb2_acl(struct cifs_sb_info *cifs_sb, + return pntsd; + } + ++static long 
smb3_zero_data(struct file *file, struct cifs_tcon *tcon, ++ loff_t offset, loff_t len, unsigned int xid) ++{ ++ struct cifsFileInfo *cfile = file->private_data; ++ struct file_zero_data_information fsctl_buf; ++ ++ cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len); ++ ++ fsctl_buf.FileOffset = cpu_to_le64(offset); ++ fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); ++ ++ return SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, ++ cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, ++ (char *)&fsctl_buf, ++ sizeof(struct file_zero_data_information), ++ 0, NULL, NULL); ++} ++ + static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, + loff_t offset, loff_t len, bool keep_size) + { + struct cifs_ses *ses = tcon->ses; +- struct inode *inode; +- struct cifsInodeInfo *cifsi; ++ struct inode *inode = file_inode(file); ++ struct cifsInodeInfo *cifsi = CIFS_I(inode); + struct cifsFileInfo *cfile = file->private_data; +- struct file_zero_data_information fsctl_buf; + long rc; + unsigned int xid; + __le64 eof; + + xid = get_xid(); + +- inode = d_inode(cfile->dentry); +- cifsi = CIFS_I(inode); +- + trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid, + ses->Suid, offset, len); + ++ inode_lock(inode); ++ filemap_invalidate_lock(inode->i_mapping); ++ + /* + * We zero the range through ioctl, so we need remove the page caches + * first, otherwise the data may be inconsistent with the server. +@@ -3626,26 +3634,12 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, + truncate_pagecache_range(inode, offset, offset + len - 1); + + /* if file not oplocked can't be sure whether asking to extend size */ +- if (!CIFS_CACHE_READ(cifsi)) +- if (keep_size == false) { +- rc = -EOPNOTSUPP; +- trace_smb3_zero_err(xid, cfile->fid.persistent_fid, +- tcon->tid, ses->Suid, offset, len, rc); +- free_xid(xid); +- return rc; +- } +- +- cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len); +- +- fsctl_buf.FileOffset = cpu_to_le64(offset); +- fsctl_buf.BeyondFinalZero = cpu_to_le64(offset + len); ++ rc = -EOPNOTSUPP; ++ if (keep_size == false && !CIFS_CACHE_READ(cifsi)) ++ goto zero_range_exit; + +- rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, +- cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, true, +- (char *)&fsctl_buf, +- sizeof(struct file_zero_data_information), +- 0, NULL, NULL); +- if (rc) ++ rc = smb3_zero_data(file, tcon, offset, len, xid); ++ if (rc < 0) + goto zero_range_exit; + + /* +@@ -3658,6 +3652,8 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, + } + + zero_range_exit: ++ filemap_invalidate_unlock(inode->i_mapping); ++ inode_unlock(inode); + free_xid(xid); + if (rc) + trace_smb3_zero_err(xid, cfile->fid.persistent_fid, tcon->tid, +@@ -3702,7 +3698,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, + + rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, FSCTL_SET_ZERO_DATA, +- true /* is_fctl */, (char *)&fsctl_buf, ++ (char *)&fsctl_buf, + sizeof(struct file_zero_data_information), + CIFSMaxBufSize, NULL, NULL); + filemap_invalidate_unlock(inode->i_mapping); +@@ -3764,7 +3760,7 @@ static int smb3_simple_fallocate_range(unsigned int xid, + in_data.length = cpu_to_le64(len); + rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, +- FSCTL_QUERY_ALLOCATED_RANGES, true, ++ FSCTL_QUERY_ALLOCATED_RANGES, + (char *)&in_data, sizeof(in_data), + 1024 * sizeof(struct file_allocated_range_buffer), + (char **)&out_data, &out_data_len); +@@ -4085,7 +4081,7 @@ 
static loff_t smb3_llseek(struct file *file, struct cifs_tcon *tcon, loff_t offs + + rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, +- FSCTL_QUERY_ALLOCATED_RANGES, true, ++ FSCTL_QUERY_ALLOCATED_RANGES, + (char *)&in_data, sizeof(in_data), + sizeof(struct file_allocated_range_buffer), + (char **)&out_data, &out_data_len); +@@ -4145,7 +4141,7 @@ static int smb3_fiemap(struct cifs_tcon *tcon, + + rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid, + cfile->fid.volatile_fid, +- FSCTL_QUERY_ALLOCATED_RANGES, true, ++ FSCTL_QUERY_ALLOCATED_RANGES, + (char *)&in_data, sizeof(in_data), + 1024 * sizeof(struct file_allocated_range_buffer), + (char **)&out_data, &out_data_len); +diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c +index ba58d7fd54f9e..31d37afae741f 100644 +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -1174,7 +1174,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) + } + + rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, +- FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, ++ FSCTL_VALIDATE_NEGOTIATE_INFO, + (char *)pneg_inbuf, inbuflen, CIFSMaxBufSize, + (char **)&pneg_rsp, &rsplen); + if (rc == -EOPNOTSUPP) { +@@ -3053,7 +3053,7 @@ int + SMB2_ioctl_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, + struct smb_rqst *rqst, + u64 persistent_fid, u64 volatile_fid, u32 opcode, +- bool is_fsctl, char *in_data, u32 indatalen, ++ char *in_data, u32 indatalen, + __u32 max_response_size) + { + struct smb2_ioctl_req *req; +@@ -3128,10 +3128,8 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, + req->hdr.CreditCharge = + cpu_to_le16(DIV_ROUND_UP(max(indatalen, max_response_size), + SMB2_MAX_BUFFER_SIZE)); +- if (is_fsctl) +- req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL); +- else +- req->Flags = 0; ++ /* always an FSCTL (for now) */ ++ req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL); + + /* validate negotiate request must be signed - see MS-SMB2 3.2.5.5 */ + if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) +@@ -3158,9 +3156,9 @@ SMB2_ioctl_free(struct smb_rqst *rqst) + */ + int + SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, +- u64 volatile_fid, u32 opcode, bool is_fsctl, +- char *in_data, u32 indatalen, u32 max_out_data_len, +- char **out_data, u32 *plen /* returned data len */) ++ u64 volatile_fid, u32 opcode, char *in_data, u32 indatalen, ++ u32 max_out_data_len, char **out_data, ++ u32 *plen /* returned data len */) + { + struct smb_rqst rqst; + struct smb2_ioctl_rsp *rsp = NULL; +@@ -3202,7 +3200,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, + + rc = SMB2_ioctl_init(tcon, server, + &rqst, persistent_fid, volatile_fid, opcode, +- is_fsctl, in_data, indatalen, max_out_data_len); ++ in_data, indatalen, max_out_data_len); + if (rc) + goto ioctl_exit; + +@@ -3294,7 +3292,7 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, + cpu_to_le16(COMPRESSION_FORMAT_DEFAULT); + + rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid, +- FSCTL_SET_COMPRESSION, true /* is_fsctl */, ++ FSCTL_SET_COMPRESSION, + (char *)&fsctl_input /* data input */, + 2 /* in data len */, CIFSMaxBufSize /* max out data */, + &ret_data /* out data */, NULL); +diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h +index a69f1eed1cfe5..d57d7202dc367 100644 +--- a/fs/cifs/smb2proto.h ++++ b/fs/cifs/smb2proto.h +@@ -147,13 +147,13 @@ extern int SMB2_open_init(struct cifs_tcon *tcon, + extern void SMB2_open_free(struct smb_rqst 
*rqst); + extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, + u64 persistent_fid, u64 volatile_fid, u32 opcode, +- bool is_fsctl, char *in_data, u32 indatalen, u32 maxoutlen, ++ char *in_data, u32 indatalen, u32 maxoutlen, + char **out_data, u32 *plen /* returned data len */); + extern int SMB2_ioctl_init(struct cifs_tcon *tcon, + struct TCP_Server_Info *server, + struct smb_rqst *rqst, + u64 persistent_fid, u64 volatile_fid, u32 opcode, +- bool is_fsctl, char *in_data, u32 indatalen, ++ char *in_data, u32 indatalen, + __u32 max_response_size); + extern void SMB2_ioctl_free(struct smb_rqst *rqst); + extern int SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon, +diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c +index 3dcf0b8b4e932..232cfdf095aeb 100644 +--- a/fs/debugfs/inode.c ++++ b/fs/debugfs/inode.c +@@ -744,6 +744,28 @@ void debugfs_remove(struct dentry *dentry) + } + EXPORT_SYMBOL_GPL(debugfs_remove); + ++/** ++ * debugfs_lookup_and_remove - lookup a directory or file and recursively remove it ++ * @name: a pointer to a string containing the name of the item to look up. ++ * @parent: a pointer to the parent dentry of the item. ++ * ++ * This is the equlivant of doing something like ++ * debugfs_remove(debugfs_lookup(..)) but with the proper reference counting ++ * handled for the directory being looked up. ++ */ ++void debugfs_lookup_and_remove(const char *name, struct dentry *parent) ++{ ++ struct dentry *dentry; ++ ++ dentry = debugfs_lookup(name, parent); ++ if (!dentry) ++ return; ++ ++ debugfs_remove(dentry); ++ dput(dentry); ++} ++EXPORT_SYMBOL_GPL(debugfs_lookup_and_remove); ++ + /** + * debugfs_rename - rename a file/directory in the debugfs filesystem + * @old_dir: a pointer to the parent dentry for the renamed object. This +diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c +index 8e01d89c3319e..b5fd9d71e67f1 100644 +--- a/fs/erofs/fscache.c ++++ b/fs/erofs/fscache.c +@@ -222,8 +222,10 @@ static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio) + + rreq = erofs_fscache_alloc_request(folio_mapping(folio), + folio_pos(folio), folio_size(folio)); +- if (IS_ERR(rreq)) ++ if (IS_ERR(rreq)) { ++ ret = PTR_ERR(rreq); + goto out; ++ } + + return erofs_fscache_read_folios_async(mdev.m_fscache->cookie, + rreq, mdev.m_pa); +@@ -301,8 +303,10 @@ static int erofs_fscache_read_folio(struct file *file, struct folio *folio) + + rreq = erofs_fscache_alloc_request(folio_mapping(folio), + folio_pos(folio), folio_size(folio)); +- if (IS_ERR(rreq)) ++ if (IS_ERR(rreq)) { ++ ret = PTR_ERR(rreq); + goto out_unlock; ++ } + + pstart = mdev.m_pa + (pos - map.m_la); + return erofs_fscache_read_folios_async(mdev.m_fscache->cookie, +diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h +index cfee49d33b95a..a01cc82795a25 100644 +--- a/fs/erofs/internal.h ++++ b/fs/erofs/internal.h +@@ -195,7 +195,6 @@ struct erofs_workgroup { + atomic_t refcount; + }; + +-#if defined(CONFIG_SMP) + static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp, + int val) + { +@@ -224,34 +223,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) + return atomic_cond_read_relaxed(&grp->refcount, + VAL != EROFS_LOCKED_MAGIC); + } +-#else +-static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp, +- int val) +-{ +- preempt_disable(); +- /* no need to spin on UP platforms, let's just disable preemption. 
*/ +- if (val != atomic_read(&grp->refcount)) { +- preempt_enable(); +- return false; +- } +- return true; +-} +- +-static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp, +- int orig_val) +-{ +- preempt_enable(); +-} +- +-static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) +-{ +- int v = atomic_read(&grp->refcount); +- +- /* workgroup is never freezed on uniprocessor systems */ +- DBG_BUGON(v == EROFS_LOCKED_MAGIC); +- return v; +-} +-#endif /* !CONFIG_SMP */ + #endif /* !CONFIG_EROFS_FS_ZIP */ + + /* we strictly follow PAGE_SIZE and no buffer head yet */ +diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c +index 81d26abf486fa..da85b39791957 100644 +--- a/fs/tracefs/inode.c ++++ b/fs/tracefs/inode.c +@@ -141,6 +141,8 @@ struct tracefs_mount_opts { + kuid_t uid; + kgid_t gid; + umode_t mode; ++ /* Opt_* bitfield. */ ++ unsigned int opts; + }; + + enum { +@@ -241,6 +243,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) + kgid_t gid; + char *p; + ++ opts->opts = 0; + opts->mode = TRACEFS_DEFAULT_MODE; + + while ((p = strsep(&data, ",")) != NULL) { +@@ -275,24 +278,36 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) + * but traditionally tracefs has ignored all mount options + */ + } ++ ++ opts->opts |= BIT(token); + } + + return 0; + } + +-static int tracefs_apply_options(struct super_block *sb) ++static int tracefs_apply_options(struct super_block *sb, bool remount) + { + struct tracefs_fs_info *fsi = sb->s_fs_info; + struct inode *inode = d_inode(sb->s_root); + struct tracefs_mount_opts *opts = &fsi->mount_opts; + +- inode->i_mode &= ~S_IALLUGO; +- inode->i_mode |= opts->mode; ++ /* ++ * On remount, only reset mode/uid/gid if they were provided as mount ++ * options. ++ */ ++ ++ if (!remount || opts->opts & BIT(Opt_mode)) { ++ inode->i_mode &= ~S_IALLUGO; ++ inode->i_mode |= opts->mode; ++ } + +- inode->i_uid = opts->uid; ++ if (!remount || opts->opts & BIT(Opt_uid)) ++ inode->i_uid = opts->uid; + +- /* Set all the group ids to the mount option */ +- set_gid(sb->s_root, opts->gid); ++ if (!remount || opts->opts & BIT(Opt_gid)) { ++ /* Set all the group ids to the mount option */ ++ set_gid(sb->s_root, opts->gid); ++ } + + return 0; + } +@@ -307,7 +322,7 @@ static int tracefs_remount(struct super_block *sb, int *flags, char *data) + if (err) + goto fail; + +- tracefs_apply_options(sb); ++ tracefs_apply_options(sb, true); + + fail: + return err; +@@ -359,7 +374,7 @@ static int trace_fill_super(struct super_block *sb, void *data, int silent) + + sb->s_op = &tracefs_super_operations; + +- tracefs_apply_options(sb); ++ tracefs_apply_options(sb, false); + + return 0; + +diff --git a/include/kunit/test.h b/include/kunit/test.h +index 8ffcd7de96070..648dbb00a3008 100644 +--- a/include/kunit/test.h ++++ b/include/kunit/test.h +@@ -863,7 +863,7 @@ do { \ + + #define KUNIT_EXPECT_LE_MSG(test, left, right, fmt, ...) \ + KUNIT_BINARY_INT_ASSERTION(test, \ +- KUNIT_ASSERTION, \ ++ KUNIT_EXPECTATION, \ + left, <=, right, \ + fmt, \ + ##__VA_ARGS__) +@@ -1153,7 +1153,7 @@ do { \ + + #define KUNIT_ASSERT_LT_MSG(test, left, right, fmt, ...) \ + KUNIT_BINARY_INT_ASSERTION(test, \ +- KUNIT_EXPECTATION, \ ++ KUNIT_ASSERTION, \ + left, <, right, \ + fmt, \ + ##__VA_ARGS__) +@@ -1194,7 +1194,7 @@ do { \ + + #define KUNIT_ASSERT_GT_MSG(test, left, right, fmt, ...) 
\ + KUNIT_BINARY_INT_ASSERTION(test, \ +- KUNIT_EXPECTATION, \ ++ KUNIT_ASSERTION, \ + left, >, right, \ + fmt, \ + ##__VA_ARGS__) +diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h +index badcc0e3418f2..262664107b839 100644 +--- a/include/linux/buffer_head.h ++++ b/include/linux/buffer_head.h +@@ -136,6 +136,17 @@ BUFFER_FNS(Defer_Completion, defer_completion) + + static __always_inline void set_buffer_uptodate(struct buffer_head *bh) + { ++ /* ++ * If somebody else already set this uptodate, they will ++ * have done the memory barrier, and a reader will thus ++ * see *some* valid buffer state. ++ * ++ * Any other serialization (with IO errors or whatever that ++ * might clear the bit) has to come from other state (eg BH_Lock). ++ */ ++ if (test_bit(BH_Uptodate, &bh->b_state)) ++ return; ++ + /* + * make it consistent with folio_mark_uptodate + * pairs with smp_load_acquire in buffer_uptodate +diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h +index c869f1e73d755..f60674692d365 100644 +--- a/include/linux/debugfs.h ++++ b/include/linux/debugfs.h +@@ -91,6 +91,8 @@ struct dentry *debugfs_create_automount(const char *name, + void debugfs_remove(struct dentry *dentry); + #define debugfs_remove_recursive debugfs_remove + ++void debugfs_lookup_and_remove(const char *name, struct dentry *parent); ++ + const struct file_operations *debugfs_real_fops(const struct file *filp); + + int debugfs_file_get(struct dentry *dentry); +@@ -225,6 +227,10 @@ static inline void debugfs_remove(struct dentry *dentry) + static inline void debugfs_remove_recursive(struct dentry *dentry) + { } + ++static inline void debugfs_lookup_and_remove(const char *name, ++ struct dentry *parent) ++{ } ++ + const struct file_operations *debugfs_real_fops(const struct file *filp); + + static inline int debugfs_file_get(struct dentry *dentry) +diff --git a/include/linux/dmar.h b/include/linux/dmar.h +index cbd714a198a0a..f3a3d95df5325 100644 +--- a/include/linux/dmar.h ++++ b/include/linux/dmar.h +@@ -69,6 +69,7 @@ struct dmar_pci_notify_info { + + extern struct rw_semaphore dmar_global_lock; + extern struct list_head dmar_drhd_units; ++extern int intel_iommu_enabled; + + #define for_each_drhd_unit(drhd) \ + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ +@@ -92,7 +93,8 @@ extern struct list_head dmar_drhd_units; + static inline bool dmar_rcu_check(void) + { + return rwsem_is_locked(&dmar_global_lock) || +- system_state == SYSTEM_BOOTING; ++ system_state == SYSTEM_BOOTING || ++ (IS_ENABLED(CONFIG_INTEL_IOMMU) && !intel_iommu_enabled); + } + + #define dmar_rcu_dereference(p) rcu_dereference_check((p), dmar_rcu_check()) +diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h +index eafa1d2489fda..4e94755098f19 100644 +--- a/include/linux/lsm_hook_defs.h ++++ b/include/linux/lsm_hook_defs.h +@@ -406,4 +406,5 @@ LSM_HOOK(int, 0, perf_event_write, struct perf_event *event) + #ifdef CONFIG_IO_URING + LSM_HOOK(int, 0, uring_override_creds, const struct cred *new) + LSM_HOOK(int, 0, uring_sqpoll, void) ++LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) + #endif /* CONFIG_IO_URING */ +diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h +index 91c8146649f59..b681cfce6190a 100644 +--- a/include/linux/lsm_hooks.h ++++ b/include/linux/lsm_hooks.h +@@ -1575,6 +1575,9 @@ + * Check whether the current task is allowed to spawn a io_uring polling + * thread (IORING_SETUP_SQPOLL). 
+ * ++ * @uring_cmd: ++ * Check whether the file_operations uring_cmd is allowed to run. ++ * + */ + union security_list_options { + #define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__); +diff --git a/include/linux/security.h b/include/linux/security.h +index 7fc4e9f49f542..3cc127bb5bfd4 100644 +--- a/include/linux/security.h ++++ b/include/linux/security.h +@@ -2051,6 +2051,7 @@ static inline int security_perf_event_write(struct perf_event *event) + #ifdef CONFIG_SECURITY + extern int security_uring_override_creds(const struct cred *new); + extern int security_uring_sqpoll(void); ++extern int security_uring_cmd(struct io_uring_cmd *ioucmd); + #else + static inline int security_uring_override_creds(const struct cred *new) + { +@@ -2060,6 +2061,10 @@ static inline int security_uring_sqpoll(void) + { + return 0; + } ++static inline int security_uring_cmd(struct io_uring_cmd *ioucmd) ++{ ++ return 0; ++} + #endif /* CONFIG_SECURITY */ + #endif /* CONFIG_IO_URING */ + +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h +index 2f41364a6791e..63d0a21b63162 100644 +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -2528,6 +2528,22 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) + return skb_headlen(skb) + __skb_pagelen(skb); + } + ++static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, ++ int i, struct page *page, ++ int off, int size) ++{ ++ skb_frag_t *frag = &shinfo->frags[i]; ++ ++ /* ++ * Propagate page pfmemalloc to the skb if we can. The problem is ++ * that not all callers have unique ownership of the page but rely ++ * on page_is_pfmemalloc doing the right thing(tm). ++ */ ++ frag->bv_page = page; ++ frag->bv_offset = off; ++ skb_frag_size_set(frag, size); ++} ++ + /** + * __skb_fill_page_desc - initialise a paged fragment in an skb + * @skb: buffer containing fragment to be initialised +@@ -2544,17 +2560,7 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) + static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, + struct page *page, int off, int size) + { +- skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; +- +- /* +- * Propagate page pfmemalloc to the skb if we can. The problem is +- * that not all callers have unique ownership of the page but rely +- * on page_is_pfmemalloc doing the right thing(tm). +- */ +- frag->bv_page = page; +- frag->bv_offset = off; +- skb_frag_size_set(frag, size); +- ++ __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size); + page = compound_head(page); + if (page_is_pfmemalloc(page)) + skb->pfmemalloc = true; +@@ -2581,6 +2587,27 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i, + skb_shinfo(skb)->nr_frags = i + 1; + } + ++/** ++ * skb_fill_page_desc_noacc - initialise a paged fragment in an skb ++ * @skb: buffer containing fragment to be initialised ++ * @i: paged fragment index to initialise ++ * @page: the page to use for this fragment ++ * @off: the offset to the data with @page ++ * @size: the length of the data ++ * ++ * Variant of skb_fill_page_desc() which does not deal with ++ * pfmemalloc, if page is not owned by us. 
++ */ ++static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, ++ struct page *page, int off, ++ int size) ++{ ++ struct skb_shared_info *shinfo = skb_shinfo(skb); ++ ++ __skb_fill_page_desc_noacc(shinfo, i, page, off, size); ++ shinfo->nr_frags = i + 1; ++} ++ + void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, + int size, unsigned int truesize); + +diff --git a/include/linux/time64.h b/include/linux/time64.h +index 81b9686a20799..2fb8232cff1d5 100644 +--- a/include/linux/time64.h ++++ b/include/linux/time64.h +@@ -20,6 +20,9 @@ struct itimerspec64 { + struct timespec64 it_value; + }; + ++/* Parameters used to convert the timespec values: */ ++#define PSEC_PER_NSEC 1000L ++ + /* Located here for timespec[64]_valid_strict */ + #define TIME64_MAX ((s64)~((u64)1 << 63)) + #define TIME64_MIN (-TIME64_MAX - 1) +diff --git a/include/linux/udp.h b/include/linux/udp.h +index 254a2654400f8..e96da4157d04d 100644 +--- a/include/linux/udp.h ++++ b/include/linux/udp.h +@@ -70,6 +70,7 @@ struct udp_sock { + * For encapsulation sockets. + */ + int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); ++ void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset); + int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb); + void (*encap_destroy)(struct sock *sk); + +diff --git a/include/net/bonding.h b/include/net/bonding.h +index cb904d356e31e..3b816ae8b1f3b 100644 +--- a/include/net/bonding.h ++++ b/include/net/bonding.h +@@ -161,8 +161,9 @@ struct slave { + struct net_device *dev; /* first - useful for panic debug */ + struct bonding *bond; /* our master */ + int delay; +- /* all three in jiffies */ ++ /* all 4 in jiffies */ + unsigned long last_link_up; ++ unsigned long last_tx; + unsigned long last_rx; + unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS]; + s8 link; /* one of BOND_LINK_XXXX */ +@@ -539,6 +540,16 @@ static inline unsigned long slave_last_rx(struct bonding *bond, + return slave->last_rx; + } + ++static inline void slave_update_last_tx(struct slave *slave) ++{ ++ WRITE_ONCE(slave->last_tx, jiffies); ++} ++ ++static inline unsigned long slave_last_tx(struct slave *slave) ++{ ++ return READ_ONCE(slave->last_tx); ++} ++ + #ifdef CONFIG_NET_POLL_CONTROLLER + static inline netdev_tx_t bond_netpoll_send_skb(const struct slave *slave, + struct sk_buff *skb) +diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h +index afc7ce713657b..72394f441dad8 100644 +--- a/include/net/udp_tunnel.h ++++ b/include/net/udp_tunnel.h +@@ -67,6 +67,9 @@ static inline int udp_sock_create(struct net *net, + typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); + typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, + struct sk_buff *skb); ++typedef void (*udp_tunnel_encap_err_rcv_t)(struct sock *sk, ++ struct sk_buff *skb, ++ unsigned int udp_offset); + typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); + typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, + struct list_head *head, +@@ -80,6 +83,7 @@ struct udp_tunnel_sock_cfg { + __u8 encap_type; + udp_tunnel_encap_rcv_t encap_rcv; + udp_tunnel_encap_err_lookup_t encap_err_lookup; ++ udp_tunnel_encap_err_rcv_t encap_err_rcv; + udp_tunnel_encap_destroy_t encap_destroy; + udp_tunnel_gro_receive_t gro_receive; + udp_tunnel_gro_complete_t gro_complete; +diff --git a/include/soc/at91/sama7-ddr.h b/include/soc/at91/sama7-ddr.h +index 9e17247474fa9..6ce3bd22f6c69 100644 +--- a/include/soc/at91/sama7-ddr.h ++++ 
b/include/soc/at91/sama7-ddr.h +@@ -38,6 +38,14 @@ + #define DDR3PHY_DSGCR_ODTPDD_ODT0 (1 << 20) /* ODT[0] Power Down Driver */ + + #define DDR3PHY_ZQ0SR0 (0x188) /* ZQ status register 0 */ ++#define DDR3PHY_ZQ0SR0_PDO_OFF (0) /* Pull-down output impedance select offset */ ++#define DDR3PHY_ZQ0SR0_PUO_OFF (5) /* Pull-up output impedance select offset */ ++#define DDR3PHY_ZQ0SR0_PDODT_OFF (10) /* Pull-down on-die termination impedance select offset */ ++#define DDR3PHY_ZQ0SRO_PUODT_OFF (15) /* Pull-up on-die termination impedance select offset */ ++ ++#define DDR3PHY_DX0DLLCR (0x1CC) /* DDR3PHY DATX8 DLL Control Register */ ++#define DDR3PHY_DX1DLLCR (0x20C) /* DDR3PHY DATX8 DLL Control Register */ ++#define DDR3PHY_DXDLLCR_DLLDIS (1 << 31) /* DLL Disable */ + + /* UDDRC */ + #define UDDRC_STAT (0x04) /* UDDRC Operating Mode Status Register */ +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index cd155b7e1346d..48833d0edd089 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -4878,6 +4878,10 @@ static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) + if (!req->file->f_op->uring_cmd) + return -EOPNOTSUPP; + ++ ret = security_uring_cmd(ioucmd); ++ if (ret) ++ return ret; ++ + if (ctx->flags & IORING_SETUP_SQE128) + issue_flags |= IO_URING_F_SQE128; + if (ctx->flags & IORING_SETUP_CQE32) +@@ -8260,6 +8264,7 @@ static void io_queue_async(struct io_kiocb *req, int ret) + + switch (io_arm_poll_handler(req, 0)) { + case IO_APOLL_READY: ++ io_kbuf_recycle(req, 0); + io_req_task_queue(req); + break; + case IO_APOLL_ABORTED: +diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c +index ce95aee05e8ae..e702ca368539a 100644 +--- a/kernel/cgroup/cgroup.c ++++ b/kernel/cgroup/cgroup.c +@@ -2346,6 +2346,47 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) + } + EXPORT_SYMBOL_GPL(task_cgroup_path); + ++/** ++ * cgroup_attach_lock - Lock for ->attach() ++ * @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem ++ * ++ * cgroup migration sometimes needs to stabilize threadgroups against forks and ++ * exits by write-locking cgroup_threadgroup_rwsem. However, some ->attach() ++ * implementations (e.g. cpuset), also need to disable CPU hotplug. ++ * Unfortunately, letting ->attach() operations acquire cpus_read_lock() can ++ * lead to deadlocks. ++ * ++ * Bringing up a CPU may involve creating and destroying tasks which requires ++ * read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside ++ * cpus_read_lock(). If we call an ->attach() which acquires the cpus lock while ++ * write-locking threadgroup_rwsem, the locking order is reversed and we end up ++ * waiting for an on-going CPU hotplug operation which in turn is waiting for ++ * the threadgroup_rwsem to be released to create new tasks. For more details: ++ * ++ * http://lkml.kernel.org/r/20220711174629.uehfmqegcwn2lqzu@wubuntu ++ * ++ * Resolve the situation by always acquiring cpus_read_lock() before optionally ++ * write-locking cgroup_threadgroup_rwsem. This allows ->attach() to assume that ++ * CPU hotplug is disabled on entry. 
++ */ ++static void cgroup_attach_lock(bool lock_threadgroup) ++{ ++ cpus_read_lock(); ++ if (lock_threadgroup) ++ percpu_down_write(&cgroup_threadgroup_rwsem); ++} ++ ++/** ++ * cgroup_attach_unlock - Undo cgroup_attach_lock() ++ * @lock_threadgroup: whether to up_write cgroup_threadgroup_rwsem ++ */ ++static void cgroup_attach_unlock(bool lock_threadgroup) ++{ ++ if (lock_threadgroup) ++ percpu_up_write(&cgroup_threadgroup_rwsem); ++ cpus_read_unlock(); ++} ++ + /** + * cgroup_migrate_add_task - add a migration target task to a migration context + * @task: target task +@@ -2822,8 +2863,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, + } + + struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, +- bool *locked) +- __acquires(&cgroup_threadgroup_rwsem) ++ bool *threadgroup_locked) + { + struct task_struct *tsk; + pid_t pid; +@@ -2840,12 +2880,8 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, + * Therefore, we can skip the global lock. + */ + lockdep_assert_held(&cgroup_mutex); +- if (pid || threadgroup) { +- percpu_down_write(&cgroup_threadgroup_rwsem); +- *locked = true; +- } else { +- *locked = false; +- } ++ *threadgroup_locked = pid || threadgroup; ++ cgroup_attach_lock(*threadgroup_locked); + + rcu_read_lock(); + if (pid) { +@@ -2876,17 +2912,14 @@ struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, + goto out_unlock_rcu; + + out_unlock_threadgroup: +- if (*locked) { +- percpu_up_write(&cgroup_threadgroup_rwsem); +- *locked = false; +- } ++ cgroup_attach_unlock(*threadgroup_locked); ++ *threadgroup_locked = false; + out_unlock_rcu: + rcu_read_unlock(); + return tsk; + } + +-void cgroup_procs_write_finish(struct task_struct *task, bool locked) +- __releases(&cgroup_threadgroup_rwsem) ++void cgroup_procs_write_finish(struct task_struct *task, bool threadgroup_locked) + { + struct cgroup_subsys *ss; + int ssid; +@@ -2894,8 +2927,8 @@ void cgroup_procs_write_finish(struct task_struct *task, bool locked) + /* release reference from cgroup_procs_write_start() */ + put_task_struct(task); + +- if (locked) +- percpu_up_write(&cgroup_threadgroup_rwsem); ++ cgroup_attach_unlock(threadgroup_locked); ++ + for_each_subsys(ss, ssid) + if (ss->post_attach) + ss->post_attach(); +@@ -2950,12 +2983,11 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) + struct cgroup_subsys_state *d_css; + struct cgroup *dsct; + struct css_set *src_cset; ++ bool has_tasks; + int ret; + + lockdep_assert_held(&cgroup_mutex); + +- percpu_down_write(&cgroup_threadgroup_rwsem); +- + /* look up all csses currently attached to @cgrp's subtree */ + spin_lock_irq(&css_set_lock); + cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { +@@ -2966,6 +2998,15 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) + } + spin_unlock_irq(&css_set_lock); + ++ /* ++ * We need to write-lock threadgroup_rwsem while migrating tasks. ++ * However, if there are no source csets for @cgrp, changing its ++ * controllers isn't gonna produce any task migrations and the ++ * write-locking can be skipped safely. 
++ */ ++ has_tasks = !list_empty(&mgctx.preloaded_src_csets); ++ cgroup_attach_lock(has_tasks); ++ + /* NULL dst indicates self on default hierarchy */ + ret = cgroup_migrate_prepare_dst(&mgctx); + if (ret) +@@ -2985,7 +3026,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) + ret = cgroup_migrate_execute(&mgctx); + out_finish: + cgroup_migrate_finish(&mgctx); +- percpu_up_write(&cgroup_threadgroup_rwsem); ++ cgroup_attach_unlock(has_tasks); + return ret; + } + +@@ -4933,13 +4974,13 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, + struct task_struct *task; + const struct cred *saved_cred; + ssize_t ret; +- bool locked; ++ bool threadgroup_locked; + + dst_cgrp = cgroup_kn_lock_live(of->kn, false); + if (!dst_cgrp) + return -ENODEV; + +- task = cgroup_procs_write_start(buf, threadgroup, &locked); ++ task = cgroup_procs_write_start(buf, threadgroup, &threadgroup_locked); + ret = PTR_ERR_OR_ZERO(task); + if (ret) + goto out_unlock; +@@ -4965,7 +5006,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, + ret = cgroup_attach_task(dst_cgrp, task, threadgroup); + + out_finish: +- cgroup_procs_write_finish(task, locked); ++ cgroup_procs_write_finish(task, threadgroup_locked); + out_unlock: + cgroup_kn_unlock(of->kn); + +diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c +index 58aadfda9b8b3..1f3a55297f39d 100644 +--- a/kernel/cgroup/cpuset.c ++++ b/kernel/cgroup/cpuset.c +@@ -2289,7 +2289,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) + cgroup_taskset_first(tset, &css); + cs = css_cs(css); + +- cpus_read_lock(); ++ lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */ + percpu_down_write(&cpuset_rwsem); + + guarantee_online_mems(cs, &cpuset_attach_nodemask_to); +@@ -2343,7 +2343,6 @@ static void cpuset_attach(struct cgroup_taskset *tset) + wake_up(&cpuset_attach_wq); + + percpu_up_write(&cpuset_rwsem); +- cpus_read_unlock(); + } + + /* The various types of files and directories in a cpuset file system */ +diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c +index 5830dce6081b3..ce34d50f7a9bb 100644 +--- a/kernel/dma/swiotlb.c ++++ b/kernel/dma/swiotlb.c +@@ -464,7 +464,10 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size + } + } + +-#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT)) ++static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx) ++{ ++ return start + (idx << IO_TLB_SHIFT); ++} + + /* + * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. 
+diff --git a/kernel/fork.c b/kernel/fork.c +index 9d44f2d46c696..d587c85f35b1e 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -1225,6 +1225,7 @@ void mmput_async(struct mm_struct *mm) + schedule_work(&mm->async_put_work); + } + } ++EXPORT_SYMBOL_GPL(mmput_async); + #endif + + /** +diff --git a/kernel/kprobes.c b/kernel/kprobes.c +index 08350e35aba24..ca9d834d0b843 100644 +--- a/kernel/kprobes.c ++++ b/kernel/kprobes.c +@@ -1562,6 +1562,7 @@ static int check_kprobe_address_safe(struct kprobe *p, + /* Ensure it is not in reserved area nor out of text */ + if (!(core_kernel_text((unsigned long) p->addr) || + is_module_text_address((unsigned long) p->addr)) || ++ in_gate_area_no_mm((unsigned long) p->addr) || + within_kprobe_blacklist((unsigned long) p->addr) || + jump_label_text_reserved(p->addr, p->addr) || + static_call_text_reserved(p->addr, p->addr) || +diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c +index bb3d63bdf4ae8..667876da8382d 100644 +--- a/kernel/sched/debug.c ++++ b/kernel/sched/debug.c +@@ -416,7 +416,7 @@ void update_sched_domain_debugfs(void) + char buf[32]; + + snprintf(buf, sizeof(buf), "cpu%d", cpu); +- debugfs_remove(debugfs_lookup(buf, sd_dentry)); ++ debugfs_lookup_and_remove(buf, sd_dentry); + d_cpu = debugfs_create_dir(buf, sd_dentry); + + i = 0; +diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c +index cb866c3141af2..918730d749325 100644 +--- a/kernel/trace/trace_events_trigger.c ++++ b/kernel/trace/trace_events_trigger.c +@@ -142,7 +142,8 @@ static bool check_user_trigger(struct trace_event_file *file) + { + struct event_trigger_data *data; + +- list_for_each_entry_rcu(data, &file->triggers, list) { ++ list_for_each_entry_rcu(data, &file->triggers, list, ++ lockdep_is_held(&event_mutex)) { + if (data->flags & EVENT_TRIGGER_FL_PROBE) + continue; + return true; +diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c +index 95b58bd757ce4..1e130da1b742c 100644 +--- a/kernel/trace/trace_preemptirq.c ++++ b/kernel/trace/trace_preemptirq.c +@@ -95,14 +95,14 @@ __visible void trace_hardirqs_on_caller(unsigned long caller_addr) + } + + lockdep_hardirqs_on_prepare(); +- lockdep_hardirqs_on(CALLER_ADDR0); ++ lockdep_hardirqs_on(caller_addr); + } + EXPORT_SYMBOL(trace_hardirqs_on_caller); + NOKPROBE_SYMBOL(trace_hardirqs_on_caller); + + __visible void trace_hardirqs_off_caller(unsigned long caller_addr) + { +- lockdep_hardirqs_off(CALLER_ADDR0); ++ lockdep_hardirqs_off(caller_addr); + + if (!this_cpu_read(tracing_irq_cpu)) { + this_cpu_write(tracing_irq_cpu, 1); +diff --git a/mm/kmemleak.c b/mm/kmemleak.c +index a182f5ddaf68b..acd7cbb82e160 100644 +--- a/mm/kmemleak.c ++++ b/mm/kmemleak.c +@@ -1132,7 +1132,7 @@ EXPORT_SYMBOL(kmemleak_no_scan); + void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count, + gfp_t gfp) + { +- if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) ++ if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + kmemleak_alloc(__va(phys), size, min_count, gfp); + } + EXPORT_SYMBOL(kmemleak_alloc_phys); +@@ -1146,7 +1146,7 @@ EXPORT_SYMBOL(kmemleak_alloc_phys); + */ + void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size) + { +- if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) ++ if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + kmemleak_free_part(__va(phys), size); + } + EXPORT_SYMBOL(kmemleak_free_part_phys); +@@ -1158,7 +1158,7 @@ EXPORT_SYMBOL(kmemleak_free_part_phys); + */ + void __ref 
kmemleak_not_leak_phys(phys_addr_t phys) + { +- if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) ++ if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + kmemleak_not_leak(__va(phys)); + } + EXPORT_SYMBOL(kmemleak_not_leak_phys); +@@ -1170,7 +1170,7 @@ EXPORT_SYMBOL(kmemleak_not_leak_phys); + */ + void __ref kmemleak_ignore_phys(phys_addr_t phys) + { +- if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) ++ if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) + kmemleak_ignore(__va(phys)); + } + EXPORT_SYMBOL(kmemleak_ignore_phys); +diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c +index ff47790366497..f20f4373ff408 100644 +--- a/net/bridge/br_netfilter_hooks.c ++++ b/net/bridge/br_netfilter_hooks.c +@@ -384,6 +384,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ + /* - Bridged-and-DNAT'ed traffic doesn't + * require ip_forwarding. */ + if (rt->dst.dev == dev) { ++ skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + goto bridged_dnat; + } +@@ -413,6 +414,7 @@ bridged_dnat: + kfree_skb(skb); + return 0; + } ++ skb_dst_drop(skb); + skb_dst_set_noref(skb, &rt->dst); + } + +diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c +index e4e0c836c3f51..6b07f30675bb0 100644 +--- a/net/bridge/br_netfilter_ipv6.c ++++ b/net/bridge/br_netfilter_ipv6.c +@@ -197,6 +197,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc + kfree_skb(skb); + return 0; + } ++ skb_dst_drop(skb); + skb_dst_set_noref(skb, &rt->dst); + } + +diff --git a/net/core/datagram.c b/net/core/datagram.c +index 50f4faeea76cc..48e82438acb02 100644 +--- a/net/core/datagram.c ++++ b/net/core/datagram.c +@@ -675,7 +675,7 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, + page_ref_sub(last_head, refs); + refs = 0; + } +- skb_fill_page_desc(skb, frag++, head, start, size); ++ skb_fill_page_desc_noacc(skb, frag++, head, start, size); + } + if (refs) + page_ref_sub(last_head, refs); +diff --git a/net/core/skbuff.c b/net/core/skbuff.c +index bebf58464d667..4b2b07a9422cf 100644 +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -4179,9 +4179,8 @@ normal: + SKB_GSO_CB(nskb)->csum_start = + skb_headroom(nskb) + doffset; + } else { +- skb_copy_bits(head_skb, offset, +- skb_put(nskb, len), +- len); ++ if (skb_copy_bits(head_skb, offset, skb_put(nskb, len), len)) ++ goto err; + } + continue; + } +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index 3d446773ff2a5..ab03977b65781 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1015,7 +1015,7 @@ new_segment: + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); + } else { + get_page(page); +- skb_fill_page_desc(skb, i, page, offset, copy); ++ skb_fill_page_desc_noacc(skb, i, page, offset, copy); + } + + if (!(flags & MSG_NO_SHARED_FRAGS)) +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index e5435156e545d..c30696eafc361 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -2514,6 +2514,21 @@ static inline bool tcp_may_undo(const struct tcp_sock *tp) + return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp)); + } + ++static bool tcp_is_non_sack_preventing_reopen(struct sock *sk) ++{ ++ struct tcp_sock *tp = tcp_sk(sk); ++ ++ if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { ++ /* Hold old state until something *above* high_seq ++ * is ACKed. For Reno it is MUST to prevent false ++ * fast retransmits (RFC2582). SACK TCP is safe. 
*/ ++ if (!tcp_any_retrans_done(sk)) ++ tp->retrans_stamp = 0; ++ return true; ++ } ++ return false; ++} ++ + /* People celebrate: "We love our President!" */ + static bool tcp_try_undo_recovery(struct sock *sk) + { +@@ -2536,14 +2551,8 @@ static bool tcp_try_undo_recovery(struct sock *sk) + } else if (tp->rack.reo_wnd_persist) { + tp->rack.reo_wnd_persist--; + } +- if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { +- /* Hold old state until something *above* high_seq +- * is ACKed. For Reno it is MUST to prevent false +- * fast retransmits (RFC2582). SACK TCP is safe. */ +- if (!tcp_any_retrans_done(sk)) +- tp->retrans_stamp = 0; ++ if (tcp_is_non_sack_preventing_reopen(sk)) + return true; +- } + tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; + return false; +@@ -2579,6 +2588,8 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPSPURIOUSRTOS); + inet_csk(sk)->icsk_retransmits = 0; ++ if (tcp_is_non_sack_preventing_reopen(sk)) ++ return true; + if (frto_undo || tcp_is_sack(tp)) { + tcp_set_ca_state(sk, TCP_CA_Open); + tp->is_sack_reneg = 0; +diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c +index aa9f2ec3dc468..01e1d36bdf135 100644 +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -781,6 +781,8 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) + */ + if (tunnel) { + /* ...not for tunnels though: we don't have a sending socket */ ++ if (udp_sk(sk)->encap_err_rcv) ++ udp_sk(sk)->encap_err_rcv(sk, skb, iph->ihl << 2); + goto out; + } + if (!inet->recverr) { +diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c +index 8efaf8c3fe2a9..8242c8947340e 100644 +--- a/net/ipv4/udp_tunnel_core.c ++++ b/net/ipv4/udp_tunnel_core.c +@@ -72,6 +72,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, + + udp_sk(sk)->encap_type = cfg->encap_type; + udp_sk(sk)->encap_rcv = cfg->encap_rcv; ++ udp_sk(sk)->encap_err_rcv = cfg->encap_err_rcv; + udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup; + udp_sk(sk)->encap_destroy = cfg->encap_destroy; + udp_sk(sk)->gro_receive = cfg->gro_receive; +diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c +index b738eb7e1cae8..04cf06866e765 100644 +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -3557,11 +3557,15 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, + fallthrough; + case NETDEV_UP: + case NETDEV_CHANGE: +- if (dev->flags & IFF_SLAVE) ++ if (idev && idev->cnf.disable_ipv6) + break; + +- if (idev && idev->cnf.disable_ipv6) ++ if (dev->flags & IFF_SLAVE) { ++ if (event == NETDEV_UP && !IS_ERR_OR_NULL(idev) && ++ dev->flags & IFF_UP && dev->flags & IFF_MULTICAST) ++ ipv6_mc_up(idev); + break; ++ } + + if (event == NETDEV_UP) { + /* restore routes for permanent addresses */ +diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c +index 73aaabf0e9665..0b0e34ddc64e0 100644 +--- a/net/ipv6/seg6.c ++++ b/net/ipv6/seg6.c +@@ -191,6 +191,11 @@ static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info) + goto out_unlock; + } + ++ if (slen > nla_len(info->attrs[SEG6_ATTR_SECRET])) { ++ err = -EINVAL; ++ goto out_unlock; ++ } ++ + if (hinfo) { + err = seg6_hmac_info_del(net, hmackeyid); + if (err) +diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c +index e2f2e087a7531..40074bc7274ea 100644 +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -616,8 +616,11 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + } + + /* Tunnels don't have an application socket: don't pass errors back */ +- 
if (tunnel) ++ if (tunnel) { ++ if (udp_sk(sk)->encap_err_rcv) ++ udp_sk(sk)->encap_err_rcv(sk, skb, offset); + goto out; ++ } + + if (!np->recverr) { + if (!harderr || sk->sk_state != TCP_ESTABLISHED) +diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c +index 1796c456ac98b..992decbcaa5c1 100644 +--- a/net/netfilter/nf_conntrack_irc.c ++++ b/net/netfilter/nf_conntrack_irc.c +@@ -194,8 +194,9 @@ static int help(struct sk_buff *skb, unsigned int protoff, + + /* dcc_ip can be the internal OR external (NAT'ed) IP */ + tuple = &ct->tuplehash[dir].tuple; +- if (tuple->src.u3.ip != dcc_ip && +- tuple->dst.u3.ip != dcc_ip) { ++ if ((tuple->src.u3.ip != dcc_ip && ++ ct->tuplehash[!dir].tuple.dst.u3.ip != dcc_ip) || ++ dcc_port == 0) { + net_warn_ratelimited("Forged DCC command from %pI4: %pI4:%u\n", + &tuple->src.u3.ip, + &dcc_ip, dcc_port); +diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c +index a63b51dceaf2c..a634c72b1ffcf 100644 +--- a/net/netfilter/nf_conntrack_proto_tcp.c ++++ b/net/netfilter/nf_conntrack_proto_tcp.c +@@ -655,6 +655,37 @@ static bool tcp_in_window(struct nf_conn *ct, + tn->tcp_be_liberal) + res = true; + if (!res) { ++ bool seq_ok = before(seq, sender->td_maxend + 1); ++ ++ if (!seq_ok) { ++ u32 overshot = end - sender->td_maxend + 1; ++ bool ack_ok; ++ ++ ack_ok = after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1); ++ ++ if (in_recv_win && ++ ack_ok && ++ overshot <= receiver->td_maxwin && ++ before(sack, receiver->td_end + 1)) { ++ /* Work around TCPs that send more bytes than allowed by ++ * the receive window. ++ * ++ * If the (marked as invalid) packet is allowed to pass by ++ * the ruleset and the peer acks this data, then its possible ++ * all future packets will trigger 'ACK is over upper bound' check. ++ * ++ * Thus if only the sequence check fails then do update td_end so ++ * possible ACK for this data can update internal state. ++ */ ++ sender->td_end = end; ++ sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; ++ ++ nf_ct_l4proto_log_invalid(skb, ct, hook_state, ++ "%u bytes more than expected", overshot); ++ return res; ++ } ++ } ++ + nf_ct_l4proto_log_invalid(skb, ct, hook_state, + "%s", + before(seq, sender->td_maxend + 1) ? 
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index bc690238a3c56..848cc81d69926 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -2166,8 +2166,10 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, + chain->flags |= NFT_CHAIN_BASE | flags; + basechain->policy = NF_ACCEPT; + if (chain->flags & NFT_CHAIN_HW_OFFLOAD && +- !nft_chain_offload_support(basechain)) ++ !nft_chain_offload_support(basechain)) { ++ list_splice_init(&basechain->hook_list, &hook->list); + return -EOPNOTSUPP; ++ } + + flow_block_init(&basechain->flow_block); + +diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h +index 571436064cd6f..62c70709d7980 100644 +--- a/net/rxrpc/ar-internal.h ++++ b/net/rxrpc/ar-internal.h +@@ -982,6 +982,7 @@ void rxrpc_send_keepalive(struct rxrpc_peer *); + /* + * peer_event.c + */ ++void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset); + void rxrpc_error_report(struct sock *); + void rxrpc_peer_keepalive_worker(struct work_struct *); + +diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c +index 96ecb7356c0fe..79bb02eb67b2b 100644 +--- a/net/rxrpc/local_object.c ++++ b/net/rxrpc/local_object.c +@@ -137,6 +137,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) + + tuncfg.encap_type = UDP_ENCAP_RXRPC; + tuncfg.encap_rcv = rxrpc_input_packet; ++ tuncfg.encap_err_rcv = rxrpc_encap_err_rcv; + tuncfg.sk_user_data = local; + setup_udp_tunnel_sock(net, local->socket, &tuncfg); + +diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c +index be032850ae8ca..32561e9567fe3 100644 +--- a/net/rxrpc/peer_event.c ++++ b/net/rxrpc/peer_event.c +@@ -16,22 +16,105 @@ + #include + #include + #include ++#include + #include "ar-internal.h" + ++static void rxrpc_adjust_mtu(struct rxrpc_peer *, unsigned int); + static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *); + static void rxrpc_distribute_error(struct rxrpc_peer *, int, + enum rxrpc_call_completion); + + /* +- * Find the peer associated with an ICMP packet. ++ * Find the peer associated with an ICMPv4 packet. + */ + static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, +- const struct sk_buff *skb, ++ struct sk_buff *skb, ++ unsigned int udp_offset, ++ unsigned int *info, + struct sockaddr_rxrpc *srx) + { +- struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); ++ struct iphdr *ip, *ip0 = ip_hdr(skb); ++ struct icmphdr *icmp = icmp_hdr(skb); ++ struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset); + +- _enter(""); ++ _enter("%u,%u,%u", ip0->protocol, icmp->type, icmp->code); ++ ++ switch (icmp->type) { ++ case ICMP_DEST_UNREACH: ++ *info = ntohs(icmp->un.frag.mtu); ++ fallthrough; ++ case ICMP_TIME_EXCEEDED: ++ case ICMP_PARAMETERPROB: ++ ip = (struct iphdr *)((void *)icmp + 8); ++ break; ++ default: ++ return NULL; ++ } ++ ++ memset(srx, 0, sizeof(*srx)); ++ srx->transport_type = local->srx.transport_type; ++ srx->transport_len = local->srx.transport_len; ++ srx->transport.family = local->srx.transport.family; ++ ++ /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice ++ * versa? 
++ */ ++ switch (srx->transport.family) { ++ case AF_INET: ++ srx->transport_len = sizeof(srx->transport.sin); ++ srx->transport.family = AF_INET; ++ srx->transport.sin.sin_port = udp->dest; ++ memcpy(&srx->transport.sin.sin_addr, &ip->daddr, ++ sizeof(struct in_addr)); ++ break; ++ ++#ifdef CONFIG_AF_RXRPC_IPV6 ++ case AF_INET6: ++ srx->transport_len = sizeof(srx->transport.sin); ++ srx->transport.family = AF_INET; ++ srx->transport.sin.sin_port = udp->dest; ++ memcpy(&srx->transport.sin.sin_addr, &ip->daddr, ++ sizeof(struct in_addr)); ++ break; ++#endif ++ ++ default: ++ WARN_ON_ONCE(1); ++ return NULL; ++ } ++ ++ _net("ICMP {%pISp}", &srx->transport); ++ return rxrpc_lookup_peer_rcu(local, srx); ++} ++ ++#ifdef CONFIG_AF_RXRPC_IPV6 ++/* ++ * Find the peer associated with an ICMPv6 packet. ++ */ ++static struct rxrpc_peer *rxrpc_lookup_peer_icmp6_rcu(struct rxrpc_local *local, ++ struct sk_buff *skb, ++ unsigned int udp_offset, ++ unsigned int *info, ++ struct sockaddr_rxrpc *srx) ++{ ++ struct icmp6hdr *icmp = icmp6_hdr(skb); ++ struct ipv6hdr *ip, *ip0 = ipv6_hdr(skb); ++ struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset); ++ ++ _enter("%u,%u,%u", ip0->nexthdr, icmp->icmp6_type, icmp->icmp6_code); ++ ++ switch (icmp->icmp6_type) { ++ case ICMPV6_DEST_UNREACH: ++ *info = ntohl(icmp->icmp6_mtu); ++ fallthrough; ++ case ICMPV6_PKT_TOOBIG: ++ case ICMPV6_TIME_EXCEED: ++ case ICMPV6_PARAMPROB: ++ ip = (struct ipv6hdr *)((void *)icmp + 8); ++ break; ++ default: ++ return NULL; ++ } + + memset(srx, 0, sizeof(*srx)); + srx->transport_type = local->srx.transport_type; +@@ -41,6 +124,165 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, + /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice + * versa? + */ ++ switch (srx->transport.family) { ++ case AF_INET: ++ _net("Rx ICMP6 on v4 sock"); ++ srx->transport_len = sizeof(srx->transport.sin); ++ srx->transport.family = AF_INET; ++ srx->transport.sin.sin_port = udp->dest; ++ memcpy(&srx->transport.sin.sin_addr, ++ &ip->daddr.s6_addr32[3], sizeof(struct in_addr)); ++ break; ++ case AF_INET6: ++ _net("Rx ICMP6"); ++ srx->transport.sin.sin_port = udp->dest; ++ memcpy(&srx->transport.sin6.sin6_addr, &ip->daddr, ++ sizeof(struct in6_addr)); ++ break; ++ default: ++ WARN_ON_ONCE(1); ++ return NULL; ++ } ++ ++ _net("ICMP {%pISp}", &srx->transport); ++ return rxrpc_lookup_peer_rcu(local, srx); ++} ++#endif /* CONFIG_AF_RXRPC_IPV6 */ ++ ++/* ++ * Handle an error received on the local endpoint as a tunnel. 
++ */ ++void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, ++ unsigned int udp_offset) ++{ ++ struct sock_extended_err ee; ++ struct sockaddr_rxrpc srx; ++ struct rxrpc_local *local; ++ struct rxrpc_peer *peer; ++ unsigned int info = 0; ++ int err; ++ u8 version = ip_hdr(skb)->version; ++ u8 type = icmp_hdr(skb)->type; ++ u8 code = icmp_hdr(skb)->code; ++ ++ rcu_read_lock(); ++ local = rcu_dereference_sk_user_data(sk); ++ if (unlikely(!local)) { ++ rcu_read_unlock(); ++ return; ++ } ++ ++ rxrpc_new_skb(skb, rxrpc_skb_received); ++ ++ switch (ip_hdr(skb)->version) { ++ case IPVERSION: ++ peer = rxrpc_lookup_peer_icmp_rcu(local, skb, udp_offset, ++ &info, &srx); ++ break; ++#ifdef CONFIG_AF_RXRPC_IPV6 ++ case 6: ++ peer = rxrpc_lookup_peer_icmp6_rcu(local, skb, udp_offset, ++ &info, &srx); ++ break; ++#endif ++ default: ++ rcu_read_unlock(); ++ return; ++ } ++ ++ if (peer && !rxrpc_get_peer_maybe(peer)) ++ peer = NULL; ++ if (!peer) { ++ rcu_read_unlock(); ++ return; ++ } ++ ++ memset(&ee, 0, sizeof(ee)); ++ ++ switch (version) { ++ case IPVERSION: ++ switch (type) { ++ case ICMP_DEST_UNREACH: ++ switch (code) { ++ case ICMP_FRAG_NEEDED: ++ rxrpc_adjust_mtu(peer, info); ++ rcu_read_unlock(); ++ rxrpc_put_peer(peer); ++ return; ++ default: ++ break; ++ } ++ ++ err = EHOSTUNREACH; ++ if (code <= NR_ICMP_UNREACH) { ++ /* Might want to do something different with ++ * non-fatal errors ++ */ ++ //harderr = icmp_err_convert[code].fatal; ++ err = icmp_err_convert[code].errno; ++ } ++ break; ++ ++ case ICMP_TIME_EXCEEDED: ++ err = EHOSTUNREACH; ++ break; ++ default: ++ err = EPROTO; ++ break; ++ } ++ ++ ee.ee_origin = SO_EE_ORIGIN_ICMP; ++ ee.ee_type = type; ++ ee.ee_code = code; ++ ee.ee_errno = err; ++ break; ++ ++#ifdef CONFIG_AF_RXRPC_IPV6 ++ case 6: ++ switch (type) { ++ case ICMPV6_PKT_TOOBIG: ++ rxrpc_adjust_mtu(peer, info); ++ rcu_read_unlock(); ++ rxrpc_put_peer(peer); ++ return; ++ } ++ ++ icmpv6_err_convert(type, code, &err); ++ ++ if (err == EACCES) ++ err = EHOSTUNREACH; ++ ++ ee.ee_origin = SO_EE_ORIGIN_ICMP6; ++ ee.ee_type = type; ++ ee.ee_code = code; ++ ee.ee_errno = err; ++ break; ++#endif ++ } ++ ++ trace_rxrpc_rx_icmp(peer, &ee, &srx); ++ ++ rxrpc_distribute_error(peer, err, RXRPC_CALL_NETWORK_ERROR); ++ rcu_read_unlock(); ++ rxrpc_put_peer(peer); ++} ++ ++/* ++ * Find the peer associated with a local error. ++ */ ++static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local, ++ const struct sk_buff *skb, ++ struct sockaddr_rxrpc *srx) ++{ ++ struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); ++ ++ _enter(""); ++ ++ memset(srx, 0, sizeof(*srx)); ++ srx->transport_type = local->srx.transport_type; ++ srx->transport_len = local->srx.transport_len; ++ srx->transport.family = local->srx.transport.family; ++ + switch (srx->transport.family) { + case AF_INET: + srx->transport_len = sizeof(srx->transport.sin); +@@ -104,10 +346,8 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, + /* + * Handle an MTU/fragmentation problem. 
+ */ +-static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *serr) ++static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu) + { +- u32 mtu = serr->ee.ee_info; +- + _net("Rx ICMP Fragmentation Needed (%d)", mtu); + + /* wind down the local interface MTU */ +@@ -148,7 +388,7 @@ void rxrpc_error_report(struct sock *sk) + struct sock_exterr_skb *serr; + struct sockaddr_rxrpc srx; + struct rxrpc_local *local; +- struct rxrpc_peer *peer; ++ struct rxrpc_peer *peer = NULL; + struct sk_buff *skb; + + rcu_read_lock(); +@@ -172,41 +412,20 @@ void rxrpc_error_report(struct sock *sk) + } + rxrpc_new_skb(skb, rxrpc_skb_received); + serr = SKB_EXT_ERR(skb); +- if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { +- _leave("UDP empty message"); +- rcu_read_unlock(); +- rxrpc_free_skb(skb, rxrpc_skb_freed); +- return; +- } + +- peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx); +- if (peer && !rxrpc_get_peer_maybe(peer)) +- peer = NULL; +- if (!peer) { +- rcu_read_unlock(); +- rxrpc_free_skb(skb, rxrpc_skb_freed); +- _leave(" [no peer]"); +- return; +- } +- +- trace_rxrpc_rx_icmp(peer, &serr->ee, &srx); +- +- if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP && +- serr->ee.ee_type == ICMP_DEST_UNREACH && +- serr->ee.ee_code == ICMP_FRAG_NEEDED)) { +- rxrpc_adjust_mtu(peer, serr); +- rcu_read_unlock(); +- rxrpc_free_skb(skb, rxrpc_skb_freed); +- rxrpc_put_peer(peer); +- _leave(" [MTU update]"); +- return; ++ if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) { ++ peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx); ++ if (peer && !rxrpc_get_peer_maybe(peer)) ++ peer = NULL; ++ if (peer) { ++ trace_rxrpc_rx_icmp(peer, &serr->ee, &srx); ++ rxrpc_store_error(peer, serr); ++ } + } + +- rxrpc_store_error(peer, serr); + rcu_read_unlock(); + rxrpc_free_skb(skb, rxrpc_skb_freed); + rxrpc_put_peer(peer); +- + _leave(""); + } + +diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c +index 08aab5c01437d..db47844f4ac99 100644 +--- a/net/rxrpc/rxkad.c ++++ b/net/rxrpc/rxkad.c +@@ -540,7 +540,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, + * directly into the target buffer. 
+ */ + sg = _sg; +- nsg = skb_shinfo(skb)->nr_frags; ++ nsg = skb_shinfo(skb)->nr_frags + 1; + if (nsg <= 4) { + nsg = 4; + } else { +diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c +index 3d061a13d7ed2..2829455211f8c 100644 +--- a/net/sched/sch_sfb.c ++++ b/net/sched/sch_sfb.c +@@ -135,15 +135,15 @@ static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q) + } + } + +-static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) ++static void increment_qlen(const struct sfb_skb_cb *cb, struct sfb_sched_data *q) + { + u32 sfbhash; + +- sfbhash = sfb_hash(skb, 0); ++ sfbhash = cb->hashes[0]; + if (sfbhash) + increment_one_qlen(sfbhash, 0, q); + +- sfbhash = sfb_hash(skb, 1); ++ sfbhash = cb->hashes[1]; + if (sfbhash) + increment_one_qlen(sfbhash, 1, q); + } +@@ -281,8 +281,10 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, + { + + struct sfb_sched_data *q = qdisc_priv(sch); ++ unsigned int len = qdisc_pkt_len(skb); + struct Qdisc *child = q->qdisc; + struct tcf_proto *fl; ++ struct sfb_skb_cb cb; + int i; + u32 p_min = ~0; + u32 minqlen = ~0; +@@ -399,11 +401,12 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, + } + + enqueue: ++ memcpy(&cb, sfb_skb_cb(skb), sizeof(cb)); + ret = qdisc_enqueue(skb, child, to_free); + if (likely(ret == NET_XMIT_SUCCESS)) { +- qdisc_qstats_backlog_inc(sch, skb); ++ sch->qstats.backlog += len; + sch->q.qlen++; +- increment_qlen(skb, q); ++ increment_qlen(&cb, q); + } else if (net_xmit_drop_count(ret)) { + q->stats.childdrop++; + qdisc_qstats_drop(sch); +diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c +index b9c71a304d399..0b941dd63d268 100644 +--- a/net/sched/sch_taprio.c ++++ b/net/sched/sch_taprio.c +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -176,7 +177,7 @@ static ktime_t get_interval_end_time(struct sched_gate_list *sched, + + static int length_to_duration(struct taprio_sched *q, int len) + { +- return div_u64(len * atomic64_read(&q->picos_per_byte), 1000); ++ return div_u64(len * atomic64_read(&q->picos_per_byte), PSEC_PER_NSEC); + } + + /* Returns the entry corresponding to next available interval. 
If +@@ -551,7 +552,7 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch) + static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) + { + atomic_set(&entry->budget, +- div64_u64((u64)entry->interval * 1000, ++ div64_u64((u64)entry->interval * PSEC_PER_NSEC, + atomic64_read(&q->picos_per_byte))); + } + +diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c +index f40f6ed0fbdb4..1f3bb1f6b1f7b 100644 +--- a/net/smc/smc_core.c ++++ b/net/smc/smc_core.c +@@ -755,6 +755,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, + lnk->lgr = lgr; + smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */ + lnk->link_idx = link_idx; ++ lnk->wr_rx_id_compl = 0; + smc_ibdev_cnt_inc(lnk); + smcr_copy_dev_info_to_link(lnk); + atomic_set(&lnk->conn_cnt, 0); +diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h +index 4cb03e9423648..7b43a78c7f73a 100644 +--- a/net/smc/smc_core.h ++++ b/net/smc/smc_core.h +@@ -115,8 +115,10 @@ struct smc_link { + dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */ + dma_addr_t wr_rx_v2_dma_addr; /* DMA address of v2 rx buf*/ + u64 wr_rx_id; /* seq # of last recv WR */ ++ u64 wr_rx_id_compl; /* seq # of last completed WR */ + u32 wr_rx_cnt; /* number of WR recv buffers */ + unsigned long wr_rx_tstamp; /* jiffies when last buf rx */ ++ wait_queue_head_t wr_rx_empty_wait; /* wait for RQ empty */ + + struct ib_reg_wr wr_reg; /* WR register memory region */ + wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ +diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c +index 26f8f240d9e84..b0678a417e09d 100644 +--- a/net/smc/smc_wr.c ++++ b/net/smc/smc_wr.c +@@ -454,6 +454,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num) + + for (i = 0; i < num; i++) { + link = wc[i].qp->qp_context; ++ link->wr_rx_id_compl = wc[i].wr_id; + if (wc[i].status == IB_WC_SUCCESS) { + link->wr_rx_tstamp = jiffies; + smc_wr_rx_demultiplex(&wc[i]); +@@ -465,6 +466,8 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num) + case IB_WC_RNR_RETRY_EXC_ERR: + case IB_WC_WR_FLUSH_ERR: + smcr_link_down_cond_sched(link); ++ if (link->wr_rx_id_compl == link->wr_rx_id) ++ wake_up(&link->wr_rx_empty_wait); + break; + default: + smc_wr_rx_post(link); /* refill WR RX */ +@@ -639,6 +642,7 @@ void smc_wr_free_link(struct smc_link *lnk) + return; + ibdev = lnk->smcibdev->ibdev; + ++ smc_wr_drain_cq(lnk); + smc_wr_wakeup_reg_wait(lnk); + smc_wr_wakeup_tx_wait(lnk); + +@@ -889,6 +893,7 @@ int smc_wr_create_link(struct smc_link *lnk) + atomic_set(&lnk->wr_tx_refcnt, 0); + init_waitqueue_head(&lnk->wr_reg_wait); + atomic_set(&lnk->wr_reg_refcnt, 0); ++ init_waitqueue_head(&lnk->wr_rx_empty_wait); + return rc; + + dma_unmap: +diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h +index a54e90a1110fd..45e9b894d3f8a 100644 +--- a/net/smc/smc_wr.h ++++ b/net/smc/smc_wr.h +@@ -73,6 +73,11 @@ static inline void smc_wr_tx_link_put(struct smc_link *link) + wake_up_all(&link->wr_tx_wait); + } + ++static inline void smc_wr_drain_cq(struct smc_link *lnk) ++{ ++ wait_event(lnk->wr_rx_empty_wait, lnk->wr_rx_id_compl == lnk->wr_rx_id); ++} ++ + static inline void smc_wr_wakeup_tx_wait(struct smc_link *lnk) + { + wake_up_all(&lnk->wr_tx_wait); +diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c +index 2f4d23238a7e3..9618e4429f0fe 100644 +--- a/net/tipc/monitor.c ++++ b/net/tipc/monitor.c +@@ -160,7 +160,7 @@ static void map_set(u64 *up_map, int i, unsigned int v) + + static int map_get(u64 up_map, int i) + { +- return (up_map & (1 << i)) >> i; ++ 
return (up_map & (1ULL << i)) >> i; + } + + static struct tipc_peer *peer_prev(struct tipc_peer *peer) +diff --git a/security/security.c b/security/security.c +index 188b8f7822206..8b62654ff3f97 100644 +--- a/security/security.c ++++ b/security/security.c +@@ -2654,4 +2654,8 @@ int security_uring_sqpoll(void) + { + return call_int_hook(uring_sqpoll, 0); + } ++int security_uring_cmd(struct io_uring_cmd *ioucmd) ++{ ++ return call_int_hook(uring_cmd, 0, ioucmd); ++} + #endif /* CONFIG_IO_URING */ +diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c +index 1bbd53321d133..e90dfa36f79aa 100644 +--- a/security/selinux/hooks.c ++++ b/security/selinux/hooks.c +@@ -91,6 +91,7 @@ + #include + #include + #include ++#include + + #include "avc.h" + #include "objsec.h" +@@ -6990,6 +6991,28 @@ static int selinux_uring_sqpoll(void) + return avc_has_perm(&selinux_state, sid, sid, + SECCLASS_IO_URING, IO_URING__SQPOLL, NULL); + } ++ ++/** ++ * selinux_uring_cmd - check if IORING_OP_URING_CMD is allowed ++ * @ioucmd: the io_uring command structure ++ * ++ * Check to see if the current domain is allowed to execute an ++ * IORING_OP_URING_CMD against the device/file specified in @ioucmd. ++ * ++ */ ++static int selinux_uring_cmd(struct io_uring_cmd *ioucmd) ++{ ++ struct file *file = ioucmd->file; ++ struct inode *inode = file_inode(file); ++ struct inode_security_struct *isec = selinux_inode(inode); ++ struct common_audit_data ad; ++ ++ ad.type = LSM_AUDIT_DATA_FILE; ++ ad.u.file = file; ++ ++ return avc_has_perm(&selinux_state, current_sid(), isec->sid, ++ SECCLASS_IO_URING, IO_URING__CMD, &ad); ++} + #endif /* CONFIG_IO_URING */ + + /* +@@ -7234,6 +7257,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { + #ifdef CONFIG_IO_URING + LSM_HOOK_INIT(uring_override_creds, selinux_uring_override_creds), + LSM_HOOK_INIT(uring_sqpoll, selinux_uring_sqpoll), ++ LSM_HOOK_INIT(uring_cmd, selinux_uring_cmd), + #endif + + /* +diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h +index ff757ae5f2537..1c2f41ff4e551 100644 +--- a/security/selinux/include/classmap.h ++++ b/security/selinux/include/classmap.h +@@ -253,7 +253,7 @@ const struct security_class_mapping secclass_map[] = { + { "anon_inode", + { COMMON_FILE_PERMS, NULL } }, + { "io_uring", +- { "override_creds", "sqpoll", NULL } }, ++ { "override_creds", "sqpoll", "cmd", NULL } }, + { NULL } + }; + +diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c +index 6207762dbdb13..b30e20f64471c 100644 +--- a/security/smack/smack_lsm.c ++++ b/security/smack/smack_lsm.c +@@ -42,6 +42,7 @@ + #include + #include + #include ++#include + #include "smack.h" + + #define TRANS_TRUE "TRUE" +@@ -4739,6 +4740,36 @@ static int smack_uring_sqpoll(void) + return -EPERM; + } + ++/** ++ * smack_uring_cmd - check on file operations for io_uring ++ * @ioucmd: the command in question ++ * ++ * Make a best guess about whether a io_uring "command" should ++ * be allowed. Use the same logic used for determining if the ++ * file could be opened for read in the absence of better criteria. 
++ */ ++static int smack_uring_cmd(struct io_uring_cmd *ioucmd) ++{ ++ struct file *file = ioucmd->file; ++ struct smk_audit_info ad; ++ struct task_smack *tsp; ++ struct inode *inode; ++ int rc; ++ ++ if (!file) ++ return -EINVAL; ++ ++ tsp = smack_cred(file->f_cred); ++ inode = file_inode(file); ++ ++ smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); ++ smk_ad_setfield_u_fs_path(&ad, file->f_path); ++ rc = smk_tskacc(tsp, smk_of_inode(inode), MAY_READ, &ad); ++ rc = smk_bu_credfile(file->f_cred, file, MAY_READ, rc); ++ ++ return rc; ++} ++ + #endif /* CONFIG_IO_URING */ + + struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = { +@@ -4896,6 +4927,7 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { + #ifdef CONFIG_IO_URING + LSM_HOOK_INIT(uring_override_creds, smack_uring_override_creds), + LSM_HOOK_INIT(uring_sqpoll, smack_uring_sqpoll), ++ LSM_HOOK_INIT(uring_cmd, smack_uring_cmd), + #endif + }; + +diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c +index 55b3c49ba61de..244afc38ddcaa 100644 +--- a/sound/core/memalloc.c ++++ b/sound/core/memalloc.c +@@ -535,10 +535,13 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size) + dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, + sg_dma_address(sgt->sgl)); + p = dma_vmap_noncontiguous(dmab->dev.dev, size, sgt); +- if (p) ++ if (p) { + dmab->private_data = sgt; +- else ++ /* store the first page address for convenience */ ++ dmab->addr = snd_sgbuf_get_addr(dmab, 0); ++ } else { + dma_free_noncontiguous(dmab->dev.dev, size, sgt, dmab->dev.dir); ++ } + return p; + } + +@@ -772,6 +775,8 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) + if (!p) + goto error; + dmab->private_data = sgbuf; ++ /* store the first page address for convenience */ ++ dmab->addr = snd_sgbuf_get_addr(dmab, 0); + return p; + + error: +diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c +index 90c3a367d7de9..02df915eb3c66 100644 +--- a/sound/core/oss/pcm_oss.c ++++ b/sound/core/oss/pcm_oss.c +@@ -1672,14 +1672,14 @@ static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file) + runtime = substream->runtime; + if (atomic_read(&substream->mmap_count)) + goto __direct; +- err = snd_pcm_oss_make_ready(substream); +- if (err < 0) +- return err; + atomic_inc(&runtime->oss.rw_ref); + if (mutex_lock_interruptible(&runtime->oss.params_lock)) { + atomic_dec(&runtime->oss.rw_ref); + return -ERESTARTSYS; + } ++ err = snd_pcm_oss_make_ready_locked(substream); ++ if (err < 0) ++ goto unlock; + format = snd_pcm_oss_format_from(runtime->oss.format); + width = snd_pcm_format_physical_width(format); + if (runtime->oss.buffer_used > 0) { +diff --git a/sound/drivers/aloop.c b/sound/drivers/aloop.c +index 9b4a7cdb103ad..12f12a294df5a 100644 +--- a/sound/drivers/aloop.c ++++ b/sound/drivers/aloop.c +@@ -605,17 +605,18 @@ static unsigned int loopback_jiffies_timer_pos_update + cable->streams[SNDRV_PCM_STREAM_PLAYBACK]; + struct loopback_pcm *dpcm_capt = + cable->streams[SNDRV_PCM_STREAM_CAPTURE]; +- unsigned long delta_play = 0, delta_capt = 0; ++ unsigned long delta_play = 0, delta_capt = 0, cur_jiffies; + unsigned int running, count1, count2; + ++ cur_jiffies = jiffies; + running = cable->running ^ cable->pause; + if (running & (1 << SNDRV_PCM_STREAM_PLAYBACK)) { +- delta_play = jiffies - dpcm_play->last_jiffies; ++ delta_play = cur_jiffies - dpcm_play->last_jiffies; + dpcm_play->last_jiffies += delta_play; + } + + if (running & (1 << SNDRV_PCM_STREAM_CAPTURE)) { +- delta_capt 
= jiffies - dpcm_capt->last_jiffies; ++ delta_capt = cur_jiffies - dpcm_capt->last_jiffies; + dpcm_capt->last_jiffies += delta_capt; + } + +diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c +index b2701a4452d86..48af77ae8020f 100644 +--- a/sound/pci/emu10k1/emupcm.c ++++ b/sound/pci/emu10k1/emupcm.c +@@ -124,7 +124,7 @@ static int snd_emu10k1_pcm_channel_alloc(struct snd_emu10k1_pcm * epcm, int voic + epcm->voices[0]->epcm = epcm; + if (voices > 1) { + for (i = 1; i < voices; i++) { +- epcm->voices[i] = &epcm->emu->voices[epcm->voices[0]->number + i]; ++ epcm->voices[i] = &epcm->emu->voices[(epcm->voices[0]->number + i) % NUM_G]; + epcm->voices[i]->epcm = epcm; + } + } +diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c +index a77165bd92a98..b20694fd69dea 100644 +--- a/sound/pci/hda/hda_intel.c ++++ b/sound/pci/hda/hda_intel.c +@@ -1817,7 +1817,7 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, + + /* use the non-cached pages in non-snoop mode */ + if (!azx_snoop(chip)) +- azx_bus(chip)->dma_type = SNDRV_DMA_TYPE_DEV_WC; ++ azx_bus(chip)->dma_type = SNDRV_DMA_TYPE_DEV_WC_SG; + + if (chip->driver_type == AZX_DRIVER_NVIDIA) { + dev_dbg(chip->card->dev, "Enable delay in RIRB handling\n"); +diff --git a/sound/soc/atmel/mchp-spdiftx.c b/sound/soc/atmel/mchp-spdiftx.c +index d243800464352..bcca1cf3cd7b6 100644 +--- a/sound/soc/atmel/mchp-spdiftx.c ++++ b/sound/soc/atmel/mchp-spdiftx.c +@@ -196,8 +196,7 @@ struct mchp_spdiftx_dev { + struct clk *pclk; + struct clk *gclk; + unsigned int fmt; +- const struct mchp_i2s_caps *caps; +- int gclk_enabled:1; ++ unsigned int gclk_enabled:1; + }; + + static inline int mchp_spdiftx_is_running(struct mchp_spdiftx_dev *dev) +@@ -766,8 +765,6 @@ static const struct of_device_id mchp_spdiftx_dt_ids[] = { + MODULE_DEVICE_TABLE(of, mchp_spdiftx_dt_ids); + static int mchp_spdiftx_probe(struct platform_device *pdev) + { +- struct device_node *np = pdev->dev.of_node; +- const struct of_device_id *match; + struct mchp_spdiftx_dev *dev; + struct resource *mem; + struct regmap *regmap; +@@ -781,11 +778,6 @@ static int mchp_spdiftx_probe(struct platform_device *pdev) + if (!dev) + return -ENOMEM; + +- /* Get hardware capabilities. */ +- match = of_match_node(mchp_spdiftx_dt_ids, np); +- if (match) +- dev->caps = match->data; +- + /* Map I/O registers. 
*/ + base = devm_platform_get_and_ioremap_resource(pdev, 0, &mem); + if (IS_ERR(base)) +diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c +index 4fade23887972..8cba3015398b7 100644 +--- a/sound/soc/codecs/cs42l42.c ++++ b/sound/soc/codecs/cs42l42.c +@@ -1618,7 +1618,6 @@ static irqreturn_t cs42l42_irq_thread(int irq, void *data) + unsigned int current_plug_status; + unsigned int current_button_status; + unsigned int i; +- int report = 0; + + mutex_lock(&cs42l42->irq_lock); + if (cs42l42->suspended) { +@@ -1713,13 +1712,15 @@ static irqreturn_t cs42l42_irq_thread(int irq, void *data) + + if (current_button_status & CS42L42_M_DETECT_TF_MASK) { + dev_dbg(cs42l42->dev, "Button released\n"); +- report = 0; ++ snd_soc_jack_report(cs42l42->jack, 0, ++ SND_JACK_BTN_0 | SND_JACK_BTN_1 | ++ SND_JACK_BTN_2 | SND_JACK_BTN_3); + } else if (current_button_status & CS42L42_M_DETECT_FT_MASK) { +- report = cs42l42_handle_button_press(cs42l42); +- ++ snd_soc_jack_report(cs42l42->jack, ++ cs42l42_handle_button_press(cs42l42), ++ SND_JACK_BTN_0 | SND_JACK_BTN_1 | ++ SND_JACK_BTN_2 | SND_JACK_BTN_3); + } +- snd_soc_jack_report(cs42l42->jack, report, SND_JACK_BTN_0 | SND_JACK_BTN_1 | +- SND_JACK_BTN_2 | SND_JACK_BTN_3); + } + } + +diff --git a/sound/soc/qcom/sm8250.c b/sound/soc/qcom/sm8250.c +index 6e1184c8b672a..c48ac107810d4 100644 +--- a/sound/soc/qcom/sm8250.c ++++ b/sound/soc/qcom/sm8250.c +@@ -270,6 +270,7 @@ static int sm8250_platform_probe(struct platform_device *pdev) + if (!card) + return -ENOMEM; + ++ card->owner = THIS_MODULE; + /* Allocate the private data */ + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) +diff --git a/sound/soc/sof/Kconfig b/sound/soc/sof/Kconfig +index 4542868cd730f..39216c09f1597 100644 +--- a/sound/soc/sof/Kconfig ++++ b/sound/soc/sof/Kconfig +@@ -196,6 +196,7 @@ config SND_SOC_SOF_DEBUG_ENABLE_FIRMWARE_TRACE + + config SND_SOC_SOF_DEBUG_IPC_FLOOD_TEST + tristate "SOF enable IPC flood test" ++ depends on SND_SOC_SOF + select SND_SOC_SOF_CLIENT + help + This option enables a separate client device for IPC flood test +@@ -214,6 +215,7 @@ config SND_SOC_SOF_DEBUG_IPC_FLOOD_TEST_NUM + + config SND_SOC_SOF_DEBUG_IPC_MSG_INJECTOR + tristate "SOF enable IPC message injector" ++ depends on SND_SOC_SOF + select SND_SOC_SOF_CLIENT + help + This option enables the IPC message injector which can be used to send +diff --git a/sound/usb/card.c b/sound/usb/card.c +index d356743de2ff9..706d249a9ad6b 100644 +--- a/sound/usb/card.c ++++ b/sound/usb/card.c +@@ -699,7 +699,7 @@ static bool check_delayed_register_option(struct snd_usb_audio *chip, int iface) + if (delayed_register[i] && + sscanf(delayed_register[i], "%x:%x", &id, &inum) == 2 && + id == chip->usb_id) +- return inum != iface; ++ return iface < inum; + } + + return false; +diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c +index f9c921683948d..ff2aa13b7b26f 100644 +--- a/sound/usb/endpoint.c ++++ b/sound/usb/endpoint.c +@@ -758,7 +758,8 @@ bool snd_usb_endpoint_compatible(struct snd_usb_audio *chip, + * The endpoint needs to be closed via snd_usb_endpoint_close() later. + * + * Note that this function doesn't configure the endpoint. The substream +- * needs to set it up later via snd_usb_endpoint_configure(). ++ * needs to set it up later via snd_usb_endpoint_set_params() and ++ * snd_usb_endpoint_prepare(). 
+ */ + struct snd_usb_endpoint * + snd_usb_endpoint_open(struct snd_usb_audio *chip, +@@ -924,6 +925,8 @@ void snd_usb_endpoint_close(struct snd_usb_audio *chip, + endpoint_set_interface(chip, ep, false); + + if (!--ep->opened) { ++ if (ep->clock_ref && !atomic_read(&ep->clock_ref->locked)) ++ ep->clock_ref->rate = 0; + ep->iface = 0; + ep->altsetting = 0; + ep->cur_audiofmt = NULL; +@@ -1290,12 +1293,13 @@ out_of_memory: + /* + * snd_usb_endpoint_set_params: configure an snd_usb_endpoint + * ++ * It's called either from hw_params callback. + * Determine the number of URBs to be used on this endpoint. + * An endpoint must be configured before it can be started. + * An endpoint that is already running can not be reconfigured. + */ +-static int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, +- struct snd_usb_endpoint *ep) ++int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, ++ struct snd_usb_endpoint *ep) + { + const struct audioformat *fmt = ep->cur_audiofmt; + int err; +@@ -1378,18 +1382,18 @@ static int init_sample_rate(struct snd_usb_audio *chip, + } + + /* +- * snd_usb_endpoint_configure: Configure the endpoint ++ * snd_usb_endpoint_prepare: Prepare the endpoint + * + * This function sets up the EP to be fully usable state. +- * It's called either from hw_params or prepare callback. ++ * It's called either from prepare callback. + * The function checks need_setup flag, and performs nothing unless needed, + * so it's safe to call this multiple times. + * + * This returns zero if unchanged, 1 if the configuration has changed, + * or a negative error code. + */ +-int snd_usb_endpoint_configure(struct snd_usb_audio *chip, +- struct snd_usb_endpoint *ep) ++int snd_usb_endpoint_prepare(struct snd_usb_audio *chip, ++ struct snd_usb_endpoint *ep) + { + bool iface_first; + int err = 0; +@@ -1410,9 +1414,6 @@ int snd_usb_endpoint_configure(struct snd_usb_audio *chip, + if (err < 0) + goto unlock; + } +- err = snd_usb_endpoint_set_params(chip, ep); +- if (err < 0) +- goto unlock; + goto done; + } + +@@ -1440,10 +1441,6 @@ int snd_usb_endpoint_configure(struct snd_usb_audio *chip, + if (err < 0) + goto unlock; + +- err = snd_usb_endpoint_set_params(chip, ep); +- if (err < 0) +- goto unlock; +- + err = snd_usb_select_mode_quirk(chip, ep->cur_audiofmt); + if (err < 0) + goto unlock; +diff --git a/sound/usb/endpoint.h b/sound/usb/endpoint.h +index 6a9af04cf175a..e67ea28faa54f 100644 +--- a/sound/usb/endpoint.h ++++ b/sound/usb/endpoint.h +@@ -17,8 +17,10 @@ snd_usb_endpoint_open(struct snd_usb_audio *chip, + bool is_sync_ep); + void snd_usb_endpoint_close(struct snd_usb_audio *chip, + struct snd_usb_endpoint *ep); +-int snd_usb_endpoint_configure(struct snd_usb_audio *chip, +- struct snd_usb_endpoint *ep); ++int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, ++ struct snd_usb_endpoint *ep); ++int snd_usb_endpoint_prepare(struct snd_usb_audio *chip, ++ struct snd_usb_endpoint *ep); + int snd_usb_endpoint_get_clock_rate(struct snd_usb_audio *chip, int clock); + + bool snd_usb_endpoint_compatible(struct snd_usb_audio *chip, +diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c +index e692ae04436a5..02035b545f9dd 100644 +--- a/sound/usb/pcm.c ++++ b/sound/usb/pcm.c +@@ -443,17 +443,17 @@ static int configure_endpoints(struct snd_usb_audio *chip, + if (stop_endpoints(subs, false)) + sync_pending_stops(subs); + if (subs->sync_endpoint) { +- err = snd_usb_endpoint_configure(chip, subs->sync_endpoint); ++ err = snd_usb_endpoint_prepare(chip, subs->sync_endpoint); + if (err < 0) + return 
err; + } +- err = snd_usb_endpoint_configure(chip, subs->data_endpoint); ++ err = snd_usb_endpoint_prepare(chip, subs->data_endpoint); + if (err < 0) + return err; + snd_usb_set_format_quirk(subs, subs->cur_audiofmt); + } else { + if (subs->sync_endpoint) { +- err = snd_usb_endpoint_configure(chip, subs->sync_endpoint); ++ err = snd_usb_endpoint_prepare(chip, subs->sync_endpoint); + if (err < 0) + return err; + } +@@ -551,7 +551,13 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream, + subs->cur_audiofmt = fmt; + mutex_unlock(&chip->mutex); + +- ret = configure_endpoints(chip, subs); ++ if (subs->sync_endpoint) { ++ ret = snd_usb_endpoint_set_params(chip, subs->sync_endpoint); ++ if (ret < 0) ++ goto unlock; ++ } ++ ++ ret = snd_usb_endpoint_set_params(chip, subs->data_endpoint); + + unlock: + if (ret < 0) +diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c +index 9bfead5efc4c1..5b4d8f5eade20 100644 +--- a/sound/usb/quirks.c ++++ b/sound/usb/quirks.c +@@ -1764,7 +1764,7 @@ bool snd_usb_registration_quirk(struct snd_usb_audio *chip, int iface) + + for (q = registration_quirks; q->usb_id; q++) + if (chip->usb_id == q->usb_id) +- return iface != q->interface; ++ return iface < q->interface; + + /* Register as normal */ + return false; +diff --git a/sound/usb/stream.c b/sound/usb/stream.c +index ceb93d798182c..f10f4e6d3fb85 100644 +--- a/sound/usb/stream.c ++++ b/sound/usb/stream.c +@@ -495,6 +495,10 @@ static int __snd_usb_add_audio_stream(struct snd_usb_audio *chip, + return 0; + } + } ++ ++ if (chip->card->registered) ++ chip->need_delayed_register = true; ++ + /* look for an empty stream */ + list_for_each_entry(as, &chip->pcm_list, list) { + if (as->fmt_type != fp->fmt_type) +@@ -502,9 +506,6 @@ static int __snd_usb_add_audio_stream(struct snd_usb_audio *chip, + subs = &as->substream[stream]; + if (subs->ep_num) + continue; +- if (snd_device_get_state(chip->card, as->pcm) != +- SNDRV_DEV_BUILD) +- chip->need_delayed_register = true; + err = snd_pcm_new_stream(as->pcm, stream, 1); + if (err < 0) + return err; +@@ -1105,7 +1106,7 @@ static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip, + * Dallas DS4201 workaround: It presents 5 altsettings, but the last + * one misses syncpipe, and does not produce any sound. 
+ */ +- if (chip->usb_id == USB_ID(0x04fa, 0x4201)) ++ if (chip->usb_id == USB_ID(0x04fa, 0x4201) && num >= 4) + num = 4; + + for (i = 0; i < num; i++) { +diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c +index e6c98a6e3908e..6b1bafe267a42 100644 +--- a/tools/lib/perf/evlist.c ++++ b/tools/lib/perf/evlist.c +@@ -486,6 +486,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, + if (ops->idx) + ops->idx(evlist, evsel, mp, idx); + ++ pr_debug("idx %d: mmapping fd %d\n", idx, *output); + if (ops->mmap(map, mp, *output, evlist_cpu) < 0) + return -1; + +@@ -494,6 +495,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, + if (!idx) + perf_evlist__set_mmap_first(evlist, map, overwrite); + } else { ++ pr_debug("idx %d: set output fd %d -> %d\n", idx, fd, *output); + if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) + return -1; + +@@ -519,6 +521,48 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, + return 0; + } + ++static int ++mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, ++ struct perf_mmap_param *mp) ++{ ++ int nr_threads = perf_thread_map__nr(evlist->threads); ++ int nr_cpus = perf_cpu_map__nr(evlist->all_cpus); ++ int cpu, thread, idx = 0; ++ int nr_mmaps = 0; ++ ++ pr_debug("%s: nr cpu values (may include -1) %d nr threads %d\n", ++ __func__, nr_cpus, nr_threads); ++ ++ /* per-thread mmaps */ ++ for (thread = 0; thread < nr_threads; thread++, idx++) { ++ int output = -1; ++ int output_overwrite = -1; ++ ++ if (mmap_per_evsel(evlist, ops, idx, mp, 0, thread, &output, ++ &output_overwrite, &nr_mmaps)) ++ goto out_unmap; ++ } ++ ++ /* system-wide mmaps i.e. per-cpu */ ++ for (cpu = 1; cpu < nr_cpus; cpu++, idx++) { ++ int output = -1; ++ int output_overwrite = -1; ++ ++ if (mmap_per_evsel(evlist, ops, idx, mp, cpu, 0, &output, ++ &output_overwrite, &nr_mmaps)) ++ goto out_unmap; ++ } ++ ++ if (nr_mmaps != evlist->nr_mmaps) ++ pr_err("Miscounted nr_mmaps %d vs %d\n", nr_mmaps, evlist->nr_mmaps); ++ ++ return 0; ++ ++out_unmap: ++ perf_evlist__munmap(evlist); ++ return -1; ++} ++ + static int + mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, + struct perf_mmap_param *mp) +@@ -528,6 +572,8 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, + int nr_mmaps = 0; + int cpu, thread; + ++ pr_debug("%s: nr cpu values %d nr threads %d\n", __func__, nr_cpus, nr_threads); ++ + for (cpu = 0; cpu < nr_cpus; cpu++) { + int output = -1; + int output_overwrite = -1; +@@ -569,6 +615,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, + struct perf_evlist_mmap_ops *ops, + struct perf_mmap_param *mp) + { ++ const struct perf_cpu_map *cpus = evlist->all_cpus; + struct perf_evsel *evsel; + + if (!ops || !ops->get || !ops->mmap) +@@ -588,6 +635,9 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist, + if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) + return -ENOMEM; + ++ if (perf_cpu_map__empty(cpus)) ++ return mmap_per_thread(evlist, ops, mp); ++ + return mmap_per_cpu(evlist, ops, mp); + } + +diff --git a/tools/objtool/check.c b/tools/objtool/check.c +index 31c719f99f66e..5d87e0b0d85f9 100644 +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -162,32 +162,34 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, + + /* + * Unfortunately these have to be hard coded because the noreturn +- * attribute isn't provided in ELF data. 
++ * attribute isn't provided in ELF data. Keep 'em sorted. + */ + static const char * const global_noreturns[] = { ++ "__invalid_creds", ++ "__module_put_and_kthread_exit", ++ "__reiserfs_panic", + "__stack_chk_fail", +- "panic", ++ "__ubsan_handle_builtin_unreachable", ++ "cpu_bringup_and_idle", ++ "cpu_startup_entry", + "do_exit", ++ "do_group_exit", + "do_task_dead", +- "kthread_exit", +- "make_task_dead", +- "__module_put_and_kthread_exit", ++ "ex_handler_msr_mce", ++ "fortify_panic", + "kthread_complete_and_exit", +- "__reiserfs_panic", ++ "kthread_exit", ++ "kunit_try_catch_throw", + "lbug_with_loc", +- "fortify_panic", +- "usercopy_abort", + "machine_real_restart", ++ "make_task_dead", ++ "panic", + "rewind_stack_and_make_dead", +- "kunit_try_catch_throw", +- "xen_start_kernel", +- "cpu_bringup_and_idle", +- "do_group_exit", ++ "sev_es_terminate", ++ "snp_abort", + "stop_this_cpu", +- "__invalid_creds", +- "cpu_startup_entry", +- "__ubsan_handle_builtin_unreachable", +- "ex_handler_msr_mce", ++ "usercopy_abort", ++ "xen_start_kernel", + }; + + if (!func) +diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c +index 68f681ad54c1e..777bdf182a582 100644 +--- a/tools/perf/arch/x86/util/evlist.c ++++ b/tools/perf/arch/x86/util/evlist.c +@@ -8,8 +8,13 @@ + #define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}" + #define TOPDOWN_L2_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound,topdown-heavy-ops,topdown-br-mispredict,topdown-fetch-lat,topdown-mem-bound}" + +-int arch_evlist__add_default_attrs(struct evlist *evlist) ++int arch_evlist__add_default_attrs(struct evlist *evlist, ++ struct perf_event_attr *attrs, ++ size_t nr_attrs) + { ++ if (nr_attrs) ++ return __evlist__add_default_attrs(evlist, attrs, nr_attrs); ++ + if (!pmu_have_event("cpu", "slots")) + return 0; + +diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c +index 9a71f0330137e..68c878b4e5e4c 100644 +--- a/tools/perf/builtin-record.c ++++ b/tools/perf/builtin-record.c +@@ -1892,14 +1892,18 @@ static int record__synthesize(struct record *rec, bool tail) + + err = perf_event__synthesize_bpf_events(session, process_synthesized_event, + machine, opts); +- if (err < 0) ++ if (err < 0) { + pr_warning("Couldn't synthesize bpf events.\n"); ++ err = 0; ++ } + + if (rec->opts.synth & PERF_SYNTH_CGROUP) { + err = perf_event__synthesize_cgroups(tool, process_synthesized_event, + machine); +- if (err < 0) ++ if (err < 0) { + pr_warning("Couldn't synthesize cgroup events.\n"); ++ err = 0; ++ } + } + + if (rec->opts.nr_threads_synthesize > 1) { +diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c +index c689054002cca..26a572c160d6f 100644 +--- a/tools/perf/builtin-script.c ++++ b/tools/perf/builtin-script.c +@@ -441,6 +441,9 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) + struct perf_event_attr *attr = &evsel->core.attr; + bool allow_user_set; + ++ if (evsel__is_dummy_event(evsel)) ++ return 0; ++ + if (perf_header__has_feat(&session->header, HEADER_STAT)) + return 0; + +diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c +index 5f0333a8acd8a..82e14faecc3e4 100644 +--- a/tools/perf/builtin-stat.c ++++ b/tools/perf/builtin-stat.c +@@ -1778,6 +1778,9 @@ static int add_default_attributes(void) + (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, + }; ++ ++ struct perf_event_attr default_null_attrs[] = {}; ++ + 
/* Set attrs if no event is selected and !null_run: */ + if (stat_config.null_run) + return 0; +@@ -1941,6 +1944,9 @@ setup_metrics: + free(str); + } + ++ if (!stat_config.topdown_level) ++ stat_config.topdown_level = TOPDOWN_MAX_LEVEL; ++ + if (!evsel_list->core.nr_entries) { + if (target__has_cpu(&target)) + default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; +@@ -1957,9 +1963,8 @@ setup_metrics: + } + if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0) + return -1; +- +- stat_config.topdown_level = TOPDOWN_MAX_LEVEL; +- if (arch_evlist__add_default_attrs(evsel_list) < 0) ++ /* Platform specific attrs */ ++ if (evlist__add_default_attrs(evsel_list, default_null_attrs) < 0) + return -1; + } + +diff --git a/tools/perf/dlfilters/dlfilter-show-cycles.c b/tools/perf/dlfilters/dlfilter-show-cycles.c +index 9eccc97bff82f..6d47298ebe9f6 100644 +--- a/tools/perf/dlfilters/dlfilter-show-cycles.c ++++ b/tools/perf/dlfilters/dlfilter-show-cycles.c +@@ -98,9 +98,9 @@ int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, vo + static void print_vals(__u64 cycles, __u64 delta) + { + if (delta) +- printf("%10llu %10llu ", cycles, delta); ++ printf("%10llu %10llu ", (unsigned long long)cycles, (unsigned long long)delta); + else +- printf("%10llu %10s ", cycles, ""); ++ printf("%10llu %10s ", (unsigned long long)cycles, ""); + } + + int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx) +diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c +index 48af7d379d822..efa5f006b5c61 100644 +--- a/tools/perf/util/evlist.c ++++ b/tools/perf/util/evlist.c +@@ -342,9 +342,14 @@ int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *a + return evlist__add_attrs(evlist, attrs, nr_attrs); + } + +-__weak int arch_evlist__add_default_attrs(struct evlist *evlist __maybe_unused) ++__weak int arch_evlist__add_default_attrs(struct evlist *evlist, ++ struct perf_event_attr *attrs, ++ size_t nr_attrs) + { +- return 0; ++ if (!nr_attrs) ++ return 0; ++ ++ return __evlist__add_default_attrs(evlist, attrs, nr_attrs); + } + + struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id) +diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h +index 1bde9ccf4e7da..129095c0fe6d3 100644 +--- a/tools/perf/util/evlist.h ++++ b/tools/perf/util/evlist.h +@@ -107,10 +107,13 @@ static inline int evlist__add_default(struct evlist *evlist) + int __evlist__add_default_attrs(struct evlist *evlist, + struct perf_event_attr *attrs, size_t nr_attrs); + ++int arch_evlist__add_default_attrs(struct evlist *evlist, ++ struct perf_event_attr *attrs, ++ size_t nr_attrs); ++ + #define evlist__add_default_attrs(evlist, array) \ +- __evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) ++ arch_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) + +-int arch_evlist__add_default_attrs(struct evlist *evlist); + struct evsel *arch_evlist__leader(struct list_head *list); + + int evlist__add_dummy(struct evlist *evlist); diff --git a/sys-kernel/pinephone-sources/files/5.19.9-10.patch b/sys-kernel/pinephone-sources/files/5.19.9-10.patch new file mode 100644 index 0000000..331692b --- /dev/null +++ b/sys-kernel/pinephone-sources/files/5.19.9-10.patch @@ -0,0 +1,1723 @@ +diff --git a/Documentation/devicetree/bindings/iio/gyroscope/bosch,bmg160.yaml b/Documentation/devicetree/bindings/iio/gyroscope/bosch,bmg160.yaml +index b6bbc312a7cf7..1414ba9977c16 100644 +--- a/Documentation/devicetree/bindings/iio/gyroscope/bosch,bmg160.yaml 
++++ b/Documentation/devicetree/bindings/iio/gyroscope/bosch,bmg160.yaml +@@ -24,8 +24,10 @@ properties: + + interrupts: + minItems: 1 ++ maxItems: 2 + description: + Should be configured with type IRQ_TYPE_EDGE_RISING. ++ If two interrupts are provided, expected order is INT1 and INT2. + + required: + - compatible +diff --git a/Documentation/input/joydev/joystick.rst b/Documentation/input/joydev/joystick.rst +index f615906a0821b..6d721396717a2 100644 +--- a/Documentation/input/joydev/joystick.rst ++++ b/Documentation/input/joydev/joystick.rst +@@ -517,6 +517,7 @@ All I-Force devices are supported by the iforce module. This includes: + * AVB Mag Turbo Force + * AVB Top Shot Pegasus + * AVB Top Shot Force Feedback Racing Wheel ++* Boeder Force Feedback Wheel + * Logitech WingMan Force + * Logitech WingMan Force Wheel + * Guillemot Race Leader Force Feedback +diff --git a/Makefile b/Makefile +index 1f27c4bd09e67..33a9b6b547c47 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 5 + PATCHLEVEL = 19 +-SUBLEVEL = 9 ++SUBLEVEL = 10 + EXTRAVERSION = + NAME = Superb Owl + +diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig +index 62b5b07fa4e1c..ca64bf5f5b038 100644 +--- a/arch/loongarch/Kconfig ++++ b/arch/loongarch/Kconfig +@@ -36,6 +36,7 @@ config LOONGARCH + select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK_IRQ if !PREEMPTION + select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION ++ select ARCH_KEEP_MEMBLOCK + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_SPARSEMEM_ENABLE +diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h +index 62044cd5b7bc5..825c2519b9d1f 100644 +--- a/arch/loongarch/include/asm/acpi.h ++++ b/arch/loongarch/include/asm/acpi.h +@@ -15,7 +15,7 @@ extern int acpi_pci_disabled; + extern int acpi_noirq; + + #define acpi_os_ioremap acpi_os_ioremap +-void __init __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); ++void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); + + static inline void disable_acpi(void) + { +diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c +index bb729ee8a2370..796a24055a942 100644 +--- a/arch/loongarch/kernel/acpi.c ++++ b/arch/loongarch/kernel/acpi.c +@@ -113,7 +113,7 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size) + early_memunmap(map, size); + } + +-void __init __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) ++void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) + { + if (!memblock_is_memory(phys)) + return ioremap(phys, size); +diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c +index 7094a68c9b832..3c3fbff0b8f86 100644 +--- a/arch/loongarch/mm/init.c ++++ b/arch/loongarch/mm/init.c +@@ -131,18 +131,6 @@ int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) + return ret; + } + +-#ifdef CONFIG_NUMA +-int memory_add_physaddr_to_nid(u64 start) +-{ +- int nid; +- +- nid = pa_to_nid(start); +- return nid; +-} +-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); +-#endif +- +-#ifdef CONFIG_MEMORY_HOTREMOVE + void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) + { + unsigned long start_pfn = start >> PAGE_SHIFT; +@@ -154,6 +142,16 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) + page += vmem_altmap_offset(altmap); + __remove_pages(start_pfn, nr_pages, altmap); + } ++ ++#ifdef CONFIG_NUMA 
++int memory_add_physaddr_to_nid(u64 start) ++{ ++ int nid; ++ ++ nid = pa_to_nid(start); ++ return nid; ++} ++EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); + #endif + #endif + +diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c +index 356226c7ebbdc..aa1ba803659cd 100644 +--- a/arch/x86/kvm/mmu/mmu.c ++++ b/arch/x86/kvm/mmu/mmu.c +@@ -5907,47 +5907,18 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, + const struct kvm_memory_slot *memslot, + int start_level) + { +- bool flush = false; +- + if (kvm_memslots_have_rmaps(kvm)) { + write_lock(&kvm->mmu_lock); +- flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect, +- start_level, KVM_MAX_HUGEPAGE_LEVEL, +- false); ++ slot_handle_level(kvm, memslot, slot_rmap_write_protect, ++ start_level, KVM_MAX_HUGEPAGE_LEVEL, false); + write_unlock(&kvm->mmu_lock); + } + + if (is_tdp_mmu_enabled(kvm)) { + read_lock(&kvm->mmu_lock); +- flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level); ++ kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level); + read_unlock(&kvm->mmu_lock); + } +- +- /* +- * Flush TLBs if any SPTEs had to be write-protected to ensure that +- * guest writes are reflected in the dirty bitmap before the memslot +- * update completes, i.e. before enabling dirty logging is visible to +- * userspace. +- * +- * Perform the TLB flush outside the mmu_lock to reduce the amount of +- * time the lock is held. However, this does mean that another CPU can +- * now grab mmu_lock and encounter a write-protected SPTE while CPUs +- * still have a writable mapping for the associated GFN in their TLB. +- * +- * This is safe but requires KVM to be careful when making decisions +- * based on the write-protection status of an SPTE. Specifically, KVM +- * also write-protects SPTEs to monitor changes to guest page tables +- * during shadow paging, and must guarantee no CPUs can write to those +- * page before the lock is dropped. As mentioned in the previous +- * paragraph, a write-protected SPTE is no guarantee that CPU cannot +- * perform writes. So to determine if a TLB flush is truly required, KVM +- * will clear a separate software-only bit (MMU-writable) and skip the +- * flush if-and-only-if this bit was already clear. +- * +- * See is_writable_pte() for more details. +- */ +- if (flush) +- kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); + } + + /* Must be called with the mmu_lock held in write-mode. */ +@@ -6070,32 +6041,30 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, + void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, + const struct kvm_memory_slot *memslot) + { +- bool flush = false; +- + if (kvm_memslots_have_rmaps(kvm)) { + write_lock(&kvm->mmu_lock); + /* + * Clear dirty bits only on 4k SPTEs since the legacy MMU only + * support dirty logging at a 4k granularity. + */ +- flush = slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false); ++ slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false); + write_unlock(&kvm->mmu_lock); + } + + if (is_tdp_mmu_enabled(kvm)) { + read_lock(&kvm->mmu_lock); +- flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot); ++ kvm_tdp_mmu_clear_dirty_slot(kvm, memslot); + read_unlock(&kvm->mmu_lock); + } + + /* ++ * The caller will flush the TLBs after this function returns. ++ * + * It's also safe to flush TLBs out of mmu lock here as currently this + * function is only used for dirty logging, in which case flushing TLB + * out of mmu lock also guarantees no dirty pages will be lost in + * dirty_bitmap. 
+ */ +- if (flush) +- kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); + } + + void kvm_mmu_zap_all(struct kvm *kvm) +diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h +index f80dbb628df57..e09bdcf1e47c5 100644 +--- a/arch/x86/kvm/mmu/spte.h ++++ b/arch/x86/kvm/mmu/spte.h +@@ -326,7 +326,7 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, + } + + /* +- * An shadow-present leaf SPTE may be non-writable for 3 possible reasons: ++ * A shadow-present leaf SPTE may be non-writable for 4 possible reasons: + * + * 1. To intercept writes for dirty logging. KVM write-protects huge pages + * so that they can be split be split down into the dirty logging +@@ -344,8 +344,13 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, + * read-only memslot or guest memory backed by a read-only VMA. Writes to + * such pages are disallowed entirely. + * +- * To keep track of why a given SPTE is write-protected, KVM uses 2 +- * software-only bits in the SPTE: ++ * 4. To emulate the Accessed bit for SPTEs without A/D bits. Note, in this ++ * case, the SPTE is access-protected, not just write-protected! ++ * ++ * For cases #1 and #4, KVM can safely make such SPTEs writable without taking ++ * mmu_lock as capturing the Accessed/Dirty state doesn't require taking it. ++ * To differentiate #1 and #4 from #2 and #3, KVM uses two software-only bits ++ * in the SPTE: + * + * shadow_mmu_writable_mask, aka MMU-writable - + * Cleared on SPTEs that KVM is currently write-protecting for shadow paging +@@ -374,7 +379,8 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, + * shadow page tables between vCPUs. Write-protecting an SPTE for dirty logging + * (which does not clear the MMU-writable bit), does not flush TLBs before + * dropping the lock, as it only needs to synchronize guest writes with the +- * dirty bitmap. ++ * dirty bitmap. Similarly, making the SPTE inaccessible (and non-writable) for ++ * access-tracking via the clear_young() MMU notifier also does not flush TLBs. + * + * So, there is the problem: clearing the MMU-writable bit can encounter a + * write-protected SPTE while CPUs still have writable mappings for that SPTE +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 55de0d1981e52..5b36866528568 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -12265,6 +12265,50 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, + } else { + kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K); + } ++ ++ /* ++ * Unconditionally flush the TLBs after enabling dirty logging. ++ * A flush is almost always going to be necessary (see below), ++ * and unconditionally flushing allows the helpers to omit ++ * the subtly complex checks when removing write access. ++ * ++ * Do the flush outside of mmu_lock to reduce the amount of ++ * time mmu_lock is held. Flushing after dropping mmu_lock is ++ * safe as KVM only needs to guarantee the slot is fully ++ * write-protected before returning to userspace, i.e. before ++ * userspace can consume the dirty status. ++ * ++ * Flushing outside of mmu_lock requires KVM to be careful when ++ * making decisions based on writable status of an SPTE, e.g. a ++ * !writable SPTE doesn't guarantee a CPU can't perform writes. ++ * ++ * Specifically, KVM also write-protects guest page tables to ++ * monitor changes when using shadow paging, and must guarantee ++ * no CPUs can write to those page before mmu_lock is dropped. 
++ * Because CPUs may have stale TLB entries at this point, a ++ * !writable SPTE doesn't guarantee CPUs can't perform writes. ++ * ++ * KVM also allows making SPTES writable outside of mmu_lock, ++ * e.g. to allow dirty logging without taking mmu_lock. ++ * ++ * To handle these scenarios, KVM uses a separate software-only ++ * bit (MMU-writable) to track if a SPTE is !writable due to ++ * a guest page table being write-protected (KVM clears the ++ * MMU-writable flag when write-protecting for shadow paging). ++ * ++ * The use of MMU-writable is also the primary motivation for ++ * the unconditional flush. Because KVM must guarantee that a ++ * CPU doesn't contain stale, writable TLB entries for a ++ * !MMU-writable SPTE, KVM must flush if it encounters any ++ * MMU-writable SPTE regardless of whether the actual hardware ++ * writable bit was set. I.e. KVM is almost guaranteed to need ++ * to flush, while unconditionally flushing allows the "remove ++ * write access" helpers to ignore MMU-writable entirely. ++ * ++ * See is_writable_pte() for more details (the case involving ++ * access-tracked SPTEs is particularly relevant). ++ */ ++ kvm_arch_flush_remote_tlbs_memslot(kvm, new); + } + } + +diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c +index c2d4947844250..510cdec375c4d 100644 +--- a/drivers/acpi/resource.c ++++ b/drivers/acpi/resource.c +@@ -416,6 +416,16 @@ static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity, + { + int i; + ++#ifdef CONFIG_X86 ++ /* ++ * IRQ override isn't needed on modern AMD Zen systems and ++ * this override breaks active low IRQs on AMD Ryzen 6000 and ++ * newer systems. Skip it. ++ */ ++ if (boot_cpu_has(X86_FEATURE_ZEN)) ++ return false; ++#endif ++ + for (i = 0; i < ARRAY_SIZE(skip_override_table); i++) { + const struct irq_override_cmp *entry = &skip_override_table[i]; + +diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c +index f118ad9bcd33d..0e95351d47d49 100644 +--- a/drivers/gpio/gpio-104-dio-48e.c ++++ b/drivers/gpio/gpio-104-dio-48e.c +@@ -271,6 +271,7 @@ static void dio48e_irq_mask(struct irq_data *data) + dio48egpio->irq_mask &= ~BIT(0); + else + dio48egpio->irq_mask &= ~BIT(1); ++ gpiochip_disable_irq(chip, offset); + + if (!dio48egpio->irq_mask) + /* disable interrupts */ +@@ -298,6 +299,7 @@ static void dio48e_irq_unmask(struct irq_data *data) + iowrite8(0x00, dio48egpio->base + 0xB); + } + ++ gpiochip_enable_irq(chip, offset); + if (offset == 19) + dio48egpio->irq_mask |= BIT(0); + else +@@ -320,12 +322,14 @@ static int dio48e_irq_set_type(struct irq_data *data, unsigned int flow_type) + return 0; + } + +-static struct irq_chip dio48e_irqchip = { ++static const struct irq_chip dio48e_irqchip = { + .name = "104-dio-48e", + .irq_ack = dio48e_irq_ack, + .irq_mask = dio48e_irq_mask, + .irq_unmask = dio48e_irq_unmask, +- .irq_set_type = dio48e_irq_set_type ++ .irq_set_type = dio48e_irq_set_type, ++ .flags = IRQCHIP_IMMUTABLE, ++ GPIOCHIP_IRQ_RESOURCE_HELPERS, + }; + + static irqreturn_t dio48e_irq_handler(int irq, void *dev_id) +@@ -414,7 +418,7 @@ static int dio48e_probe(struct device *dev, unsigned int id) + dio48egpio->chip.set_multiple = dio48e_gpio_set_multiple; + + girq = &dio48egpio->chip.irq; +- girq->chip = &dio48e_irqchip; ++ gpio_irq_chip_set_chip(girq, &dio48e_irqchip); + /* This will let us handle the parent IRQ in the driver */ + girq->parent_handler = NULL; + girq->num_parents = 0; +diff --git a/drivers/gpio/gpio-104-idio-16.c b/drivers/gpio/gpio-104-idio-16.c +index 
45f7ad8573e19..a8b7c8eafac5a 100644 +--- a/drivers/gpio/gpio-104-idio-16.c ++++ b/drivers/gpio/gpio-104-idio-16.c +@@ -150,10 +150,11 @@ static void idio_16_irq_mask(struct irq_data *data) + { + struct gpio_chip *chip = irq_data_get_irq_chip_data(data); + struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); +- const unsigned long mask = BIT(irqd_to_hwirq(data)); ++ const unsigned long offset = irqd_to_hwirq(data); + unsigned long flags; + +- idio16gpio->irq_mask &= ~mask; ++ idio16gpio->irq_mask &= ~BIT(offset); ++ gpiochip_disable_irq(chip, offset); + + if (!idio16gpio->irq_mask) { + raw_spin_lock_irqsave(&idio16gpio->lock, flags); +@@ -168,11 +169,12 @@ static void idio_16_irq_unmask(struct irq_data *data) + { + struct gpio_chip *chip = irq_data_get_irq_chip_data(data); + struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); +- const unsigned long mask = BIT(irqd_to_hwirq(data)); ++ const unsigned long offset = irqd_to_hwirq(data); + const unsigned long prev_irq_mask = idio16gpio->irq_mask; + unsigned long flags; + +- idio16gpio->irq_mask |= mask; ++ gpiochip_enable_irq(chip, offset); ++ idio16gpio->irq_mask |= BIT(offset); + + if (!prev_irq_mask) { + raw_spin_lock_irqsave(&idio16gpio->lock, flags); +@@ -193,12 +195,14 @@ static int idio_16_irq_set_type(struct irq_data *data, unsigned int flow_type) + return 0; + } + +-static struct irq_chip idio_16_irqchip = { ++static const struct irq_chip idio_16_irqchip = { + .name = "104-idio-16", + .irq_ack = idio_16_irq_ack, + .irq_mask = idio_16_irq_mask, + .irq_unmask = idio_16_irq_unmask, +- .irq_set_type = idio_16_irq_set_type ++ .irq_set_type = idio_16_irq_set_type, ++ .flags = IRQCHIP_IMMUTABLE, ++ GPIOCHIP_IRQ_RESOURCE_HELPERS, + }; + + static irqreturn_t idio_16_irq_handler(int irq, void *dev_id) +@@ -275,7 +279,7 @@ static int idio_16_probe(struct device *dev, unsigned int id) + idio16gpio->out_state = 0xFFFF; + + girq = &idio16gpio->chip.irq; +- girq->chip = &idio_16_irqchip; ++ gpio_irq_chip_set_chip(girq, &idio_16_irqchip); + /* This will let us handle the parent IRQ in the driver */ + girq->parent_handler = NULL; + girq->num_parents = 0; +diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c +index 8943cea927642..a2e505a7545cd 100644 +--- a/drivers/gpio/gpio-mockup.c ++++ b/drivers/gpio/gpio-mockup.c +@@ -373,6 +373,13 @@ static void gpio_mockup_debugfs_setup(struct device *dev, + } + } + ++static void gpio_mockup_debugfs_cleanup(void *data) ++{ ++ struct gpio_mockup_chip *chip = data; ++ ++ debugfs_remove_recursive(chip->dbg_dir); ++} ++ + static void gpio_mockup_dispose_mappings(void *data) + { + struct gpio_mockup_chip *chip = data; +@@ -455,7 +462,7 @@ static int gpio_mockup_probe(struct platform_device *pdev) + + gpio_mockup_debugfs_setup(dev, chip); + +- return 0; ++ return devm_add_action_or_reset(dev, gpio_mockup_debugfs_cleanup, chip); + } + + static const struct of_device_id gpio_mockup_of_match[] = { +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +index ecada5eadfe35..e325150879df7 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +@@ -66,10 +66,15 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev) + return true; + case CHIP_SIENNA_CICHLID: + if (strnstr(atom_ctx->vbios_version, "D603", ++ sizeof(atom_ctx->vbios_version))) { ++ if (strnstr(atom_ctx->vbios_version, "D603GLXE", + sizeof(atom_ctx->vbios_version))) +- return true; +- else ++ return 
false; ++ else ++ return true; ++ } else { + return false; ++ } + default: + return false; + } +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +index 2b00f8fe15a89..b19bf0c3f3737 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +@@ -2372,7 +2372,7 @@ static int psp_load_smu_fw(struct psp_context *psp) + static bool fw_load_skip_check(struct psp_context *psp, + struct amdgpu_firmware_info *ucode) + { +- if (!ucode->fw) ++ if (!ucode->fw || !ucode->ucode_size) + return true; + + if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC && +diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +index 9cde13b07dd26..d9a5209aa8433 100644 +--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c ++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +@@ -382,11 +382,27 @@ static int smu_v13_0_7_append_powerplay_table(struct smu_context *smu) + return 0; + } + ++static int smu_v13_0_7_get_pptable_from_pmfw(struct smu_context *smu, ++ void **table, ++ uint32_t *size) ++{ ++ struct smu_table_context *smu_table = &smu->smu_table; ++ void *combo_pptable = smu_table->combo_pptable; ++ int ret = 0; ++ ++ ret = smu_cmn_get_combo_pptable(smu); ++ if (ret) ++ return ret; ++ ++ *table = combo_pptable; ++ *size = sizeof(struct smu_13_0_7_powerplay_table); ++ ++ return 0; ++} + + static int smu_v13_0_7_setup_pptable(struct smu_context *smu) + { + struct smu_table_context *smu_table = &smu->smu_table; +- void *combo_pptable = smu_table->combo_pptable; + struct amdgpu_device *adev = smu->adev; + int ret = 0; + +@@ -395,18 +411,11 @@ static int smu_v13_0_7_setup_pptable(struct smu_context *smu) + * be used directly by driver. To get the raw pptable, we need to + * rely on the combo pptable(and its revelant SMU message). + */ +- if (adev->scpm_enabled) { +- ret = smu_cmn_get_combo_pptable(smu); +- if (ret) +- return ret; +- +- smu->smu_table.power_play_table = combo_pptable; +- smu->smu_table.power_play_table_size = sizeof(struct smu_13_0_7_powerplay_table); +- } else { +- ret = smu_v13_0_setup_pptable(smu); +- if (ret) +- return ret; +- } ++ ret = smu_v13_0_7_get_pptable_from_pmfw(smu, ++ &smu_table->power_play_table, ++ &smu_table->power_play_table_size); ++ if (ret) ++ return ret; + + ret = smu_v13_0_7_store_powerplay_table(smu); + if (ret) +diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c +index a92ffde53f0b3..db2f847c8535f 100644 +--- a/drivers/gpu/drm/msm/msm_rd.c ++++ b/drivers/gpu/drm/msm/msm_rd.c +@@ -196,6 +196,9 @@ static int rd_open(struct inode *inode, struct file *file) + file->private_data = rd; + rd->open = true; + ++ /* Reset fifo to clear any previously unread data: */ ++ rd->fifo.head = rd->fifo.tail = 0; ++ + /* the parsing tools need to know gpu-id to know which + * register database to load. + * +diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.h b/drivers/hid/intel-ish-hid/ishtp-hid.h +index 6a5cc11aefd89..35dddc5015b37 100644 +--- a/drivers/hid/intel-ish-hid/ishtp-hid.h ++++ b/drivers/hid/intel-ish-hid/ishtp-hid.h +@@ -105,7 +105,7 @@ struct report_list { + * @multi_packet_cnt: Count of fragmented packet count + * + * This structure is used to store completion flags and per client data like +- * like report description, number of HID devices etc. ++ * report description, number of HID devices etc. 
+ */ + struct ishtp_cl_data { + /* completion flags */ +diff --git a/drivers/hid/intel-ish-hid/ishtp/client.c b/drivers/hid/intel-ish-hid/ishtp/client.c +index 405e0d5212cc8..df0a825694f52 100644 +--- a/drivers/hid/intel-ish-hid/ishtp/client.c ++++ b/drivers/hid/intel-ish-hid/ishtp/client.c +@@ -626,13 +626,14 @@ static void ishtp_cl_read_complete(struct ishtp_cl_rb *rb) + } + + /** +- * ipc_tx_callback() - IPC tx callback function ++ * ipc_tx_send() - IPC tx send function + * @prm: Pointer to client device instance + * +- * Send message over IPC either first time or on callback on previous message +- * completion ++ * Send message over IPC. Message will be split into fragments ++ * if message size is bigger than IPC FIFO size, and all ++ * fragments will be sent one by one. + */ +-static void ipc_tx_callback(void *prm) ++static void ipc_tx_send(void *prm) + { + struct ishtp_cl *cl = prm; + struct ishtp_cl_tx_ring *cl_msg; +@@ -677,32 +678,41 @@ static void ipc_tx_callback(void *prm) + list); + rem = cl_msg->send_buf.size - cl->tx_offs; + +- ishtp_hdr.host_addr = cl->host_client_id; +- ishtp_hdr.fw_addr = cl->fw_client_id; +- ishtp_hdr.reserved = 0; +- pmsg = cl_msg->send_buf.data + cl->tx_offs; ++ while (rem > 0) { ++ ishtp_hdr.host_addr = cl->host_client_id; ++ ishtp_hdr.fw_addr = cl->fw_client_id; ++ ishtp_hdr.reserved = 0; ++ pmsg = cl_msg->send_buf.data + cl->tx_offs; ++ ++ if (rem <= dev->mtu) { ++ /* Last fragment or only one packet */ ++ ishtp_hdr.length = rem; ++ ishtp_hdr.msg_complete = 1; ++ /* Submit to IPC queue with no callback */ ++ ishtp_write_message(dev, &ishtp_hdr, pmsg); ++ cl->tx_offs = 0; ++ cl->sending = 0; + +- if (rem <= dev->mtu) { +- ishtp_hdr.length = rem; +- ishtp_hdr.msg_complete = 1; +- cl->sending = 0; +- list_del_init(&cl_msg->list); /* Must be before write */ +- spin_unlock_irqrestore(&cl->tx_list_spinlock, tx_flags); +- /* Submit to IPC queue with no callback */ +- ishtp_write_message(dev, &ishtp_hdr, pmsg); +- spin_lock_irqsave(&cl->tx_free_list_spinlock, tx_free_flags); +- list_add_tail(&cl_msg->list, &cl->tx_free_list.list); +- ++cl->tx_ring_free_size; +- spin_unlock_irqrestore(&cl->tx_free_list_spinlock, +- tx_free_flags); +- } else { +- /* Send IPC fragment */ +- spin_unlock_irqrestore(&cl->tx_list_spinlock, tx_flags); +- cl->tx_offs += dev->mtu; +- ishtp_hdr.length = dev->mtu; +- ishtp_hdr.msg_complete = 0; +- ishtp_send_msg(dev, &ishtp_hdr, pmsg, ipc_tx_callback, cl); ++ break; ++ } else { ++ /* Send ipc fragment */ ++ ishtp_hdr.length = dev->mtu; ++ ishtp_hdr.msg_complete = 0; ++ /* All fregments submitted to IPC queue with no callback */ ++ ishtp_write_message(dev, &ishtp_hdr, pmsg); ++ cl->tx_offs += dev->mtu; ++ rem = cl_msg->send_buf.size - cl->tx_offs; ++ } + } ++ ++ list_del_init(&cl_msg->list); ++ spin_unlock_irqrestore(&cl->tx_list_spinlock, tx_flags); ++ ++ spin_lock_irqsave(&cl->tx_free_list_spinlock, tx_free_flags); ++ list_add_tail(&cl_msg->list, &cl->tx_free_list.list); ++ ++cl->tx_ring_free_size; ++ spin_unlock_irqrestore(&cl->tx_free_list_spinlock, ++ tx_free_flags); + } + + /** +@@ -720,7 +730,7 @@ static void ishtp_cl_send_msg_ipc(struct ishtp_device *dev, + return; + + cl->tx_offs = 0; +- ipc_tx_callback(cl); ++ ipc_tx_send(cl); + ++cl->send_msg_cnt_ipc; + } + +diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c +index d003ad864ee44..a6e5d350a94ce 100644 +--- a/drivers/infiniband/hw/irdma/uk.c ++++ b/drivers/infiniband/hw/irdma/uk.c +@@ -497,7 +497,8 @@ int irdma_uk_send(struct irdma_qp_uk *qp, struct 
irdma_post_sq_info *info, + FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); + i = 0; + } else { +- qp->wqe_ops.iw_set_fragment(wqe, 0, op_info->sg_list, ++ qp->wqe_ops.iw_set_fragment(wqe, 0, ++ frag_cnt ? op_info->sg_list : NULL, + qp->swqe_polarity); + i = 1; + } +diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c +index 08371a80fdc26..be189e0525de6 100644 +--- a/drivers/infiniband/hw/mlx5/cq.c ++++ b/drivers/infiniband/hw/mlx5/cq.c +@@ -523,6 +523,10 @@ repoll: + "Requestor" : "Responder", cq->mcq.cqn); + mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n", + err_cqe->syndrome, err_cqe->vendor_err_synd); ++ if (wc->status != IB_WC_WR_FLUSH_ERR && ++ (*cur_qp)->type == MLX5_IB_QPT_REG_UMR) ++ dev->umrc.state = MLX5_UMR_STATE_RECOVER; ++ + if (opcode == MLX5_CQE_REQ_ERR) { + wq = &(*cur_qp)->sq; + wqe_ctr = be16_to_cpu(cqe64->wqe_counter); +diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c +index 63c89a72cc352..bb13164124fdb 100644 +--- a/drivers/infiniband/hw/mlx5/main.c ++++ b/drivers/infiniband/hw/mlx5/main.c +@@ -4336,7 +4336,7 @@ static int mlx5r_probe(struct auxiliary_device *adev, + dev->mdev = mdev; + dev->num_ports = num_ports; + +- if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_init_enabled(mdev)) ++ if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_get_roce_state(mdev)) + profile = &raw_eth_profile; + else + profile = &pf_profile; +diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h +index 998b67509a533..c2cca032a6ed4 100644 +--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h ++++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h +@@ -717,13 +717,24 @@ struct mlx5_ib_umr_context { + struct completion done; + }; + ++enum { ++ MLX5_UMR_STATE_UNINIT, ++ MLX5_UMR_STATE_ACTIVE, ++ MLX5_UMR_STATE_RECOVER, ++ MLX5_UMR_STATE_ERR, ++}; ++ + struct umr_common { + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_qp *qp; +- /* control access to UMR QP ++ /* Protects from UMR QP overflow + */ + struct semaphore sem; ++ /* Protects from using UMR while the UMR is not active ++ */ ++ struct mutex lock; ++ unsigned int state; + }; + + struct mlx5_cache_ent { +diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c +index 3a48364c09181..d5105b5c9979b 100644 +--- a/drivers/infiniband/hw/mlx5/umr.c ++++ b/drivers/infiniband/hw/mlx5/umr.c +@@ -176,6 +176,8 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev) + dev->umrc.pd = pd; + + sema_init(&dev->umrc.sem, MAX_UMR_WR); ++ mutex_init(&dev->umrc.lock); ++ dev->umrc.state = MLX5_UMR_STATE_ACTIVE; + + return 0; + +@@ -190,11 +192,38 @@ destroy_pd: + + void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev) + { ++ if (dev->umrc.state == MLX5_UMR_STATE_UNINIT) ++ return; + ib_destroy_qp(dev->umrc.qp); + ib_free_cq(dev->umrc.cq); + ib_dealloc_pd(dev->umrc.pd); + } + ++static int mlx5r_umr_recover(struct mlx5_ib_dev *dev) ++{ ++ struct umr_common *umrc = &dev->umrc; ++ struct ib_qp_attr attr; ++ int err; ++ ++ attr.qp_state = IB_QPS_RESET; ++ err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); ++ if (err) { ++ mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); ++ goto err; ++ } ++ ++ err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); ++ if (err) ++ goto err; ++ ++ umrc->state = MLX5_UMR_STATE_ACTIVE; ++ return 0; ++ ++err: ++ umrc->state = MLX5_UMR_STATE_ERR; ++ return err; ++} ++ + static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, + struct mlx5r_umr_wqe *wqe, bool with_data) + { +@@ -231,7 +260,7 @@ static int 
mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, + + id.ib_cqe = cqe; + mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0, +- MLX5_FENCE_MODE_NONE, MLX5_OPCODE_UMR); ++ MLX5_FENCE_MODE_INITIATOR_SMALL, MLX5_OPCODE_UMR); + + mlx5r_ring_db(qp, 1, ctrl); + +@@ -270,17 +299,49 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey, + mlx5r_umr_init_context(&umr_context); + + down(&umrc->sem); +- err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe, +- with_data); +- if (err) +- mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err); +- else { +- wait_for_completion(&umr_context.done); +- if (umr_context.status != IB_WC_SUCCESS) { +- mlx5_ib_warn(dev, "reg umr failed (%u)\n", +- umr_context.status); ++ while (true) { ++ mutex_lock(&umrc->lock); ++ if (umrc->state == MLX5_UMR_STATE_ERR) { ++ mutex_unlock(&umrc->lock); + err = -EFAULT; ++ break; ++ } ++ ++ if (umrc->state == MLX5_UMR_STATE_RECOVER) { ++ mutex_unlock(&umrc->lock); ++ usleep_range(3000, 5000); ++ continue; ++ } ++ ++ err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe, ++ with_data); ++ mutex_unlock(&umrc->lock); ++ if (err) { ++ mlx5_ib_warn(dev, "UMR post send failed, err %d\n", ++ err); ++ break; + } ++ ++ wait_for_completion(&umr_context.done); ++ ++ if (umr_context.status == IB_WC_SUCCESS) ++ break; ++ ++ if (umr_context.status == IB_WC_WR_FLUSH_ERR) ++ continue; ++ ++ WARN_ON_ONCE(1); ++ mlx5_ib_warn(dev, ++ "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs\n", ++ umr_context.status); ++ mutex_lock(&umrc->lock); ++ err = mlx5r_umr_recover(dev); ++ mutex_unlock(&umrc->lock); ++ if (err) ++ mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n", ++ err); ++ err = -EFAULT; ++ break; + } + up(&umrc->sem); + return err; +diff --git a/drivers/input/joystick/iforce/iforce-main.c b/drivers/input/joystick/iforce/iforce-main.c +index b2a68bc9f0b4d..b86de1312512b 100644 +--- a/drivers/input/joystick/iforce/iforce-main.c ++++ b/drivers/input/joystick/iforce/iforce-main.c +@@ -50,6 +50,7 @@ static struct iforce_device iforce_device[] = { + { 0x046d, 0xc291, "Logitech WingMan Formula Force", btn_wheel, abs_wheel, ff_iforce }, + { 0x05ef, 0x020a, "AVB Top Shot Pegasus", btn_joystick_avb, abs_avb_pegasus, ff_iforce }, + { 0x05ef, 0x8884, "AVB Mag Turbo Force", btn_wheel, abs_wheel, ff_iforce }, ++ { 0x05ef, 0x8886, "Boeder Force Feedback Wheel", btn_wheel, abs_wheel, ff_iforce }, + { 0x05ef, 0x8888, "AVB Top Shot Force Feedback Racing Wheel", btn_wheel, abs_wheel, ff_iforce }, //? + { 0x061c, 0xc0a4, "ACT LABS Force RS", btn_wheel, abs_wheel, ff_iforce }, //? 
+ { 0x061c, 0xc084, "ACT LABS Force RS", btn_wheel, abs_wheel, ff_iforce }, +diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c +index 40ac3a78d90ef..c0464959cbcdb 100644 +--- a/drivers/iommu/intel/iommu.c ++++ b/drivers/iommu/intel/iommu.c +@@ -168,38 +168,6 @@ static phys_addr_t root_entry_uctp(struct root_entry *re) + return re->hi & VTD_PAGE_MASK; + } + +-static inline void context_clear_pasid_enable(struct context_entry *context) +-{ +- context->lo &= ~(1ULL << 11); +-} +- +-static inline bool context_pasid_enabled(struct context_entry *context) +-{ +- return !!(context->lo & (1ULL << 11)); +-} +- +-static inline void context_set_copied(struct context_entry *context) +-{ +- context->hi |= (1ull << 3); +-} +- +-static inline bool context_copied(struct context_entry *context) +-{ +- return !!(context->hi & (1ULL << 3)); +-} +- +-static inline bool __context_present(struct context_entry *context) +-{ +- return (context->lo & 1); +-} +- +-bool context_present(struct context_entry *context) +-{ +- return context_pasid_enabled(context) ? +- __context_present(context) : +- __context_present(context) && !context_copied(context); +-} +- + static inline void context_set_present(struct context_entry *context) + { + context->lo |= 1; +@@ -247,6 +215,26 @@ static inline void context_clear_entry(struct context_entry *context) + context->hi = 0; + } + ++static inline bool context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) ++{ ++ if (!iommu->copied_tables) ++ return false; ++ ++ return test_bit(((long)bus << 8) | devfn, iommu->copied_tables); ++} ++ ++static inline void ++set_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) ++{ ++ set_bit(((long)bus << 8) | devfn, iommu->copied_tables); ++} ++ ++static inline void ++clear_context_copied(struct intel_iommu *iommu, u8 bus, u8 devfn) ++{ ++ clear_bit(((long)bus << 8) | devfn, iommu->copied_tables); ++} ++ + /* + * This domain is a statically identity mapping domain. + * 1. This domain creats a static 1:1 mapping to all usable memory. +@@ -644,6 +632,13 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, + struct context_entry *context; + u64 *entry; + ++ /* ++ * Except that the caller requested to allocate a new entry, ++ * returning a copied context entry makes no sense. ++ */ ++ if (!alloc && context_copied(iommu, bus, devfn)) ++ return NULL; ++ + entry = &root->lo; + if (sm_supported(iommu)) { + if (devfn >= 0x80) { +@@ -1770,6 +1765,11 @@ static void free_dmar_iommu(struct intel_iommu *iommu) + iommu->domain_ids = NULL; + } + ++ if (iommu->copied_tables) { ++ bitmap_free(iommu->copied_tables); ++ iommu->copied_tables = NULL; ++ } ++ + g_iommus[iommu->seq_id] = NULL; + + /* free context mapping */ +@@ -1978,7 +1978,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, + goto out_unlock; + + ret = 0; +- if (context_present(context)) ++ if (context_present(context) && !context_copied(iommu, bus, devfn)) + goto out_unlock; + + /* +@@ -1990,7 +1990,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, + * in-flight DMA will exist, and we don't need to worry anymore + * hereafter. 
+ */ +- if (context_copied(context)) { ++ if (context_copied(iommu, bus, devfn)) { + u16 did_old = context_domain_id(context); + + if (did_old < cap_ndoms(iommu->cap)) { +@@ -2001,6 +2001,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, + iommu->flush.flush_iotlb(iommu, did_old, 0, 0, + DMA_TLB_DSI_FLUSH); + } ++ ++ clear_context_copied(iommu, bus, devfn); + } + + context_clear_entry(context); +@@ -2783,32 +2785,14 @@ static int copy_context_table(struct intel_iommu *iommu, + /* Now copy the context entry */ + memcpy(&ce, old_ce + idx, sizeof(ce)); + +- if (!__context_present(&ce)) ++ if (!context_present(&ce)) + continue; + + did = context_domain_id(&ce); + if (did >= 0 && did < cap_ndoms(iommu->cap)) + set_bit(did, iommu->domain_ids); + +- /* +- * We need a marker for copied context entries. This +- * marker needs to work for the old format as well as +- * for extended context entries. +- * +- * Bit 67 of the context entry is used. In the old +- * format this bit is available to software, in the +- * extended format it is the PGE bit, but PGE is ignored +- * by HW if PASIDs are disabled (and thus still +- * available). +- * +- * So disable PASIDs first and then mark the entry +- * copied. This means that we don't copy PASID +- * translations from the old kernel, but this is fine as +- * faults there are not fatal. +- */ +- context_clear_pasid_enable(&ce); +- context_set_copied(&ce); +- ++ set_context_copied(iommu, bus, devfn); + new_ce[idx] = ce; + } + +@@ -2835,8 +2819,8 @@ static int copy_translation_tables(struct intel_iommu *iommu) + bool new_ext, ext; + + rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG); +- ext = !!(rtaddr_reg & DMA_RTADDR_RTT); +- new_ext = !!ecap_ecs(iommu->ecap); ++ ext = !!(rtaddr_reg & DMA_RTADDR_SMT); ++ new_ext = !!sm_supported(iommu); + + /* + * The RTT bit can only be changed when translation is disabled, +@@ -2847,6 +2831,10 @@ static int copy_translation_tables(struct intel_iommu *iommu) + if (new_ext != ext) + return -EINVAL; + ++ iommu->copied_tables = bitmap_zalloc(BIT_ULL(16), GFP_KERNEL); ++ if (!iommu->copied_tables) ++ return -ENOMEM; ++ + old_rt_phys = rtaddr_reg & VTD_PAGE_MASK; + if (!old_rt_phys) + return -EINVAL; +diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c +index c28f8cc00d1cf..a9cc85882b315 100644 +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -18076,16 +18076,20 @@ static void tg3_shutdown(struct pci_dev *pdev) + struct net_device *dev = pci_get_drvdata(pdev); + struct tg3 *tp = netdev_priv(dev); + ++ tg3_reset_task_cancel(tp); ++ + rtnl_lock(); ++ + netif_device_detach(dev); + + if (netif_running(dev)) + dev_close(dev); + +- if (system_state == SYSTEM_POWER_OFF) +- tg3_power_down(tp); ++ tg3_power_down(tp); + + rtnl_unlock(); ++ ++ pci_disable_device(pdev); + } + + /** +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c +index cfb8bedba5124..079fa44ada71e 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c +@@ -289,6 +289,10 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id) + sw_owner_id[i]); + } + ++ if (MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) && ++ dev->priv.sw_vhca_id > 0) ++ MLX5_SET(init_hca_in, in, sw_vhca_id, dev->priv.sw_vhca_id); ++ + return mlx5_cmd_exec_in(dev, init_hca, in); + } + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c +index 616207c3b187a..6c8bb74bd8fc6 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -90,6 +90,8 @@ module_param_named(prof_sel, prof_sel, uint, 0444); + MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); + + static u32 sw_owner_id[4]; ++#define MAX_SW_VHCA_ID (BIT(__mlx5_bit_sz(cmd_hca_cap_2, sw_vhca_id)) - 1) ++static DEFINE_IDA(sw_vhca_ida); + + enum { + MLX5_ATOMIC_REQ_MODE_BE = 0x0, +@@ -499,6 +501,49 @@ static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev) + return err; + } + ++bool mlx5_is_roce_on(struct mlx5_core_dev *dev) ++{ ++ struct devlink *devlink = priv_to_devlink(dev); ++ union devlink_param_value val; ++ int err; ++ ++ err = devlink_param_driverinit_value_get(devlink, ++ DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, ++ &val); ++ ++ if (!err) ++ return val.vbool; ++ ++ mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); ++ return MLX5_CAP_GEN(dev, roce); ++} ++EXPORT_SYMBOL(mlx5_is_roce_on); ++ ++static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx) ++{ ++ void *set_hca_cap; ++ int err; ++ ++ if (!MLX5_CAP_GEN_MAX(dev, hca_cap_2)) ++ return 0; ++ ++ err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2); ++ if (err) ++ return err; ++ ++ if (!MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) || ++ !(dev->priv.sw_vhca_id > 0)) ++ return 0; ++ ++ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, ++ capability); ++ memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL_2]->cur, ++ MLX5_ST_SZ_BYTES(cmd_hca_cap_2)); ++ MLX5_SET(cmd_hca_cap_2, set_hca_cap, sw_vhca_id_valid, 1); ++ ++ return set_caps(dev, set_ctx, MLX5_CAP_GENERAL_2); ++} ++ + static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) + { + struct mlx5_profile *prof = &dev->profile; +@@ -577,7 +622,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) + MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); + + if (MLX5_CAP_GEN(dev, roce_rw_supported)) +- MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev)); ++ MLX5_SET(cmd_hca_cap, set_hca_cap, roce, ++ mlx5_is_roce_on(dev)); + + max_uc_list = max_uc_list_get_devlink_param(dev); + if (max_uc_list > 0) +@@ -603,7 +649,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) + */ + static bool is_roce_fw_disabled(struct mlx5_core_dev *dev) + { +- return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_init_enabled(dev)) || ++ return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) || + (!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce)); + } + +@@ -669,6 +715,13 @@ static int set_hca_cap(struct mlx5_core_dev *dev) + goto out; + } + ++ memset(set_ctx, 0, set_sz); ++ err = handle_hca_cap_2(dev, set_ctx); ++ if (err) { ++ mlx5_core_err(dev, "handle_hca_cap_2 failed\n"); ++ goto out; ++ } ++ + out: + kfree(set_ctx); + return err; +@@ -1512,6 +1565,18 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) + if (err) + goto err_hca_caps; + ++ /* The conjunction of sw_vhca_id with sw_owner_id will be a global ++ * unique id per function which uses mlx5_core. ++ * Those values are supplied to FW as part of the init HCA command to ++ * be used by both driver and FW when it's applicable. 
++ */ ++ dev->priv.sw_vhca_id = ida_alloc_range(&sw_vhca_ida, 1, ++ MAX_SW_VHCA_ID, ++ GFP_KERNEL); ++ if (dev->priv.sw_vhca_id < 0) ++ mlx5_core_err(dev, "failed to allocate sw_vhca_id, err=%d\n", ++ dev->priv.sw_vhca_id); ++ + return 0; + + err_hca_caps: +@@ -1537,6 +1602,9 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) + { + struct mlx5_priv *priv = &dev->priv; + ++ if (priv->sw_vhca_id > 0) ++ ida_free(&sw_vhca_ida, dev->priv.sw_vhca_id); ++ + mlx5_hca_caps_free(dev); + mlx5_adev_cleanup(dev); + mlx5_pagealloc_cleanup(dev); +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c +index ac020cb780727..d5c3173250309 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c +@@ -1086,9 +1086,17 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, + goto free; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1); +- MLX5_SET(modify_nic_vport_context_in, in, +- nic_vport_context.affiliated_vhca_id, +- MLX5_CAP_GEN(master_mdev, vhca_id)); ++ if (MLX5_CAP_GEN_2(master_mdev, sw_vhca_id_valid)) { ++ MLX5_SET(modify_nic_vport_context_in, in, ++ nic_vport_context.vhca_id_type, VHCA_ID_TYPE_SW); ++ MLX5_SET(modify_nic_vport_context_in, in, ++ nic_vport_context.affiliated_vhca_id, ++ MLX5_CAP_GEN_2(master_mdev, sw_vhca_id)); ++ } else { ++ MLX5_SET(modify_nic_vport_context_in, in, ++ nic_vport_context.affiliated_vhca_id, ++ MLX5_CAP_GEN(master_mdev, vhca_id)); ++ } + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliation_criteria, + MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria)); +diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c +index 1e1f40f628a02..c69b87d3837da 100644 +--- a/drivers/net/ieee802154/cc2520.c ++++ b/drivers/net/ieee802154/cc2520.c +@@ -504,6 +504,7 @@ cc2520_tx(struct ieee802154_hw *hw, struct sk_buff *skb) + goto err_tx; + + if (status & CC2520_STATUS_TX_UNDERFLOW) { ++ rc = -EINVAL; + dev_err(&priv->spi->dev, "cc2520 tx underflow exception\n"); + goto err_tx; + } +diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c +index 2de09ad5bac03..e11f70911acc1 100644 +--- a/drivers/net/usb/cdc_ether.c ++++ b/drivers/net/usb/cdc_ether.c +@@ -777,6 +777,13 @@ static const struct usb_device_id products[] = { + }, + #endif + ++/* Lenovo ThinkPad OneLink+ Dock (based on Realtek RTL8153) */ ++{ ++ USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3054, USB_CLASS_COMM, ++ USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), ++ .driver_info = 0, ++}, ++ + /* ThinkPad USB-C Dock (based on Realtek RTL8153) */ + { + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x3062, USB_CLASS_COMM, +diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c +index d142ac8fcf6e2..688905ea0a6d3 100644 +--- a/drivers/net/usb/r8152.c ++++ b/drivers/net/usb/r8152.c +@@ -770,6 +770,7 @@ enum rtl8152_flags { + RX_EPROTO, + }; + ++#define DEVICE_ID_THINKPAD_ONELINK_PLUS_DOCK 0x3054 + #define DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2 0x3082 + #define DEVICE_ID_THINKPAD_USB_C_DONGLE 0x720c + #define DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2 0xa387 +@@ -9581,6 +9582,7 @@ static bool rtl8152_supports_lenovo_macpassthru(struct usb_device *udev) + + if (vendor_id == VENDOR_ID_LENOVO) { + switch (product_id) { ++ case DEVICE_ID_THINKPAD_ONELINK_PLUS_DOCK: + case DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2: + case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2: + case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN3: +@@ 
-9828,6 +9830,7 @@ static const struct usb_device_id rtl8152_table[] = { + REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x0927), + REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101), + REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x304f), ++ REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3054), + REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3062), + REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3069), + REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x3082), +diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c +index 73d9fcba3b1c0..9f6614f7dbeb1 100644 +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -3517,6 +3517,8 @@ static const struct pci_device_id nvme_id_table[] = { + .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, + { PCI_DEVICE(0xc0a9, 0x540a), /* Crucial P2 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, ++ { PCI_DEVICE(0x1d97, 0x2263), /* Lexar NM610 */ ++ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), + .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, }, + { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065), +diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c +index dc3b4dc8fe08b..a3694a32f6d52 100644 +--- a/drivers/nvme/target/tcp.c ++++ b/drivers/nvme/target/tcp.c +@@ -1506,6 +1506,9 @@ static void nvmet_tcp_state_change(struct sock *sk) + goto done; + + switch (sk->sk_state) { ++ case TCP_FIN_WAIT2: ++ case TCP_LAST_ACK: ++ break; + case TCP_FIN_WAIT1: + case TCP_CLOSE_WAIT: + case TCP_CLOSE: +diff --git a/drivers/peci/cpu.c b/drivers/peci/cpu.c +index 68eb61c65d345..de4a7b3e5966e 100644 +--- a/drivers/peci/cpu.c ++++ b/drivers/peci/cpu.c +@@ -188,8 +188,6 @@ static void adev_release(struct device *dev) + { + struct auxiliary_device *adev = to_auxiliary_dev(dev); + +- auxiliary_device_uninit(adev); +- + kfree(adev->name); + kfree(adev); + } +@@ -234,6 +232,7 @@ static void unregister_adev(void *_adev) + struct auxiliary_device *adev = _adev; + + auxiliary_device_delete(adev); ++ auxiliary_device_uninit(adev); + } + + static int devm_adev_add(struct device *dev, int idx) +diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c +index 513de1f54e2d7..933b96e243b84 100644 +--- a/drivers/perf/arm_pmu_platform.c ++++ b/drivers/perf/arm_pmu_platform.c +@@ -117,7 +117,7 @@ static int pmu_parse_irqs(struct arm_pmu *pmu) + + if (num_irqs == 1) { + int irq = platform_get_irq(pdev, 0); +- if (irq && irq_is_percpu_devid(irq)) ++ if ((irq > 0) && irq_is_percpu_devid(irq)) + return pmu_parse_percpu_irq(pmu, irq); + } + +diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c +index ce2bd88feeaa8..08019c6ccc9ca 100644 +--- a/drivers/platform/surface/surface_aggregator_registry.c ++++ b/drivers/platform/surface/surface_aggregator_registry.c +@@ -556,6 +556,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { + /* Surface Laptop Go 1 */ + { "MSHW0118", (unsigned long)ssam_node_group_slg1 }, + ++ /* Surface Laptop Go 2 */ ++ { "MSHW0290", (unsigned long)ssam_node_group_slg1 }, ++ + /* Surface Laptop Studio */ + { "MSHW0123", (unsigned long)ssam_node_group_sls }, + +diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c +index 9c6943e401a6c..0fbcaffabbfc7 100644 +--- a/drivers/platform/x86/acer-wmi.c ++++ b/drivers/platform/x86/acer-wmi.c +@@ -99,6 +99,7 @@ static const struct key_entry acer_wmi_keymap[] __initconst = { + {KE_KEY, 0x22, {KEY_PROG2} }, /* Arcade */ + {KE_KEY, 0x23, {KEY_PROG3} }, /* P_Key */ + {KE_KEY, 0x24, {KEY_PROG4} }, /* Social 
networking_Key */ ++ {KE_KEY, 0x27, {KEY_HELP} }, + {KE_KEY, 0x29, {KEY_PROG3} }, /* P_Key for TM8372 */ + {KE_IGNORE, 0x41, {KEY_MUTE} }, + {KE_IGNORE, 0x42, {KEY_PREVIOUSSONG} }, +@@ -112,7 +113,13 @@ static const struct key_entry acer_wmi_keymap[] __initconst = { + {KE_IGNORE, 0x48, {KEY_VOLUMEUP} }, + {KE_IGNORE, 0x49, {KEY_VOLUMEDOWN} }, + {KE_IGNORE, 0x4a, {KEY_VOLUMEDOWN} }, +- {KE_IGNORE, 0x61, {KEY_SWITCHVIDEOMODE} }, ++ /* ++ * 0x61 is KEY_SWITCHVIDEOMODE. Usually this is a duplicate input event ++ * with the "Video Bus" input device events. But sometimes it is not ++ * a dup. Map it to KEY_UNKNOWN instead of using KE_IGNORE so that ++ * udev/hwdb can override it on systems where it is not a dup. ++ */ ++ {KE_KEY, 0x61, {KEY_UNKNOWN} }, + {KE_IGNORE, 0x62, {KEY_BRIGHTNESSUP} }, + {KE_IGNORE, 0x63, {KEY_BRIGHTNESSDOWN} }, + {KE_KEY, 0x64, {KEY_SWITCHVIDEOMODE} }, /* Display Switch */ +diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c +index 62ce198a34631..a0f31624aee97 100644 +--- a/drivers/platform/x86/asus-wmi.c ++++ b/drivers/platform/x86/asus-wmi.c +@@ -107,7 +107,7 @@ module_param(fnlock_default, bool, 0444); + #define WMI_EVENT_MASK 0xFFFF + + #define FAN_CURVE_POINTS 8 +-#define FAN_CURVE_BUF_LEN (FAN_CURVE_POINTS * 2) ++#define FAN_CURVE_BUF_LEN 32 + #define FAN_CURVE_DEV_CPU 0x00 + #define FAN_CURVE_DEV_GPU 0x01 + /* Mask to determine if setting temperature or percentage */ +@@ -2208,8 +2208,10 @@ static int fan_curve_get_factory_default(struct asus_wmi *asus, u32 fan_dev) + curves = &asus->custom_fan_curves[fan_idx]; + err = asus_wmi_evaluate_method_buf(asus->dsts_id, fan_dev, mode, buf, + FAN_CURVE_BUF_LEN); +- if (err) ++ if (err) { ++ pr_warn("%s (0x%08x) failed: %d\n", __func__, fan_dev, err); + return err; ++ } + + fan_curve_copy_from_buf(curves, buf); + curves->device_id = fan_dev; +@@ -2227,9 +2229,6 @@ static int fan_curve_check_present(struct asus_wmi *asus, bool *available, + + err = fan_curve_get_factory_default(asus, fan_dev); + if (err) { +- pr_debug("fan_curve_get_factory_default(0x%08x) failed: %d\n", +- fan_dev, err); +- /* Don't cause probe to fail on devices without fan-curves */ + return 0; + } + +diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h +index 4051c8cd0cd8a..23ab3b048d9be 100644 +--- a/drivers/usb/storage/unusual_uas.h ++++ b/drivers/usb/storage/unusual_uas.h +@@ -62,6 +62,13 @@ UNUSUAL_DEV(0x0984, 0x0301, 0x0128, 0x0128, + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + ++/* Reported-by: Tom Hu */ ++UNUSUAL_DEV(0x0b05, 0x1932, 0x0000, 0x9999, ++ "ASUS", ++ "External HDD", ++ USB_SC_DEVICE, USB_PR_DEVICE, NULL, ++ US_FL_IGNORE_UAS), ++ + /* Reported-by: David Webb */ + UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999, + "Seagate", +diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h +index 5fcf89faa31ab..d72626d71258f 100644 +--- a/include/linux/intel-iommu.h ++++ b/include/linux/intel-iommu.h +@@ -196,7 +196,6 @@ + #define ecap_dis(e) (((e) >> 27) & 0x1) + #define ecap_nest(e) (((e) >> 26) & 0x1) + #define ecap_mts(e) (((e) >> 25) & 0x1) +-#define ecap_ecs(e) (((e) >> 24) & 0x1) + #define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) + #define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16) + #define ecap_coherent(e) ((e) & 0x1) +@@ -264,7 +263,6 @@ + #define DMA_GSTS_CFIS (((u32)1) << 23) + + /* DMA_RTADDR_REG */ +-#define DMA_RTADDR_RTT (((u64)1) << 11) + #define DMA_RTADDR_SMT (((u64)1) << 10) + + /* CCMD_REG */ +@@ -579,6 +577,7 @@ struct 
intel_iommu { + + #ifdef CONFIG_INTEL_IOMMU + unsigned long *domain_ids; /* bitmap of domains */ ++ unsigned long *copied_tables; /* bitmap of copied tables */ + spinlock_t lock; /* protect context, domain ids */ + struct root_entry *root_entry; /* virtual address */ + +@@ -692,6 +691,11 @@ static inline int nr_pte_to_next_page(struct dma_pte *pte) + (struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte; + } + ++static inline bool context_present(struct context_entry *context) ++{ ++ return (context->lo & 1); ++} ++ + extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev); + + extern int dmar_enable_qi(struct intel_iommu *iommu); +@@ -776,7 +780,6 @@ static inline void intel_iommu_debugfs_init(void) {} + #endif /* CONFIG_INTEL_IOMMU_DEBUGFS */ + + extern const struct attribute_group *intel_iommu_groups[]; +-bool context_present(struct context_entry *context); + struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, + u8 devfn, int alloc); + +diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h +index b0b4ac92354a2..b3ea245faa515 100644 +--- a/include/linux/mlx5/driver.h ++++ b/include/linux/mlx5/driver.h +@@ -606,6 +606,7 @@ struct mlx5_priv { + spinlock_t ctx_lock; + struct mlx5_adev **adev; + int adev_idx; ++ int sw_vhca_id; + struct mlx5_events *events; + + struct mlx5_flow_steering *steering; +@@ -1274,16 +1275,17 @@ enum { + MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, + }; + +-static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev) ++bool mlx5_is_roce_on(struct mlx5_core_dev *dev); ++ ++static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev) + { +- struct devlink *devlink = priv_to_devlink(dev); +- union devlink_param_value val; +- int err; +- +- err = devlink_param_driverinit_value_get(devlink, +- DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, +- &val); +- return err ? 
MLX5_CAP_GEN(dev, roce) : val.vbool; ++ if (MLX5_CAP_GEN(dev, roce_rw_supported)) ++ return MLX5_CAP_GEN(dev, roce); ++ ++ /* If RoCE cap is read-only in FW, get RoCE state from devlink ++ * in order to support RoCE enable/disable feature ++ */ ++ return mlx5_is_roce_on(dev); + } + + #endif /* MLX5_DRIVER_H */ +diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h +index fd7d083a34d33..6d57e5ec9718d 100644 +--- a/include/linux/mlx5/mlx5_ifc.h ++++ b/include/linux/mlx5/mlx5_ifc.h +@@ -1804,7 +1804,14 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { + u8 max_reformat_remove_size[0x8]; + u8 max_reformat_remove_offset[0x8]; + +- u8 reserved_at_c0[0x740]; ++ u8 reserved_at_c0[0x160]; ++ ++ u8 reserved_at_220[0x1]; ++ u8 sw_vhca_id_valid[0x1]; ++ u8 sw_vhca_id[0xe]; ++ u8 reserved_at_230[0x10]; ++ ++ u8 reserved_at_240[0x5c0]; + }; + + enum mlx5_ifc_flow_destination_type { +@@ -3715,6 +3722,11 @@ struct mlx5_ifc_rmpc_bits { + struct mlx5_ifc_wq_bits wq; + }; + ++enum { ++ VHCA_ID_TYPE_HW = 0, ++ VHCA_ID_TYPE_SW = 1, ++}; ++ + struct mlx5_ifc_nic_vport_context_bits { + u8 reserved_at_0[0x5]; + u8 min_wqe_inline_mode[0x3]; +@@ -3731,8 +3743,8 @@ struct mlx5_ifc_nic_vport_context_bits { + u8 event_on_mc_address_change[0x1]; + u8 event_on_uc_address_change[0x1]; + +- u8 reserved_at_40[0xc]; +- ++ u8 vhca_id_type[0x1]; ++ u8 reserved_at_41[0xb]; + u8 affiliation_criteria[0x4]; + u8 affiliated_vhca_id[0x10]; + +@@ -7189,7 +7201,12 @@ struct mlx5_ifc_init_hca_in_bits { + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + +- u8 reserved_at_40[0x40]; ++ u8 reserved_at_40[0x20]; ++ ++ u8 reserved_at_60[0x2]; ++ u8 sw_vhca_id[0xe]; ++ u8 reserved_at_70[0x10]; ++ + u8 sw_owner_id[4][0x20]; + }; + +diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c +index cbdf0e2bc5ae0..d0fb74b0db1d5 100644 +--- a/net/bluetooth/mgmt.c ++++ b/net/bluetooth/mgmt.c +@@ -4420,6 +4420,22 @@ static int set_exp_feature(struct sock *sk, struct hci_dev *hdev, + MGMT_STATUS_NOT_SUPPORTED); + } + ++static u32 get_params_flags(struct hci_dev *hdev, ++ struct hci_conn_params *params) ++{ ++ u32 flags = hdev->conn_flags; ++ ++ /* Devices using RPAs can only be programmed in the acceptlist if ++ * LL Privacy has been enable otherwise they cannot mark ++ * HCI_CONN_FLAG_REMOTE_WAKEUP. ++ */ ++ if ((flags & HCI_CONN_FLAG_REMOTE_WAKEUP) && !use_ll_privacy(hdev) && ++ hci_find_irk_by_addr(hdev, ¶ms->addr, params->addr_type)) ++ flags &= ~HCI_CONN_FLAG_REMOTE_WAKEUP; ++ ++ return flags; ++} ++ + static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, + u16 data_len) + { +@@ -4451,10 +4467,10 @@ static int get_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, + } else { + params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, + le_addr_type(cp->addr.type)); +- + if (!params) + goto done; + ++ supported_flags = get_params_flags(hdev, params); + current_flags = params->flags; + } + +@@ -4523,38 +4539,35 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data, + bt_dev_warn(hdev, "No such BR/EDR device %pMR (0x%x)", + &cp->addr.bdaddr, cp->addr.type); + } +- } else { +- params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, +- le_addr_type(cp->addr.type)); +- if (params) { +- /* Devices using RPAs can only be programmed in the +- * acceptlist LL Privacy has been enable otherwise they +- * cannot mark HCI_CONN_FLAG_REMOTE_WAKEUP. 
+- */ +- if ((current_flags & HCI_CONN_FLAG_REMOTE_WAKEUP) && +- !use_ll_privacy(hdev) && +- hci_find_irk_by_addr(hdev, ¶ms->addr, +- params->addr_type)) { +- bt_dev_warn(hdev, +- "Cannot set wakeable for RPA"); +- goto unlock; +- } + +- params->flags = current_flags; +- status = MGMT_STATUS_SUCCESS; ++ goto unlock; ++ } + +- /* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY +- * has been set. +- */ +- if (params->flags & HCI_CONN_FLAG_DEVICE_PRIVACY) +- hci_update_passive_scan(hdev); +- } else { +- bt_dev_warn(hdev, "No such LE device %pMR (0x%x)", +- &cp->addr.bdaddr, +- le_addr_type(cp->addr.type)); +- } ++ params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, ++ le_addr_type(cp->addr.type)); ++ if (!params) { ++ bt_dev_warn(hdev, "No such LE device %pMR (0x%x)", ++ &cp->addr.bdaddr, le_addr_type(cp->addr.type)); ++ goto unlock; ++ } ++ ++ supported_flags = get_params_flags(hdev, params); ++ ++ if ((supported_flags | current_flags) != supported_flags) { ++ bt_dev_warn(hdev, "Bad flag given (0x%x) vs supported (0x%0x)", ++ current_flags, supported_flags); ++ goto unlock; + } + ++ params->flags = current_flags; ++ status = MGMT_STATUS_SUCCESS; ++ ++ /* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY ++ * has been set. ++ */ ++ if (params->flags & HCI_CONN_FLAG_DEVICE_PRIVACY) ++ hci_update_passive_scan(hdev); ++ + unlock: + hci_dev_unlock(hdev); + +diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c +index eb204ad36eeec..846588c0070a5 100644 +--- a/net/dsa/tag_hellcreek.c ++++ b/net/dsa/tag_hellcreek.c +@@ -45,7 +45,7 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, + + skb->dev = dsa_master_find_slave(dev, 0, port); + if (!skb->dev) { +- netdev_warn(dev, "Failed to get source port: %d\n", port); ++ netdev_warn_once(dev, "Failed to get source port: %d\n", port); + return NULL; + } + diff --git a/sys-kernel/pinephone-sources/files/Multi-Gen-LRU-Framework.patch b/sys-kernel/pinephone-sources/files/Multi-Gen-LRU-Framework.patch deleted file mode 100644 index cf365f8..0000000 --- a/sys-kernel/pinephone-sources/files/Multi-Gen-LRU-Framework.patch +++ /dev/null @@ -1,8901 +0,0 @@ -From patchwork Wed Jul 6 22:00:10 2022 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Patchwork-Submitter: Yu Zhao -X-Patchwork-Id: 12908719 -Return-Path: -X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on - aws-us-west-2-korg-lkml-1.web.codeaurora.org -Received: from kanga.kvack.org (kanga.kvack.org [205.233.56.17]) - by smtp.lore.kernel.org (Postfix) with ESMTP id 94C1DC43334 - for ; Wed, 6 Jul 2022 22:07:01 +0000 (UTC) -Received: by kanga.kvack.org (Postfix) - id 3052B6B0072; Wed, 6 Jul 2022 18:07:01 -0400 (EDT) -Received: by kanga.kvack.org (Postfix, from userid 40) - id 2B5426B0073; Wed, 6 Jul 2022 18:07:01 -0400 (EDT) -X-Delivered-To: int-list-linux-mm@kvack.org -Received: by kanga.kvack.org (Postfix, from userid 63042) - id 1A27C6B0074; Wed, 6 Jul 2022 18:07:01 -0400 (EDT) -X-Delivered-To: linux-mm@kvack.org -Received: from relay.hostedemail.com (smtprelay0011.hostedemail.com - [216.40.44.11]) - by kanga.kvack.org (Postfix) with ESMTP id 0BC686B0072 - for ; Wed, 6 Jul 2022 18:07:01 -0400 (EDT) -Received: from smtpin02.hostedemail.com (a10.router.float.18 [10.200.18.1]) - by unirelay10.hostedemail.com (Postfix) with ESMTP id CA2056A6 - for ; Wed, 6 Jul 2022 22:07:00 +0000 (UTC) -X-FDA: 79658061000.02.A339B96 -Received: from mail-io1-f74.google.com (mail-io1-f74.google.com - [209.85.166.74]) - by imf05.hostedemail.com 
(Postfix) with ESMTP id 4E8D2100017 - for ; Wed, 6 Jul 2022 22:07:00 +0000 (UTC) -Received: by mail-io1-f74.google.com with SMTP id - bw12-20020a056602398c00b00675895c2e24so8731074iob.19 - for ; Wed, 06 Jul 2022 15:07:00 -0700 (PDT) -DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20210112; - h=date:in-reply-to:message-id:mime-version:references:subject:from:to - :cc:content-transfer-encoding; - bh=qRI8PXbLcM+5CLpEdu5Szvo90bsJIGjJE2jS009gkGk=; - b=ZmyxY1Zw8XSvfRWkKAW+f4mUNqqtO18FFYBy2MotiZryXwyz9ItbUh9iu4txbliGWV - 2zSpKFQCiNnOAlQ6EcsvQBLjKhLO02wKW9+/0P3DsfIXA4cNhb908dXECrznSmVA8Pnr - F13ODZZAGss1dN9dP7/zz2TweJvGgqjzlw8hpy3C9EXhkGdCEVfFUX5sYsFwHF6ph62j - YFYkt0yEeDGZ6BSKwot0UC5ZcUyd9AqPFg+XD4PWIlU21bbWaLA6eIQAr/1vyvoOUESY - RP+ZlS9AQ2JVmz3TDo8SyWa829c8OgLjNn28DmB38A4um5Ju0lB8q6j6sdVFGsj5iEvp - AFww== -X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=1e100.net; s=20210112; - h=x-gm-message-state:date:in-reply-to:message-id:mime-version - :references:subject:from:to:cc:content-transfer-encoding; - bh=qRI8PXbLcM+5CLpEdu5Szvo90bsJIGjJE2jS009gkGk=; - b=AOUJsEidUKQJJt8SToVFGAPr3C12fTJoJElAAFE7KxYD410S32sDmau0kREt/LiYkv - dUGJshmCRHkkY36SzjWitE/xBZ1tllr3SMm51k1bORtnqWEcusXlr0UtsGdinQK9ILBh - K59jgkQzsGwJHEpe+Ll9kADhZ7o8oLcSMLrrgD3Fnx7oEtfuAHMKEcBlBkSgMPppXUTd - ulnN01wxGMVZTYZ/yA1sJwjTalfTQgS1jWfhZp1m3A1fJTF4eeWoX2ceAvv5rGOLAiJM - SW/ePavunPzzOpIPmO+dbtG0EWRHGTxBR65eiy6ov/69KgUE7bGwub0yYiOXQKWohY5Z - W3FQ== -X-Gm-Message-State: AJIora8WGdQPm0mObzFDWcXDHpPyK5u2HGFLGwaXGZ5Jsb3uO01h6wcU - F87blpjQgVwnSA0QtoTcFD+utCSKlvM= -X-Google-Smtp-Source: - AGRyM1sDSDGcRnDMwsrjVWQBGg0nLWGv8i6pEMJ10oDW9TLUnp/+bD9AQNZ2+emMVfEkLEmXVjNXufDKAmU= -X-Received: from yuzhao.bld.corp.google.com - ([2620:15c:183:200:b89c:e10a:466e:cf7d]) - (user=yuzhao job=sendgmr) by 2002:a05:6638:14cd:b0:33c:c00e:898d with SMTP id - l13-20020a05663814cd00b0033cc00e898dmr26357876jak.143.1657144852078; Wed, 06 - Jul 2022 15:00:52 -0700 (PDT) -Date: Wed, 6 Jul 2022 16:00:10 -0600 -In-Reply-To: <20220706220022.968789-1-yuzhao@google.com> -Message-Id: <20220706220022.968789-2-yuzhao@google.com> -Mime-Version: 1.0 -References: <20220706220022.968789-1-yuzhao@google.com> -X-Mailer: git-send-email 2.37.0.rc0.161.g10f37bed90-goog -Subject: [PATCH v13 01/14] mm: x86, arm64: add arch_has_hw_pte_young() -From: Yu Zhao -To: Andrew Morton -Cc: Andi Kleen , - Aneesh Kumar , - Catalin Marinas , - Dave Hansen , Hillf Danton , - Jens Axboe , Johannes Weiner , - Jonathan Corbet , - Linus Torvalds , - Matthew Wilcox , Mel Gorman , - Michael Larabel , - Michal Hocko , Mike Rapoport , - Peter Zijlstra , Tejun Heo , - Vlastimil Babka , Will Deacon , - linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org, - linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org, - page-reclaim@google.com, Yu Zhao , - Barry Song , Brian Geffon , - Jan Alexander Steffens , - Oleksandr Natalenko , - Steven Barrett , - Suleiman Souhlal , Daniel Byrne , - Donald Carr , - " =?utf-8?q?Holger_Hoffst=C3=A4tte?= " , - Konstantin Kharlamov , - Shuang Zhai , Sofia Trinh , - Vaibhav Jain -ARC-Seal: i=1; s=arc-20220608; d=hostedemail.com; t=1657145220; a=rsa-sha256; - cv=none; - b=VumvfKCjx5tf93BL/O1DNNiONuUUMaZYR4iOhULdFR4P8YOdhpBtrpKwBsHGR4wUqyMcvI - ToLran37owHd2V3ShTKRPwSH8VjFvggnLlLoA19COIyGitTG9II71uvoVW/BX9CNy0fyvU - cjZkFbkAV2gw14xwh4oA0dBJXiv4wcs= -ARC-Authentication-Results: i=1; - imf05.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=ZmyxY1Zw; - dmarc=pass (policy=reject) 
header.from=google.com; - spf=pass (imf05.hostedemail.com: domain of - 3FAbGYgYKCFULHM4xB3BB381.zB985AHK-997Ixz7.BE3@flex--yuzhao.bounces.google.com - designates 209.85.166.74 as permitted sender) - smtp.mailfrom=3FAbGYgYKCFULHM4xB3BB381.zB985AHK-997Ixz7.BE3@flex--yuzhao.bounces.google.com -ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; - d=hostedemail.com; - s=arc-20220608; t=1657145220; - h=from:from:sender:reply-to:subject:subject:date:date: - message-id:message-id:to:to:cc:cc:mime-version:mime-version: - content-type:content-type: - content-transfer-encoding:content-transfer-encoding: - in-reply-to:in-reply-to:references:references:dkim-signature; - bh=qRI8PXbLcM+5CLpEdu5Szvo90bsJIGjJE2jS009gkGk=; - b=qXdH1Ee5JE3ufkBF1syfLTJ4Hf4+XbhNy8Ep7CdbOWtn0impShoppleSgAJd0DjZcGtBPd - BrCXlkc1QnMUpwyPi5WEIjJZZLPAAkBIhwltXoG15zc7F1kIblfi2GpbrcQSpycZKhMp2a - awra7JeixwgTaauxTH0OVnzltL0UkbU= -X-Stat-Signature: y7hoskbhfp1nq6ugnwo8zwjg458t1yhh -X-Rspam-User: -X-Rspamd-Server: rspam12 -X-Rspamd-Queue-Id: 4E8D2100017 -Authentication-Results: imf05.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=ZmyxY1Zw; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf05.hostedemail.com: domain of - 3FAbGYgYKCFULHM4xB3BB381.zB985AHK-997Ixz7.BE3@flex--yuzhao.bounces.google.com - designates 209.85.166.74 as permitted sender) - smtp.mailfrom=3FAbGYgYKCFULHM4xB3BB381.zB985AHK-997Ixz7.BE3@flex--yuzhao.bounces.google.com -X-HE-Tag: 1657145220-618745 -X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4 -Sender: owner-linux-mm@kvack.org -Precedence: bulk -X-Loop: owner-majordomo@kvack.org -List-ID: - -Some architectures automatically set the accessed bit in PTEs, e.g., -x86 and arm64 v8.2. On architectures that do not have this capability, -clearing the accessed bit in a PTE usually triggers a page fault -following the TLB miss of this PTE (to emulate the accessed bit). - -Being aware of this capability can help make better decisions, e.g., -whether to spread the work out over a period of time to reduce bursty -page faults when trying to clear the accessed bit in many PTEs. - -Note that theoretically this capability can be unreliable, e.g., -hotplugged CPUs might be different from builtin ones. Therefore it -should not be used in architecture-independent code that involves -correctness, e.g., to determine whether TLB flushes are required (in -combination with the accessed bit). - -Signed-off-by: Yu Zhao -Reviewed-by: Barry Song -Acked-by: Brian Geffon -Acked-by: Jan Alexander Steffens (heftig) -Acked-by: Oleksandr Natalenko -Acked-by: Steven Barrett -Acked-by: Suleiman Souhlal -Acked-by: Will Deacon -Tested-by: Daniel Byrne -Tested-by: Donald Carr -Tested-by: Holger Hoffstätte -Tested-by: Konstantin Kharlamov -Tested-by: Shuang Zhai -Tested-by: Sofia Trinh -Tested-by: Vaibhav Jain ---- - arch/arm64/include/asm/pgtable.h | 15 ++------------- - arch/x86/include/asm/pgtable.h | 6 +++--- - include/linux/pgtable.h | 13 +++++++++++++ - mm/memory.c | 14 +------------- - 4 files changed, 19 insertions(+), 29 deletions(-) - -diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h -index 0b6632f18364..c46399c0500c 100644 ---- a/arch/arm64/include/asm/pgtable.h -+++ b/arch/arm64/include/asm/pgtable.h -@@ -1066,24 +1066,13 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, - * page after fork() + CoW for pfn mappings. We don't always have a - * hardware-managed access flag on arm64. 
- */ --static inline bool arch_faults_on_old_pte(void) --{ -- /* The register read below requires a stable CPU to make any sense */ -- cant_migrate(); -- -- return !cpu_has_hw_af(); --} --#define arch_faults_on_old_pte arch_faults_on_old_pte -+#define arch_has_hw_pte_young cpu_has_hw_af - - /* - * Experimentally, it's cheap to set the access flag in hardware and we - * benefit from prefaulting mappings as 'old' to start with. - */ --static inline bool arch_wants_old_prefaulted_pte(void) --{ -- return !arch_faults_on_old_pte(); --} --#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte -+#define arch_wants_old_prefaulted_pte cpu_has_hw_af - - static inline bool pud_sect_supported(void) - { -diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h -index 44e2d6f1dbaa..dc5f7d8ef68a 100644 ---- a/arch/x86/include/asm/pgtable.h -+++ b/arch/x86/include/asm/pgtable.h -@@ -1431,10 +1431,10 @@ static inline bool arch_has_pfn_modify_check(void) - return boot_cpu_has_bug(X86_BUG_L1TF); - } - --#define arch_faults_on_old_pte arch_faults_on_old_pte --static inline bool arch_faults_on_old_pte(void) -+#define arch_has_hw_pte_young arch_has_hw_pte_young -+static inline bool arch_has_hw_pte_young(void) - { -- return false; -+ return true; - } - - #ifdef CONFIG_PAGE_TABLE_CHECK -diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h -index 3cdc16cfd867..8eee31bc9bde 100644 ---- a/include/linux/pgtable.h -+++ b/include/linux/pgtable.h -@@ -260,6 +260,19 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, - #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - #endif - -+#ifndef arch_has_hw_pte_young -+/* -+ * Return whether the accessed bit is supported on the local CPU. -+ * -+ * This stub assumes accessing through an old PTE triggers a page fault. -+ * Architectures that automatically set the access bit should overwrite it. -+ */ -+static inline bool arch_has_hw_pte_young(void) -+{ -+ return false; -+} -+#endif -+ - #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR - static inline pte_t ptep_get_and_clear(struct mm_struct *mm, - unsigned long address, -diff --git a/mm/memory.c b/mm/memory.c -index 7a089145cad4..49500390b91b 100644 ---- a/mm/memory.c -+++ b/mm/memory.c -@@ -125,18 +125,6 @@ int randomize_va_space __read_mostly = - 2; - #endif - --#ifndef arch_faults_on_old_pte --static inline bool arch_faults_on_old_pte(void) --{ -- /* -- * Those arches which don't have hw access flag feature need to -- * implement their own helper. By default, "true" means pagefault -- * will be hit on old pte. -- */ -- return true; --} --#endif -- - #ifndef arch_wants_old_prefaulted_pte - static inline bool arch_wants_old_prefaulted_pte(void) - { -@@ -2862,7 +2850,7 @@ static inline bool __wp_page_copy_user(struct page *dst, struct page *src, - * On architectures with software "accessed" bits, we would - * take a double page fault, so mark it accessed here. 
- */ -- if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) { -+ if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) { - pte_t entry; - - vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); - -From patchwork Wed Jul 6 22:00:11 2022 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Patchwork-Submitter: Yu Zhao -X-Patchwork-Id: 12908700 -Return-Path: -X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on - aws-us-west-2-korg-lkml-1.web.codeaurora.org -Received: from kanga.kvack.org (kanga.kvack.org [205.233.56.17]) - by smtp.lore.kernel.org (Postfix) with ESMTP id CAC87C433EF - for ; Wed, 6 Jul 2022 22:00:56 +0000 (UTC) -Received: by kanga.kvack.org (Postfix) - id 154F66B0073; Wed, 6 Jul 2022 18:00:56 -0400 (EDT) -Received: by kanga.kvack.org (Postfix, from userid 40) - id 0B6EE6B0074; Wed, 6 Jul 2022 18:00:56 -0400 (EDT) -X-Delivered-To: int-list-linux-mm@kvack.org -Received: by kanga.kvack.org (Postfix, from userid 63042) - id E729E6B0075; Wed, 6 Jul 2022 18:00:55 -0400 (EDT) -X-Delivered-To: linux-mm@kvack.org -Received: from relay.hostedemail.com (smtprelay0016.hostedemail.com - [216.40.44.16]) - by kanga.kvack.org (Postfix) with ESMTP id D9E9C6B0073 - for ; Wed, 6 Jul 2022 18:00:55 -0400 (EDT) -Received: from smtpin22.hostedemail.com (a10.router.float.18 [10.200.18.1]) - by unirelay06.hostedemail.com (Postfix) with ESMTP id 9F16534610 - for ; Wed, 6 Jul 2022 22:00:55 +0000 (UTC) -X-FDA: 79658045670.22.8A546BF -Received: from mail-yb1-f201.google.com (mail-yb1-f201.google.com - [209.85.219.201]) - by imf12.hostedemail.com (Postfix) with ESMTP id 0D92C40033 - for ; Wed, 6 Jul 2022 22:00:53 +0000 (UTC) -Received: by mail-yb1-f201.google.com with SMTP id - m68-20020a253f47000000b006683bd91962so12445167yba.0 - for ; Wed, 06 Jul 2022 15:00:53 -0700 (PDT) -DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20210112; - h=date:in-reply-to:message-id:mime-version:references:subject:from:to - :cc:content-transfer-encoding; - bh=Yske1o/9q5kvxCT6Do7fK+m0Z7RMKAIlwEA5dqQMV6o=; - b=ScyLwm63xSUVYY78eVpIKf7E4l6uHPJ8SKqWyYLqNgfcQS9rJpZhYXa+GvIYC8VFxz - 2VFStSncvwevlF5a8SeHX4Xsz1oxV5uuYYiB5ijS1hgFnqmnWUZ92SAkit2dsdOrKkVm - doRskpr19skWYdTit7iDaFWDHSkEjmp1FnyOwnhb4K1iob0FZUGliEmOjr11tQKlaxMl - A7gk8PUbqgtBAB5FxJW674j5ErsQXUNEF0mV9mDiI18iHiW2zTe0Jvp4coFt/YGkO03P - +mGZgU80OTVBNdIcmd9CUSdknj31pHlFfc27NA1Hoqf7YpOu3eL0SW+Jp946t/R7w6FH - wLdA== -X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=1e100.net; s=20210112; - h=x-gm-message-state:date:in-reply-to:message-id:mime-version - :references:subject:from:to:cc:content-transfer-encoding; - bh=Yske1o/9q5kvxCT6Do7fK+m0Z7RMKAIlwEA5dqQMV6o=; - b=XVonL9oPc/dGS0Lj1bwrmAxlSwptN2oDguArTP7th8VxXdXwHpn2oGKmURoazynnWW - GaxAg33Dr4knllzhF6wCdcowLA++/AgQbdQfMwZEbDkgdPMiKz+9twLafdDp2twVELPc - mZFyE0neVCe8OAOes5N5stgxrIPJyGN+cmejA7EFYbUXD5yKaVHVWEbZ1DKvs+vkVfYH - 4I7Mc++TN9sTNUODcCZv7eNmy5ddfKdhs8ZEqmBzkQQl+6Nyi2IUxEa3YeftDVQx6pqJ - 7oPE/pTcwcjKcRm4Bn+MZj1FE7of9UClcR0Wd4ZoxSRmvPtCnOiV9G4yZyDcGkpaBUWh - +quA== -X-Gm-Message-State: AJIora81vFHYbn7du42CA/tgPDfWrrUd4KS2ldCuwHG08ccHdOGjEtup - i9hDsoVZxoG9FtGAgLFxbBinusC9kmE= -X-Google-Smtp-Source: - AGRyM1uPiyB7rEJDdAsYAVesh6XcxF7m4/NOwgKHx35NtLh0WZv9A8PqKLV1Gu8X5xOooB/DS/0V1C1QSZU= -X-Received: from yuzhao.bld.corp.google.com - ([2620:15c:183:200:b89c:e10a:466e:cf7d]) - (user=yuzhao job=sendgmr) by 2002:a25:390:0:b0:66e:b9c7:b46c with SMTP id - 138-20020a250390000000b0066eb9c7b46cmr172371ybd.505.1657144853349; Wed, 
06 - Jul 2022 15:00:53 -0700 (PDT) -Date: Wed, 6 Jul 2022 16:00:11 -0600 -In-Reply-To: <20220706220022.968789-1-yuzhao@google.com> -Message-Id: <20220706220022.968789-3-yuzhao@google.com> -Mime-Version: 1.0 -References: <20220706220022.968789-1-yuzhao@google.com> -X-Mailer: git-send-email 2.37.0.rc0.161.g10f37bed90-goog -Subject: [PATCH v13 02/14] mm: x86: add CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG -From: Yu Zhao -To: Andrew Morton -Cc: Andi Kleen , - Aneesh Kumar , - Catalin Marinas , - Dave Hansen , Hillf Danton , - Jens Axboe , Johannes Weiner , - Jonathan Corbet , - Linus Torvalds , - Matthew Wilcox , Mel Gorman , - Michael Larabel , - Michal Hocko , Mike Rapoport , - Peter Zijlstra , Tejun Heo , - Vlastimil Babka , Will Deacon , - linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org, - linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org, - page-reclaim@google.com, Yu Zhao , - Barry Song , Brian Geffon , - Jan Alexander Steffens , - Oleksandr Natalenko , - Steven Barrett , - Suleiman Souhlal , Daniel Byrne , - Donald Carr , - " =?utf-8?q?Holger_Hoffst=C3=A4tte?= " , - Konstantin Kharlamov , - Shuang Zhai , Sofia Trinh , - Vaibhav Jain -ARC-Seal: i=1; s=arc-20220608; d=hostedemail.com; t=1657144854; a=rsa-sha256; - cv=none; - b=ba2dVYmnfxk8QTCdMBgkSodQNf9QrUzHc+vrIrNl2fGKaUM0VC5LUOkOS0Uam92Z/fgIw+ - J3iBf4wOdpf9YxVZLpvnO/CvPz7LzU7dbaCIsHjkTYZyjSGj5b5H8veJBlUQe2PyEhqktl - KdZlmcrPxuSkAHBseFs2D8j/Mhzx2nw= -ARC-Authentication-Results: i=1; - imf12.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=ScyLwm63; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf12.hostedemail.com: domain of - 3FQbGYgYKCFYMIN5yC4CC492.0CA96BIL-AA8Jy08.CF4@flex--yuzhao.bounces.google.com - designates 209.85.219.201 as permitted sender) - smtp.mailfrom=3FQbGYgYKCFYMIN5yC4CC492.0CA96BIL-AA8Jy08.CF4@flex--yuzhao.bounces.google.com -ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; - d=hostedemail.com; - s=arc-20220608; t=1657144854; - h=from:from:sender:reply-to:subject:subject:date:date: - message-id:message-id:to:to:cc:cc:mime-version:mime-version: - content-type:content-type: - content-transfer-encoding:content-transfer-encoding: - in-reply-to:in-reply-to:references:references:dkim-signature; - bh=Yske1o/9q5kvxCT6Do7fK+m0Z7RMKAIlwEA5dqQMV6o=; - b=TMdhE0VPqYoVfu/UiVKCUJJu+4spbPZ1vrUaXbX8Pa9OP/6dtZAs1KcJMK2kLgdUvhO8E9 - UX8x+Y/myW5EAlPfC2BrKGzFE9TjBUVGeJYESpLIZg2lf658PqCu5GAkoM5vFZRBG80nvF - ObTkOxqzk+IkvR3PYDfWaVzYP1bgmkU= -Authentication-Results: imf12.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=ScyLwm63; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf12.hostedemail.com: domain of - 3FQbGYgYKCFYMIN5yC4CC492.0CA96BIL-AA8Jy08.CF4@flex--yuzhao.bounces.google.com - designates 209.85.219.201 as permitted sender) - smtp.mailfrom=3FQbGYgYKCFYMIN5yC4CC492.0CA96BIL-AA8Jy08.CF4@flex--yuzhao.bounces.google.com -X-Stat-Signature: u9s859meeaiiqe5mxswoozqtkc4fepwx -X-Rspamd-Queue-Id: 0D92C40033 -X-Rspamd-Server: rspam05 -X-Rspam-User: -X-HE-Tag: 1657144853-88353 -X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4 -Sender: owner-linux-mm@kvack.org -Precedence: bulk -X-Loop: owner-majordomo@kvack.org -List-ID: - -Some architectures support the accessed bit in non-leaf PMD entries, -e.g., x86 sets the accessed bit in a non-leaf PMD entry when using it -as part of linear address translation [1]. 
Page table walkers that -clear the accessed bit may use this capability to reduce their search -space. - -Note that: -1. Although an inline function is preferable, this capability is added - as a configuration option for consistency with the existing macros. -2. Due to the little interest in other varieties, this capability was - only tested on Intel and AMD CPUs. - -Thanks to the following developers for their efforts [2][3]. - Randy Dunlap - Stephen Rothwell - -[1]: Intel 64 and IA-32 Architectures Software Developer's Manual - Volume 3 (June 2021), section 4.8 -[2] https://lore.kernel.org/r/bfdcc7c8-922f-61a9-aa15-7e7250f04af7@infradead.org/ -[3] https://lore.kernel.org/r/20220413151513.5a0d7a7e@canb.auug.org.au/ - -Signed-off-by: Yu Zhao -Reviewed-by: Barry Song -Acked-by: Brian Geffon -Acked-by: Jan Alexander Steffens (heftig) -Acked-by: Oleksandr Natalenko -Acked-by: Steven Barrett -Acked-by: Suleiman Souhlal -Tested-by: Daniel Byrne -Tested-by: Donald Carr -Tested-by: Holger Hoffstätte -Tested-by: Konstantin Kharlamov -Tested-by: Shuang Zhai -Tested-by: Sofia Trinh -Tested-by: Vaibhav Jain ---- - arch/Kconfig | 8 ++++++++ - arch/x86/Kconfig | 1 + - arch/x86/include/asm/pgtable.h | 3 ++- - arch/x86/mm/pgtable.c | 5 ++++- - include/linux/pgtable.h | 4 ++-- - 5 files changed, 17 insertions(+), 4 deletions(-) - -diff --git a/arch/Kconfig b/arch/Kconfig -index fcf9a41a4ef5..eaeec187bd6a 100644 ---- a/arch/Kconfig -+++ b/arch/Kconfig -@@ -1403,6 +1403,14 @@ config DYNAMIC_SIGFRAME - config HAVE_ARCH_NODE_DEV_GROUP - bool - -+config ARCH_HAS_NONLEAF_PMD_YOUNG -+ bool -+ help -+ Architectures that select this option are capable of setting the -+ accessed bit in non-leaf PMD entries when using them as part of linear -+ address translations. Page table walkers that clear the accessed bit -+ may use this capability to reduce their search space. 
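As an aside (not part of this series): one way a page-table walker might act on the capability described in the help text above. The helper name pmd_range_worth_scanning() is invented for this sketch; pmdp_test_and_clear_young() is the accessor that the hunks below make available outside CONFIG_TRANSPARENT_HUGEPAGE.

        static bool pmd_range_worth_scanning(struct vm_area_struct *vma,
                                             unsigned long addr, pmd_t *pmd)
        {
                if (!IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG))
                        return true;    /* no hint; fall back to scanning the PTEs */

                /*
                 * A clear accessed bit in the non-leaf entry means no PTE
                 * underneath has been used since the bit was last cleared,
                 * so the whole range can be skipped.
                 */
                return pmdp_test_and_clear_young(vma, addr, pmd);
        }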
-+ - source "kernel/gcov/Kconfig" - - source "scripts/gcc-plugins/Kconfig" -diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index be0b95e51df6..5715111abe13 100644 ---- a/arch/x86/Kconfig -+++ b/arch/x86/Kconfig -@@ -85,6 +85,7 @@ config X86 - select ARCH_HAS_PMEM_API if X86_64 - select ARCH_HAS_PTE_DEVMAP if X86_64 - select ARCH_HAS_PTE_SPECIAL -+ select ARCH_HAS_NONLEAF_PMD_YOUNG if PGTABLE_LEVELS > 2 - select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 - select ARCH_HAS_COPY_MC if X86_64 - select ARCH_HAS_SET_MEMORY -diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h -index dc5f7d8ef68a..5059799bebe3 100644 ---- a/arch/x86/include/asm/pgtable.h -+++ b/arch/x86/include/asm/pgtable.h -@@ -815,7 +815,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) - - static inline int pmd_bad(pmd_t pmd) - { -- return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; -+ return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) != -+ (_KERNPG_TABLE & ~_PAGE_ACCESSED); - } - - static inline unsigned long pages_to_mb(unsigned long npg) -diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c -index a932d7712d85..8525f2876fb4 100644 ---- a/arch/x86/mm/pgtable.c -+++ b/arch/x86/mm/pgtable.c -@@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, - return ret; - } - --#ifdef CONFIG_TRANSPARENT_HUGEPAGE -+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) - int pmdp_test_and_clear_young(struct vm_area_struct *vma, - unsigned long addr, pmd_t *pmdp) - { -@@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, - - return ret; - } -+#endif -+ -+#ifdef CONFIG_TRANSPARENT_HUGEPAGE - int pudp_test_and_clear_young(struct vm_area_struct *vma, - unsigned long addr, pud_t *pudp) - { -diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h -index 8eee31bc9bde..9c57c5cc49c2 100644 ---- a/include/linux/pgtable.h -+++ b/include/linux/pgtable.h -@@ -213,7 +213,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, - #endif - - #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG --#ifdef CONFIG_TRANSPARENT_HUGEPAGE -+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) - static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, - pmd_t *pmdp) -@@ -234,7 +234,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, - BUILD_BUG(); - return 0; - } --#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ - #endif - - #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH - -From patchwork Wed Jul 6 22:00:12 2022 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Patchwork-Submitter: Yu Zhao -X-Patchwork-Id: 12908701 -Return-Path: -X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on - aws-us-west-2-korg-lkml-1.web.codeaurora.org -Received: from kanga.kvack.org (kanga.kvack.org [205.233.56.17]) - by smtp.lore.kernel.org (Postfix) with ESMTP id 05277CCA481 - for ; Wed, 6 Jul 2022 22:00:57 +0000 (UTC) -Received: by kanga.kvack.org (Postfix) - id 7E4186B0074; Wed, 6 Jul 2022 18:00:57 -0400 (EDT) -Received: by kanga.kvack.org (Postfix, from userid 40) - id 76ECC6B0075; Wed, 6 Jul 2022 18:00:57 -0400 (EDT) -X-Delivered-To: int-list-linux-mm@kvack.org -Received: by kanga.kvack.org (Postfix, from userid 63042) - id 524486B0078; Wed, 6 Jul 2022 18:00:57 -0400 (EDT) -X-Delivered-To: linux-mm@kvack.org 
-Received: from relay.hostedemail.com (smtprelay0011.hostedemail.com - [216.40.44.11]) - by kanga.kvack.org (Postfix) with ESMTP id 3E4B36B0074 - for ; Wed, 6 Jul 2022 18:00:57 -0400 (EDT) -Received: from smtpin31.hostedemail.com (a10.router.float.18 [10.200.18.1]) - by unirelay06.hostedemail.com (Postfix) with ESMTP id 0F88934416 - for ; Wed, 6 Jul 2022 22:00:57 +0000 (UTC) -X-FDA: 79658045754.31.374F01B -Received: from mail-yw1-f201.google.com (mail-yw1-f201.google.com - [209.85.128.201]) - by imf23.hostedemail.com (Postfix) with ESMTP id 7B5CF140071 - for ; Wed, 6 Jul 2022 22:00:55 +0000 (UTC) -Received: by mail-yw1-f201.google.com with SMTP id - 00721157ae682-31c9a49a1a8so63943167b3.9 - for ; Wed, 06 Jul 2022 15:00:55 -0700 (PDT) -DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20210112; - h=date:in-reply-to:message-id:mime-version:references:subject:from:to - :cc:content-transfer-encoding; - bh=xRTO2a+j5NrVJKtKeScRWRKKBSTrjcMS5t6hiKMzj7E=; - b=jaYbReZJ8uDDLbii1xwhzvdsu6n9p9fFeOoX3rMWV4HRFwikqu+fxkANqP9J1hGdR2 - NeJtlffRYWnnwdndS5aG1Db183fv4nEfSDNZk5Aw1GhS0DDV+irZrJ4sR+RBQ0mlRL0F - PCWg0VVitxpZ5yzJzYAkEO4uHOjww0Tjni9prrUmk4iDUdAeuQHZsQYSGRbR+cGm4i8w - k7/vbxWbkPS/YQ/tq51SCEZjr+bTsFRcUYhsaDMMVhgqvpvMmhh84viZjp9G9W/MZCVp - lhJy7B/1ym1XZ9aYTn0gi9sgQDfh0ksvuw/1a5ib9CO1DG9/pvF0LoK/EKm8nNJ/pZyy - kAfA== -X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=1e100.net; s=20210112; - h=x-gm-message-state:date:in-reply-to:message-id:mime-version - :references:subject:from:to:cc:content-transfer-encoding; - bh=xRTO2a+j5NrVJKtKeScRWRKKBSTrjcMS5t6hiKMzj7E=; - b=idSd06MTounv54fIacoxrI0eQJJSRslStkggX8APKQWu6w4H6jnTCQKfjZteTt+gtC - gAOrWhpfQgtNIatPBQliE6gNhZivKVIon5hkSCuin7ffPXxx98Zr4xXmYyyQFey9eZEP - bYnELAeg+MpzknUvWf0bHlIZA6PwqWxz1eWAYDCV4dPEApBGDNhC+aYkMFf9EqdJdmDs - 9FEeSbXKN25LUaLiOpKh0js2kdqX4Aukk0uqlH6XX+ZNfNNe0hkK/+OhTievM+jCt5TK - p/+QW8aqDk5Qbxin3B3Tq7oWWNyUaM/L/r7o7nuPHq1VWWbGPqj0YyKzEZLz9+OsyhJD - mEbg== -X-Gm-Message-State: AJIora8tXy61OEQgPe+vKTfstDk3naLrxXDSbXHWqk7IopwvdFEH4Cef - nkdLlvLaW7zOebfRuKKHBEK+WcRl9M8= -X-Google-Smtp-Source: - AGRyM1slRLC7YoaOAJX582u+iAc9V/TxbhI1Hoxnov8FBUQD9MbHWxCLjXxj9TQ7JPp2TjaJTr9hNbuL1Ok= -X-Received: from yuzhao.bld.corp.google.com - ([2620:15c:183:200:b89c:e10a:466e:cf7d]) - (user=yuzhao job=sendgmr) by 2002:a05:6902:1206:b0:66e:6e93:366c with SMTP id - s6-20020a056902120600b0066e6e93366cmr11968955ybu.59.1657144854840; Wed, 06 - Jul 2022 15:00:54 -0700 (PDT) -Date: Wed, 6 Jul 2022 16:00:12 -0600 -In-Reply-To: <20220706220022.968789-1-yuzhao@google.com> -Message-Id: <20220706220022.968789-4-yuzhao@google.com> -Mime-Version: 1.0 -References: <20220706220022.968789-1-yuzhao@google.com> -X-Mailer: git-send-email 2.37.0.rc0.161.g10f37bed90-goog -Subject: [PATCH v13 03/14] mm/vmscan.c: refactor shrink_node() -From: Yu Zhao -To: Andrew Morton -Cc: Andi Kleen , - Aneesh Kumar , - Catalin Marinas , - Dave Hansen , Hillf Danton , - Jens Axboe , Johannes Weiner , - Jonathan Corbet , - Linus Torvalds , - Matthew Wilcox , Mel Gorman , - Michael Larabel , - Michal Hocko , Mike Rapoport , - Peter Zijlstra , Tejun Heo , - Vlastimil Babka , Will Deacon , - linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org, - linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org, - page-reclaim@google.com, Yu Zhao , - Barry Song , Miaohe Lin , - Brian Geffon , - Jan Alexander Steffens , - Oleksandr Natalenko , - Steven Barrett , - Suleiman Souhlal , Daniel Byrne , - Donald Carr , - " =?utf-8?q?Holger_Hoffst=C3=A4tte?= " , - 
Konstantin Kharlamov , - Shuang Zhai , Sofia Trinh , - Vaibhav Jain -ARC-Seal: i=1; s=arc-20220608; d=hostedemail.com; t=1657144855; a=rsa-sha256; - cv=none; - b=C5Z9j3O02mimmj7Fw9J8fsWNjRE+Y/gPxdz9C+kmFqM/2BgzBkrX0GAW0hgI7dHEM8924m - HqfJJSqt0XV/+xhpN2q4jK0T+02nB4EEXRK//o2vKS+/FvUwh/ucVjQrLyDiacFK43TXI6 - NfK2zkhL3Ol0W61EEn2HSK05MjQkolc= -ARC-Authentication-Results: i=1; - imf23.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=jaYbReZJ; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf23.hostedemail.com: domain of - 3FgbGYgYKCFcNJO6zD5DD5A3.1DBA7CJM-BB9Kz19.DG5@flex--yuzhao.bounces.google.com - designates 209.85.128.201 as permitted sender) - smtp.mailfrom=3FgbGYgYKCFcNJO6zD5DD5A3.1DBA7CJM-BB9Kz19.DG5@flex--yuzhao.bounces.google.com -ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; - d=hostedemail.com; - s=arc-20220608; t=1657144855; - h=from:from:sender:reply-to:subject:subject:date:date: - message-id:message-id:to:to:cc:cc:mime-version:mime-version: - content-type:content-type: - content-transfer-encoding:content-transfer-encoding: - in-reply-to:in-reply-to:references:references:dkim-signature; - bh=xRTO2a+j5NrVJKtKeScRWRKKBSTrjcMS5t6hiKMzj7E=; - b=8oACpQ7ksfTXalrWOXw+va/sQyeVexNkPm99hYfcs2rccyQJRt+TczrAEF/1Dx80ZM4U1c - tQU/+fYGnG8sEjmePqrrHye0U6E7JvxV6YqmuCDVUUaIEEgYqMC0KfEir3FNalMA6JhauV - vCylGdyHJmqBqvDVl9PD0HpFhXgtW3U= -Authentication-Results: imf23.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=jaYbReZJ; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf23.hostedemail.com: domain of - 3FgbGYgYKCFcNJO6zD5DD5A3.1DBA7CJM-BB9Kz19.DG5@flex--yuzhao.bounces.google.com - designates 209.85.128.201 as permitted sender) - smtp.mailfrom=3FgbGYgYKCFcNJO6zD5DD5A3.1DBA7CJM-BB9Kz19.DG5@flex--yuzhao.bounces.google.com -X-Stat-Signature: ogjqd7prxk1deq49x3znr781rgid8amh -X-Rspamd-Queue-Id: 7B5CF140071 -X-Rspamd-Server: rspam05 -X-Rspam-User: -X-HE-Tag: 1657144855-543148 -X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4 -Sender: owner-linux-mm@kvack.org -Precedence: bulk -X-Loop: owner-majordomo@kvack.org -List-ID: - -This patch refactors shrink_node() to improve readability for the -upcoming changes to mm/vmscan.c. - -Signed-off-by: Yu Zhao -Reviewed-by: Barry Song -Reviewed-by: Miaohe Lin -Acked-by: Brian Geffon -Acked-by: Jan Alexander Steffens (heftig) -Acked-by: Oleksandr Natalenko -Acked-by: Steven Barrett -Acked-by: Suleiman Souhlal -Tested-by: Daniel Byrne -Tested-by: Donald Carr -Tested-by: Holger Hoffstätte -Tested-by: Konstantin Kharlamov -Tested-by: Shuang Zhai -Tested-by: Sofia Trinh -Tested-by: Vaibhav Jain ---- - mm/vmscan.c | 198 +++++++++++++++++++++++++++------------------------- - 1 file changed, 104 insertions(+), 94 deletions(-) - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index f7d9a683e3a7..fddb9bd3c6c2 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -2670,6 +2670,109 @@ enum scan_balance { - SCAN_FILE, - }; - -+static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc) -+{ -+ unsigned long file; -+ struct lruvec *target_lruvec; -+ -+ target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); -+ -+ /* -+ * Flush the memory cgroup stats, so that we read accurate per-memcg -+ * lruvec stats for heuristics. -+ */ -+ mem_cgroup_flush_stats(); -+ -+ /* -+ * Determine the scan balance between anon and file LRUs. 
-+ */ -+ spin_lock_irq(&target_lruvec->lru_lock); -+ sc->anon_cost = target_lruvec->anon_cost; -+ sc->file_cost = target_lruvec->file_cost; -+ spin_unlock_irq(&target_lruvec->lru_lock); -+ -+ /* -+ * Target desirable inactive:active list ratios for the anon -+ * and file LRU lists. -+ */ -+ if (!sc->force_deactivate) { -+ unsigned long refaults; -+ -+ refaults = lruvec_page_state(target_lruvec, -+ WORKINGSET_ACTIVATE_ANON); -+ if (refaults != target_lruvec->refaults[0] || -+ inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) -+ sc->may_deactivate |= DEACTIVATE_ANON; -+ else -+ sc->may_deactivate &= ~DEACTIVATE_ANON; -+ -+ /* -+ * When refaults are being observed, it means a new -+ * workingset is being established. Deactivate to get -+ * rid of any stale active pages quickly. -+ */ -+ refaults = lruvec_page_state(target_lruvec, -+ WORKINGSET_ACTIVATE_FILE); -+ if (refaults != target_lruvec->refaults[1] || -+ inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) -+ sc->may_deactivate |= DEACTIVATE_FILE; -+ else -+ sc->may_deactivate &= ~DEACTIVATE_FILE; -+ } else -+ sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; -+ -+ /* -+ * If we have plenty of inactive file pages that aren't -+ * thrashing, try to reclaim those first before touching -+ * anonymous pages. -+ */ -+ file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE); -+ if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) -+ sc->cache_trim_mode = 1; -+ else -+ sc->cache_trim_mode = 0; -+ -+ /* -+ * Prevent the reclaimer from falling into the cache trap: as -+ * cache pages start out inactive, every cache fault will tip -+ * the scan balance towards the file LRU. And as the file LRU -+ * shrinks, so does the window for rotation from references. -+ * This means we have a runaway feedback loop where a tiny -+ * thrashing file LRU becomes infinitely more attractive than -+ * anon pages. Try to detect this based on file LRU size. -+ */ -+ if (!cgroup_reclaim(sc)) { -+ unsigned long total_high_wmark = 0; -+ unsigned long free, anon; -+ int z; -+ -+ free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); -+ file = node_page_state(pgdat, NR_ACTIVE_FILE) + -+ node_page_state(pgdat, NR_INACTIVE_FILE); -+ -+ for (z = 0; z < MAX_NR_ZONES; z++) { -+ struct zone *zone = &pgdat->node_zones[z]; -+ -+ if (!managed_zone(zone)) -+ continue; -+ -+ total_high_wmark += high_wmark_pages(zone); -+ } -+ -+ /* -+ * Consider anon: if that's low too, this isn't a -+ * runaway file reclaim problem, but rather just -+ * extreme pressure. Reclaim as per usual then. -+ */ -+ anon = node_page_state(pgdat, NR_INACTIVE_ANON); -+ -+ sc->file_is_tiny = -+ file + free <= total_high_wmark && -+ !(sc->may_deactivate & DEACTIVATE_ANON) && -+ anon >> sc->priority; -+ } -+} -+ - /* - * Determine how aggressively the anon and file LRU lists should be - * scanned. -@@ -3138,109 +3241,16 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) - unsigned long nr_reclaimed, nr_scanned; - struct lruvec *target_lruvec; - bool reclaimable = false; -- unsigned long file; - - target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); - - again: -- /* -- * Flush the memory cgroup stats, so that we read accurate per-memcg -- * lruvec stats for heuristics. -- */ -- mem_cgroup_flush_stats(); -- - memset(&sc->nr, 0, sizeof(sc->nr)); - - nr_reclaimed = sc->nr_reclaimed; - nr_scanned = sc->nr_scanned; - -- /* -- * Determine the scan balance between anon and file LRUs. 
-- */ -- spin_lock_irq(&target_lruvec->lru_lock); -- sc->anon_cost = target_lruvec->anon_cost; -- sc->file_cost = target_lruvec->file_cost; -- spin_unlock_irq(&target_lruvec->lru_lock); -- -- /* -- * Target desirable inactive:active list ratios for the anon -- * and file LRU lists. -- */ -- if (!sc->force_deactivate) { -- unsigned long refaults; -- -- refaults = lruvec_page_state(target_lruvec, -- WORKINGSET_ACTIVATE_ANON); -- if (refaults != target_lruvec->refaults[0] || -- inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) -- sc->may_deactivate |= DEACTIVATE_ANON; -- else -- sc->may_deactivate &= ~DEACTIVATE_ANON; -- -- /* -- * When refaults are being observed, it means a new -- * workingset is being established. Deactivate to get -- * rid of any stale active pages quickly. -- */ -- refaults = lruvec_page_state(target_lruvec, -- WORKINGSET_ACTIVATE_FILE); -- if (refaults != target_lruvec->refaults[1] || -- inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) -- sc->may_deactivate |= DEACTIVATE_FILE; -- else -- sc->may_deactivate &= ~DEACTIVATE_FILE; -- } else -- sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; -- -- /* -- * If we have plenty of inactive file pages that aren't -- * thrashing, try to reclaim those first before touching -- * anonymous pages. -- */ -- file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE); -- if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) -- sc->cache_trim_mode = 1; -- else -- sc->cache_trim_mode = 0; -- -- /* -- * Prevent the reclaimer from falling into the cache trap: as -- * cache pages start out inactive, every cache fault will tip -- * the scan balance towards the file LRU. And as the file LRU -- * shrinks, so does the window for rotation from references. -- * This means we have a runaway feedback loop where a tiny -- * thrashing file LRU becomes infinitely more attractive than -- * anon pages. Try to detect this based on file LRU size. -- */ -- if (!cgroup_reclaim(sc)) { -- unsigned long total_high_wmark = 0; -- unsigned long free, anon; -- int z; -- -- free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); -- file = node_page_state(pgdat, NR_ACTIVE_FILE) + -- node_page_state(pgdat, NR_INACTIVE_FILE); -- -- for (z = 0; z < MAX_NR_ZONES; z++) { -- struct zone *zone = &pgdat->node_zones[z]; -- if (!managed_zone(zone)) -- continue; -- -- total_high_wmark += high_wmark_pages(zone); -- } -- -- /* -- * Consider anon: if that's low too, this isn't a -- * runaway file reclaim problem, but rather just -- * extreme pressure. Reclaim as per usual then. 
-- */ -- anon = node_page_state(pgdat, NR_INACTIVE_ANON); -- -- sc->file_is_tiny = -- file + free <= total_high_wmark && -- !(sc->may_deactivate & DEACTIVATE_ANON) && -- anon >> sc->priority; -- } -+ prepare_scan_count(pgdat, sc); - - shrink_node_memcgs(pgdat, sc); - - -From patchwork Wed Jul 6 22:00:13 2022 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Patchwork-Submitter: Yu Zhao -X-Patchwork-Id: 12908702 -Return-Path: -X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on - aws-us-west-2-korg-lkml-1.web.codeaurora.org -Received: from kanga.kvack.org (kanga.kvack.org [205.233.56.17]) - by smtp.lore.kernel.org (Postfix) with ESMTP id E07C1C43334 - for ; Wed, 6 Jul 2022 22:00:59 +0000 (UTC) -Received: by kanga.kvack.org (Postfix) - id E767D6B0075; Wed, 6 Jul 2022 18:00:57 -0400 (EDT) -Received: by kanga.kvack.org (Postfix, from userid 40) - id E26676B0078; Wed, 6 Jul 2022 18:00:57 -0400 (EDT) -X-Delivered-To: int-list-linux-mm@kvack.org -Received: by kanga.kvack.org (Postfix, from userid 63042) - id CC6F16B007B; Wed, 6 Jul 2022 18:00:57 -0400 (EDT) -X-Delivered-To: linux-mm@kvack.org -Received: from relay.hostedemail.com (smtprelay0011.hostedemail.com - [216.40.44.11]) - by kanga.kvack.org (Postfix) with ESMTP id B2AAC6B0075 - for ; Wed, 6 Jul 2022 18:00:57 -0400 (EDT) -Received: from smtpin09.hostedemail.com (a10.router.float.18 [10.200.18.1]) - by unirelay11.hostedemail.com (Postfix) with ESMTP id 94CBE80B22 - for ; Wed, 6 Jul 2022 22:00:57 +0000 (UTC) -X-FDA: 79658045754.09.169939E -Received: from mail-yb1-f201.google.com (mail-yb1-f201.google.com - [209.85.219.201]) - by imf07.hostedemail.com (Postfix) with ESMTP id 3DA0B4002F - for ; Wed, 6 Jul 2022 22:00:57 +0000 (UTC) -Received: by mail-yb1-f201.google.com with SMTP id - l6-20020a25bf86000000b00668c915a3f2so12477298ybk.4 - for ; Wed, 06 Jul 2022 15:00:56 -0700 (PDT) -DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20210112; - h=date:in-reply-to:message-id:mime-version:references:subject:from:to - :cc:content-transfer-encoding; - bh=GyLn4alIs4ulph3wPn6kG/6c2qN7BZlAW/LU7V/wtB8=; - b=fqvYzFJd6diatCK9xOi35jB4AbI1jOxd0dc3zbIWXBRd/oZCSL2ChL+LrZ+NDYE03d - TIPGwoUneWvzbc4OXeOfpb0FtGxmdhwy/nlPnMgq+BH+J79K/39lDuK/WznYk1HI+hzN - zL7bsRal3Q8YUC5jRMId0XoVcP/vuEU/M54E4rAJ15EBntL/F6yfHEySvrSBBtWZhnt0 - 90gyXGuo//w+Jc0ez+vgTHQxHk3TDIFEvyNKpltir9acA6/j0jGHYEfhC/r1UrED+Tt8 - m1PcqYkXSdSfGsO4GbojXKICNGmqT0/82l34NKy0jmCO9o+gJUnrEIDeiTyPT8jYjdXn - eGJQ== -X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=1e100.net; s=20210112; - h=x-gm-message-state:date:in-reply-to:message-id:mime-version - :references:subject:from:to:cc:content-transfer-encoding; - bh=GyLn4alIs4ulph3wPn6kG/6c2qN7BZlAW/LU7V/wtB8=; - b=acqAzpv5+5bfyJLdmlUD3HKE74OO89v1YBTzb7kB54GormxgfqmSVm1QB75bSQFjxy - iRQ9yyQgUru50WX+ppP3B5N0K0edy9kKYXC5SmAf9PX7QbH0T3UYfJi5KDO5H7cptgny - 6VYcZXgdQ/ammtN/NFjOwImJ1NyoXMVSMkwXMJFoH/hDV1+/EGHNkG40d3ui2cNlIGer - 8oJrkMcYwG1L0yl5Lv1F9vCPZCaUecfxMZFvc0McrXP6BtB3ww3KPyipUsx80uRBu4PN - j9j1OZPr59Q+bLCF+TDiL77E2CPhZvzXxkw0VDq91eoHiQQwNRK+035yqGfK0i2derto - 4qrg== -X-Gm-Message-State: AJIora8TvaxLFo7c8jNCAkmO2MPTlGSpywySY2xnOvzKMd8WGYtTu0Lg - H/Iowy+yks3pdn4k6EN6JEFx/cAzaSo= -X-Google-Smtp-Source: - AGRyM1vs1GQqyrE7d9mjEL5MTBaBKoftkODnUWU3nZauu0DiGFKm6nQgOePB+L8kJ6BOwhlufj40Jzp1R4k= -X-Received: from yuzhao.bld.corp.google.com - ([2620:15c:183:200:b89c:e10a:466e:cf7d]) - (user=yuzhao job=sendgmr) by 2002:a25:d043:0:b0:66e:31d6:4606 with SMTP id - 
h64-20020a25d043000000b0066e31d64606mr25539292ybg.241.1657144856519; Wed, 06 - Jul 2022 15:00:56 -0700 (PDT) -Date: Wed, 6 Jul 2022 16:00:13 -0600 -In-Reply-To: <20220706220022.968789-1-yuzhao@google.com> -Message-Id: <20220706220022.968789-5-yuzhao@google.com> -Mime-Version: 1.0 -References: <20220706220022.968789-1-yuzhao@google.com> -X-Mailer: git-send-email 2.37.0.rc0.161.g10f37bed90-goog -Subject: [PATCH v13 04/14] Revert "include/linux/mm_inline.h: fold - __update_lru_size() into its sole caller" -From: Yu Zhao -To: Andrew Morton -Cc: Andi Kleen , - Aneesh Kumar , - Catalin Marinas , - Dave Hansen , Hillf Danton , - Jens Axboe , Johannes Weiner , - Jonathan Corbet , - Linus Torvalds , - Matthew Wilcox , Mel Gorman , - Michael Larabel , - Michal Hocko , Mike Rapoport , - Peter Zijlstra , Tejun Heo , - Vlastimil Babka , Will Deacon , - linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org, - linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org, - page-reclaim@google.com, Yu Zhao , - Miaohe Lin , Brian Geffon , - Jan Alexander Steffens , - Oleksandr Natalenko , - Steven Barrett , - Suleiman Souhlal , Daniel Byrne , - Donald Carr , - " =?utf-8?q?Holger_Hoffst=C3=A4tte?= " , - Konstantin Kharlamov , - Shuang Zhai , Sofia Trinh , - Vaibhav Jain -ARC-Seal: i=1; s=arc-20220608; d=hostedemail.com; t=1657144857; a=rsa-sha256; - cv=none; - b=VytvHlKkiiUMJbwpI1Paeu5xydng7JksWkoUmOEgZClwpKUFRcoyC2S4kA6s+p3bljEt2Y - 85v0iGMF2ImZomoiEOeODI88v8cnakz2h3vV0KOYBRLWcUp1MoAAGc9/CatS2RZcyEZAsJ - 16N+Z8RlZvTLC+lg1BEaccSkBnce95w= -ARC-Authentication-Results: i=1; - imf07.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=fqvYzFJd; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf07.hostedemail.com: domain of - 3GAbGYgYKCFkPLQ81F7FF7C5.3FDC9ELO-DDBM13B.FI7@flex--yuzhao.bounces.google.com - designates 209.85.219.201 as permitted sender) - smtp.mailfrom=3GAbGYgYKCFkPLQ81F7FF7C5.3FDC9ELO-DDBM13B.FI7@flex--yuzhao.bounces.google.com -ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; - d=hostedemail.com; - s=arc-20220608; t=1657144857; - h=from:from:sender:reply-to:subject:subject:date:date: - message-id:message-id:to:to:cc:cc:mime-version:mime-version: - content-type:content-type: - content-transfer-encoding:content-transfer-encoding: - in-reply-to:in-reply-to:references:references:dkim-signature; - bh=GyLn4alIs4ulph3wPn6kG/6c2qN7BZlAW/LU7V/wtB8=; - b=eWydr8xqhEMOpzfboenYQahizL48uc/GlcGbQBWuOSIiHMGD4xSQylZi9Tf6qFNtPbIcfn - a/FGlP15AWqiWFjYLC/dUvNrh+6vOaN3WEAagxoFslzh2bv3QfjEGhMrlwdaaPWaec5Ive - jXzUgyTbXLI8/pbmo1scWGTYiETc8FI= -X-Rspam-User: -X-Rspamd-Server: rspam02 -X-Rspamd-Queue-Id: 3DA0B4002F -Authentication-Results: imf07.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=fqvYzFJd; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf07.hostedemail.com: domain of - 3GAbGYgYKCFkPLQ81F7FF7C5.3FDC9ELO-DDBM13B.FI7@flex--yuzhao.bounces.google.com - designates 209.85.219.201 as permitted sender) - smtp.mailfrom=3GAbGYgYKCFkPLQ81F7FF7C5.3FDC9ELO-DDBM13B.FI7@flex--yuzhao.bounces.google.com -X-Stat-Signature: 6b6krqahmtizrdq5upykdpctczw885w8 -X-HE-Tag: 1657144857-930305 -X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4 -Sender: owner-linux-mm@kvack.org -Precedence: bulk -X-Loop: owner-majordomo@kvack.org -List-ID: - -This patch undoes the following refactor: -commit 289ccba18af4 ("include/linux/mm_inline.h: fold __update_lru_size() into its sole caller") - -The upcoming changes to 
include/linux/mm_inline.h will reuse -__update_lru_size(). - -Signed-off-by: Yu Zhao -Reviewed-by: Miaohe Lin -Acked-by: Brian Geffon -Acked-by: Jan Alexander Steffens (heftig) -Acked-by: Oleksandr Natalenko -Acked-by: Steven Barrett -Acked-by: Suleiman Souhlal -Tested-by: Daniel Byrne -Tested-by: Donald Carr -Tested-by: Holger Hoffstätte -Tested-by: Konstantin Kharlamov -Tested-by: Shuang Zhai -Tested-by: Sofia Trinh -Tested-by: Vaibhav Jain ---- - include/linux/mm_inline.h | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h -index 7b25b53c474a..fb8aadb81cd6 100644 ---- a/include/linux/mm_inline.h -+++ b/include/linux/mm_inline.h -@@ -34,7 +34,7 @@ static inline int page_is_file_lru(struct page *page) - return folio_is_file_lru(page_folio(page)); - } - --static __always_inline void update_lru_size(struct lruvec *lruvec, -+static __always_inline void __update_lru_size(struct lruvec *lruvec, - enum lru_list lru, enum zone_type zid, - long nr_pages) - { -@@ -43,6 +43,13 @@ static __always_inline void update_lru_size(struct lruvec *lruvec, - __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages); - __mod_zone_page_state(&pgdat->node_zones[zid], - NR_ZONE_LRU_BASE + lru, nr_pages); -+} -+ -+static __always_inline void update_lru_size(struct lruvec *lruvec, -+ enum lru_list lru, enum zone_type zid, -+ long nr_pages) -+{ -+ __update_lru_size(lruvec, lru, zid, nr_pages); - #ifdef CONFIG_MEMCG - mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages); - #endif - -From patchwork Wed Jul 6 22:00:14 2022 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Patchwork-Submitter: Yu Zhao -X-Patchwork-Id: 12908703 -Return-Path: -X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on - aws-us-west-2-korg-lkml-1.web.codeaurora.org -Received: from kanga.kvack.org (kanga.kvack.org [205.233.56.17]) - by smtp.lore.kernel.org (Postfix) with ESMTP id 8DA63CCA481 - for ; Wed, 6 Jul 2022 22:01:01 +0000 (UTC) -Received: by kanga.kvack.org (Postfix) - id 005E26B007D; Wed, 6 Jul 2022 18:01:00 -0400 (EDT) -Received: by kanga.kvack.org (Postfix, from userid 40) - id E9B606B007B; Wed, 6 Jul 2022 18:00:59 -0400 (EDT) -X-Delivered-To: int-list-linux-mm@kvack.org -Received: by kanga.kvack.org (Postfix, from userid 63042) - id CC4B46B007D; Wed, 6 Jul 2022 18:00:59 -0400 (EDT) -X-Delivered-To: linux-mm@kvack.org -Received: from relay.hostedemail.com (smtprelay0010.hostedemail.com - [216.40.44.10]) - by kanga.kvack.org (Postfix) with ESMTP id BA0E36B0078 - for ; Wed, 6 Jul 2022 18:00:59 -0400 (EDT) -Received: from smtpin23.hostedemail.com (a10.router.float.18 [10.200.18.1]) - by unirelay06.hostedemail.com (Postfix) with ESMTP id 7E56E34725 - for ; Wed, 6 Jul 2022 22:00:59 +0000 (UTC) -X-FDA: 79658045838.23.084DC6A -Received: from mail-io1-f74.google.com (mail-io1-f74.google.com - [209.85.166.74]) - by imf02.hostedemail.com (Postfix) with ESMTP id B44F280009 - for ; Wed, 6 Jul 2022 22:00:58 +0000 (UTC) -Received: by mail-io1-f74.google.com with SMTP id - h7-20020a05660224c700b0067898a33ceaso3543048ioe.13 - for ; Wed, 06 Jul 2022 15:00:58 -0700 (PDT) -DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20210112; - h=date:in-reply-to:message-id:mime-version:references:subject:from:to - :cc:content-transfer-encoding; - bh=y74IoQ1Un60Xq7yBx41XqudQ9pnNmGNgLv+0SiGV5r4=; - b=p293qdy+AJ1NK8wVIFYa38QTJD9CsNtfxZWrFxc99swgPytMvFTFgMhkjdcKezzZie - 
yrDLuqEO4g2bHuYcfru6gtGl/vlEBzugJSUw9t9SSuHD0KPbwuSBuj6k/Z4E6o/3VSjs - nmEwp3FaQzQrq+AvQ75NBZLJcjJnu2S/L2SRP5n2jtLL27l7UQfJTw+nlDEN61Y6wnKm - cTbYVguOwFUEjdFi2ghze0M0n87A9CNsBCyQHS9wRzczRWbW6m+LMwO/fsge9KEjZcyq - WUlwLSCnJuEi3hDOUrhrpLVnbT1LO6KIzff4/TXK4ud4HZ+BORPfFQeF2zBQpAIt8foH - VdwQ== -X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=1e100.net; s=20210112; - h=x-gm-message-state:date:in-reply-to:message-id:mime-version - :references:subject:from:to:cc:content-transfer-encoding; - bh=y74IoQ1Un60Xq7yBx41XqudQ9pnNmGNgLv+0SiGV5r4=; - b=EWxosmI9v8pE6BcCI05EJUbjYKXIh1pLe5c6N0so7Z1m/PNwBo99ulR0RkBk+8Fmsr - 84tyZLy+Pyf35k8XxobhAfMup7WIk+mjRYfkHpt7/gxC3CR3vJEm+WqI+pDZ614pUCR3 - N6ibQFwTKBqpvRJDUwhkC5n8ePOAIkrRwZz7JGXj4eiWWJSZGxUDhwqV9gi7CHQfo8Lr - yHt/gyUcmJDvTu8Fy8aP7r187IjoODs5rbqKu518ZAL20ceKmq+HT3FFv02CyDgkXObe - H8JjcI1Ovt/TvJlosala45+Ckpmt3TNX1+aCLmAaarDpkTxNHVYvWWYlylLQp+itl3t2 - Fj5w== -X-Gm-Message-State: AJIora8xQHdkFAa4pcN+RMWZYlVPfQhLR90DF0MW6/oxA9WDgXVAJA1y - HfRDCS36QboLTpSfrlTvo6hF0/eqWKQ= -X-Google-Smtp-Source: - AGRyM1vTPigokpkBMxkuw/ymV5qWW3cjnNF2AOB7Hi8viYhEQm+kOAzrEtDgBoJ1BwoaUWa5EKU0D3T6qsI= -X-Received: from yuzhao.bld.corp.google.com - ([2620:15c:183:200:b89c:e10a:466e:cf7d]) - (user=yuzhao job=sendgmr) by 2002:a05:6638:2114:b0:33e:8e12:e5ee with SMTP id - n20-20020a056638211400b0033e8e12e5eemr22734068jaj.281.1657144858015; Wed, 06 - Jul 2022 15:00:58 -0700 (PDT) -Date: Wed, 6 Jul 2022 16:00:14 -0600 -In-Reply-To: <20220706220022.968789-1-yuzhao@google.com> -Message-Id: <20220706220022.968789-6-yuzhao@google.com> -Mime-Version: 1.0 -References: <20220706220022.968789-1-yuzhao@google.com> -X-Mailer: git-send-email 2.37.0.rc0.161.g10f37bed90-goog -Subject: [PATCH v13 05/14] mm: multi-gen LRU: groundwork -From: Yu Zhao -To: Andrew Morton -Cc: Andi Kleen , - Aneesh Kumar , - Catalin Marinas , - Dave Hansen , Hillf Danton , - Jens Axboe , Johannes Weiner , - Jonathan Corbet , - Linus Torvalds , - Matthew Wilcox , Mel Gorman , - Michael Larabel , - Michal Hocko , Mike Rapoport , - Peter Zijlstra , Tejun Heo , - Vlastimil Babka , Will Deacon , - linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org, - linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org, - page-reclaim@google.com, Yu Zhao , - Brian Geffon , - Jan Alexander Steffens , - Oleksandr Natalenko , - Steven Barrett , - Suleiman Souhlal , Daniel Byrne , - Donald Carr , - " =?utf-8?q?Holger_Hoffst=C3=A4tte?= " , - Konstantin Kharlamov , - Shuang Zhai , Sofia Trinh , - Vaibhav Jain -ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; - d=hostedemail.com; - s=arc-20220608; t=1657144858; - h=from:from:sender:reply-to:subject:subject:date:date: - message-id:message-id:to:to:cc:cc:mime-version:mime-version: - content-type:content-type: - content-transfer-encoding:content-transfer-encoding: - in-reply-to:in-reply-to:references:references:dkim-signature; - bh=y74IoQ1Un60Xq7yBx41XqudQ9pnNmGNgLv+0SiGV5r4=; - b=oTaGMrRapdp+kh/gHJqHBTnDMs7aDxVAwUqI4ZEVOcywjneTv7eRea/YKenPJ3SUgTezbZ - dNcy3RS0hdffVNFHBdEbaanNSV29TaH7bgFF9LgENwvVZGMsc8+qWj5aOjTjXDI7lU9B66 - y9zhOk8IIuWtM+JWdB8zZv2w73QNkgI= -ARC-Authentication-Results: i=1; - imf02.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=p293qdy+; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf02.hostedemail.com: domain of - 3GgbGYgYKCFsRNSA3H9HH9E7.5HFEBGNQ-FFDO35D.HK9@flex--yuzhao.bounces.google.com - designates 209.85.166.74 as permitted sender) - 
smtp.mailfrom=3GgbGYgYKCFsRNSA3H9HH9E7.5HFEBGNQ-FFDO35D.HK9@flex--yuzhao.bounces.google.com -ARC-Seal: i=1; s=arc-20220608; d=hostedemail.com; t=1657144858; a=rsa-sha256; - cv=none; - b=5jGHjx/lCilMy07izrrxr4gRusLHe7TVfK6eNjlXnYmVVS2CdTSjlX6iI6cgO5jY/6Otqp - 7rnHHbSwj6t4vUkRkbfhWehDTUsU9TXEcEaZ8NHjLgX8tJZID/D2dcfA1Z/Ae/1iB6tbQa - vuAWajsuByCUT6SlkfXfwe+TOdR4BNI= -X-Rspamd-Server: rspam11 -X-Rspam-User: -X-Stat-Signature: gxdxu955m74iz4kx3hhyt9yhtcgejep5 -X-Rspamd-Queue-Id: B44F280009 -Authentication-Results: imf02.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=p293qdy+; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf02.hostedemail.com: domain of - 3GgbGYgYKCFsRNSA3H9HH9E7.5HFEBGNQ-FFDO35D.HK9@flex--yuzhao.bounces.google.com - designates 209.85.166.74 as permitted sender) - smtp.mailfrom=3GgbGYgYKCFsRNSA3H9HH9E7.5HFEBGNQ-FFDO35D.HK9@flex--yuzhao.bounces.google.com -X-HE-Tag: 1657144858-162393 -X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4 -Sender: owner-linux-mm@kvack.org -Precedence: bulk -X-Loop: owner-majordomo@kvack.org -List-ID: - -Evictable pages are divided into multiple generations for each lruvec. -The youngest generation number is stored in lrugen->max_seq for both -anon and file types as they are aged on an equal footing. The oldest -generation numbers are stored in lrugen->min_seq[] separately for anon -and file types as clean file pages can be evicted regardless of swap -constraints. These three variables are monotonically increasing. - -Generation numbers are truncated into order_base_2(MAX_NR_GENS+1) bits -in order to fit into the gen counter in folio->flags. Each truncated -generation number is an index to lrugen->lists[]. The sliding window -technique is used to track at least MIN_NR_GENS and at most -MAX_NR_GENS generations. The gen counter stores a value within [1, -MAX_NR_GENS] while a page is on one of lrugen->lists[]. Otherwise it -stores 0. - -There are two conceptually independent procedures: "the aging", which -produces young generations, and "the eviction", which consumes old -generations. They form a closed-loop system, i.e., "the page reclaim". -Both procedures can be invoked from userspace for the purposes of -working set estimation and proactive reclaim. These techniques are -commonly used to optimize job scheduling (bin packing) in data -centers [1][2]. - -To avoid confusion, the terms "hot" and "cold" will be applied to the -multi-gen LRU, as a new convention; the terms "active" and "inactive" -will be applied to the active/inactive LRU, as usual. - -The protection of hot pages and the selection of cold pages are based -on page access channels and patterns. There are two access channels: -one through page tables and the other through file descriptors. The -protection of the former channel is by design stronger because: -1. The uncertainty in determining the access patterns of the former - channel is higher due to the approximation of the accessed bit. -2. The cost of evicting the former channel is higher due to the TLB - flushes required and the likelihood of encountering the dirty bit. -3. The penalty of underprotecting the former channel is higher because - applications usually do not prepare themselves for major page - faults like they do for blocked I/O. E.g., GUI applications - commonly use dedicated I/O threads to avoid blocking rendering - threads. -There are also two access patterns: one with temporal locality and the -other without. 
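To make the truncation described above concrete (gen_index() is a made-up name; the series itself provides lru_gen_from_seq() with the same body):

        static inline int gen_index(unsigned long seq)
        {
                return seq % MAX_NR_GENS;       /* MAX_NR_GENS == 4 */
        }

With max_seq == 7 and min_seq == 5, the three live generations occupy lrugen->lists[3], lists[2] and lists[1]; advancing max_seq to 8 wraps around and reuses index 0, which is what makes the window slide.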
For the reasons listed above, the former channel is -assumed to follow the former pattern unless VM_SEQ_READ or -VM_RAND_READ is present; the latter channel is assumed to follow the -latter pattern unless outlying refaults have been observed [3][4]. - -The next patch will address the "outlying refaults". Three macros, -i.e., LRU_REFS_WIDTH, LRU_REFS_PGOFF and LRU_REFS_MASK, used later are -added in this patch to make the entire patchset less diffy. - -A page is added to the youngest generation on faulting. The aging -needs to check the accessed bit at least twice before handing this -page over to the eviction. The first check takes care of the accessed -bit set on the initial fault; the second check makes sure this page -has not been used since then. This protocol, AKA second chance, -requires a minimum of two generations, hence MIN_NR_GENS. - -[1] https://dl.acm.org/doi/10.1145/3297858.3304053 -[2] https://dl.acm.org/doi/10.1145/3503222.3507731 -[3] https://lwn.net/Articles/495543/ -[4] https://lwn.net/Articles/815342/ - -Signed-off-by: Yu Zhao -Acked-by: Brian Geffon -Acked-by: Jan Alexander Steffens (heftig) -Acked-by: Oleksandr Natalenko -Acked-by: Steven Barrett -Acked-by: Suleiman Souhlal -Tested-by: Daniel Byrne -Tested-by: Donald Carr -Tested-by: Holger Hoffstätte -Tested-by: Konstantin Kharlamov -Tested-by: Shuang Zhai -Tested-by: Sofia Trinh -Tested-by: Vaibhav Jain ---- - fs/fuse/dev.c | 3 +- - include/linux/mm.h | 2 + - include/linux/mm_inline.h | 175 ++++++++++++++++++++++++++++++ - include/linux/mmzone.h | 100 +++++++++++++++++ - include/linux/page-flags-layout.h | 13 ++- - include/linux/page-flags.h | 4 +- - include/linux/sched.h | 4 + - kernel/bounds.c | 5 + - mm/Kconfig | 8 ++ - mm/huge_memory.c | 3 +- - mm/memcontrol.c | 2 + - mm/memory.c | 25 +++++ - mm/mm_init.c | 6 +- - mm/mmzone.c | 2 + - mm/swap.c | 9 +- - mm/vmscan.c | 75 +++++++++++++ - 16 files changed, 423 insertions(+), 13 deletions(-) - -diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c -index 0e537e580dc1..5d36015071d2 100644 ---- a/fs/fuse/dev.c -+++ b/fs/fuse/dev.c -@@ -777,7 +777,8 @@ static int fuse_check_page(struct page *page) - 1 << PG_active | - 1 << PG_workingset | - 1 << PG_reclaim | -- 1 << PG_waiters))) { -+ 1 << PG_waiters | -+ LRU_GEN_MASK | LRU_REFS_MASK))) { - dump_page(page, "fuse: trying to steal weird page"); - return 1; - } -diff --git a/include/linux/mm.h b/include/linux/mm.h -index cf3d0d673f6b..ed5393e5930d 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -1060,6 +1060,8 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); - #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) - #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) - #define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) -+#define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH) -+#define LRU_REFS_PGOFF (LRU_GEN_PGOFF - LRU_REFS_WIDTH) - - /* - * Define the bit shifts to access each section. 
For non-existent -diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h -index fb8aadb81cd6..2ff703900fd0 100644 ---- a/include/linux/mm_inline.h -+++ b/include/linux/mm_inline.h -@@ -40,6 +40,9 @@ static __always_inline void __update_lru_size(struct lruvec *lruvec, - { - struct pglist_data *pgdat = lruvec_pgdat(lruvec); - -+ lockdep_assert_held(&lruvec->lru_lock); -+ WARN_ON_ONCE(nr_pages != (int)nr_pages); -+ - __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages); - __mod_zone_page_state(&pgdat->node_zones[zid], - NR_ZONE_LRU_BASE + lru, nr_pages); -@@ -101,11 +104,177 @@ static __always_inline enum lru_list folio_lru_list(struct folio *folio) - return lru; - } - -+#ifdef CONFIG_LRU_GEN -+ -+static inline bool lru_gen_enabled(void) -+{ -+ return true; -+} -+ -+static inline bool lru_gen_in_fault(void) -+{ -+ return current->in_lru_fault; -+} -+ -+static inline int lru_gen_from_seq(unsigned long seq) -+{ -+ return seq % MAX_NR_GENS; -+} -+ -+static inline int folio_lru_gen(struct folio *folio) -+{ -+ unsigned long flags = READ_ONCE(folio->flags); -+ -+ return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; -+} -+ -+static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen) -+{ -+ unsigned long max_seq = lruvec->lrugen.max_seq; -+ -+ VM_WARN_ON_ONCE(gen >= MAX_NR_GENS); -+ -+ /* see the comment on MIN_NR_GENS */ -+ return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1); -+} -+ -+static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *folio, -+ int old_gen, int new_gen) -+{ -+ int type = folio_is_file_lru(folio); -+ int zone = folio_zonenum(folio); -+ int delta = folio_nr_pages(folio); -+ enum lru_list lru = type * LRU_INACTIVE_FILE; -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ -+ VM_WARN_ON_ONCE(old_gen != -1 && old_gen >= MAX_NR_GENS); -+ VM_WARN_ON_ONCE(new_gen != -1 && new_gen >= MAX_NR_GENS); -+ VM_WARN_ON_ONCE(old_gen == -1 && new_gen == -1); -+ -+ if (old_gen >= 0) -+ WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone], -+ lrugen->nr_pages[old_gen][type][zone] - delta); -+ if (new_gen >= 0) -+ WRITE_ONCE(lrugen->nr_pages[new_gen][type][zone], -+ lrugen->nr_pages[new_gen][type][zone] + delta); -+ -+ /* addition */ -+ if (old_gen < 0) { -+ if (lru_gen_is_active(lruvec, new_gen)) -+ lru += LRU_ACTIVE; -+ __update_lru_size(lruvec, lru, zone, delta); -+ return; -+ } -+ -+ /* deletion */ -+ if (new_gen < 0) { -+ if (lru_gen_is_active(lruvec, old_gen)) -+ lru += LRU_ACTIVE; -+ __update_lru_size(lruvec, lru, zone, -delta); -+ return; -+ } -+} -+ -+static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) -+{ -+ unsigned long seq; -+ unsigned long flags; -+ int gen = folio_lru_gen(folio); -+ int type = folio_is_file_lru(folio); -+ int zone = folio_zonenum(folio); -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ -+ VM_WARN_ON_ONCE_FOLIO(gen != -1, folio); -+ -+ if (folio_test_unevictable(folio)) -+ return false; -+ /* -+ * There are three common cases for this page: -+ * 1. If it's hot, e.g., freshly faulted in or previously hot and -+ * migrated, add it to the youngest generation. -+ * 2. If it's cold but can't be evicted immediately, i.e., an anon page -+ * not in swapcache or a dirty page pending writeback, add it to the -+ * second oldest generation. -+ * 3. Everything else (clean, cold) is added to the oldest generation. 
-+ */ -+ if (folio_test_active(folio)) -+ seq = lrugen->max_seq; -+ else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) || -+ (folio_test_reclaim(folio) && -+ (folio_test_dirty(folio) || folio_test_writeback(folio)))) -+ seq = lrugen->min_seq[type] + 1; -+ else -+ seq = lrugen->min_seq[type]; -+ -+ gen = lru_gen_from_seq(seq); -+ flags = (gen + 1UL) << LRU_GEN_PGOFF; -+ /* see the comment on MIN_NR_GENS about PG_active */ -+ set_mask_bits(&folio->flags, LRU_GEN_MASK | BIT(PG_active), flags); -+ -+ lru_gen_update_size(lruvec, folio, -1, gen); -+ /* for folio_rotate_reclaimable() */ -+ if (reclaiming) -+ list_add_tail(&folio->lru, &lrugen->lists[gen][type][zone]); -+ else -+ list_add(&folio->lru, &lrugen->lists[gen][type][zone]); -+ -+ return true; -+} -+ -+static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) -+{ -+ unsigned long flags; -+ int gen = folio_lru_gen(folio); -+ -+ if (gen < 0) -+ return false; -+ -+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio); -+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio); -+ -+ /* for folio_migrate_flags() */ -+ flags = !reclaiming && lru_gen_is_active(lruvec, gen) ? BIT(PG_active) : 0; -+ flags = set_mask_bits(&folio->flags, LRU_GEN_MASK, flags); -+ gen = ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; -+ -+ lru_gen_update_size(lruvec, folio, gen, -1); -+ list_del(&folio->lru); -+ -+ return true; -+} -+ -+#else /* !CONFIG_LRU_GEN */ -+ -+static inline bool lru_gen_enabled(void) -+{ -+ return false; -+} -+ -+static inline bool lru_gen_in_fault(void) -+{ -+ return false; -+} -+ -+static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) -+{ -+ return false; -+} -+ -+static inline bool lru_gen_del_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) -+{ -+ return false; -+} -+ -+#endif /* CONFIG_LRU_GEN */ -+ - static __always_inline - void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio) - { - enum lru_list lru = folio_lru_list(folio); - -+ if (lru_gen_add_folio(lruvec, folio, false)) -+ return; -+ - update_lru_size(lruvec, lru, folio_zonenum(folio), - folio_nr_pages(folio)); - if (lru != LRU_UNEVICTABLE) -@@ -123,6 +292,9 @@ void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio) - { - enum lru_list lru = folio_lru_list(folio); - -+ if (lru_gen_add_folio(lruvec, folio, true)) -+ return; -+ - update_lru_size(lruvec, lru, folio_zonenum(folio), - folio_nr_pages(folio)); - /* This is not expected to be used on LRU_UNEVICTABLE */ -@@ -140,6 +312,9 @@ void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio) - { - enum lru_list lru = folio_lru_list(folio); - -+ if (lru_gen_del_folio(lruvec, folio, false)) -+ return; -+ - if (lru != LRU_UNEVICTABLE) - list_del(&folio->lru); - update_lru_size(lruvec, lru, folio_zonenum(folio), -diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index aab70355d64f..c90c2282044e 100644 ---- a/include/linux/mmzone.h -+++ b/include/linux/mmzone.h -@@ -314,6 +314,102 @@ enum lruvec_flags { - */ - }; - -+#endif /* !__GENERATING_BOUNDS_H */ -+ -+/* -+ * Evictable pages are divided into multiple generations. The youngest and the -+ * oldest generation numbers, max_seq and min_seq, are monotonically increasing. -+ * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An -+ * offset within MAX_NR_GENS, i.e., gen, indexes the LRU list of the -+ * corresponding generation. 
The gen counter in folio->flags stores gen+1 while -+ * a page is on one of lrugen->lists[]. Otherwise it stores 0. -+ * -+ * A page is added to the youngest generation on faulting. The aging needs to -+ * check the accessed bit at least twice before handing this page over to the -+ * eviction. The first check takes care of the accessed bit set on the initial -+ * fault; the second check makes sure this page hasn't been used since then. -+ * This process, AKA second chance, requires a minimum of two generations, -+ * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive -+ * LRU, e.g., /proc/vmstat, these two generations are considered active; the -+ * rest of generations, if they exist, are considered inactive. See -+ * lru_gen_is_active(). -+ * -+ * PG_active is always cleared while a page is on one of lrugen->lists[] so that -+ * the aging needs not to worry about it. And it's set again when a page -+ * considered active is isolated for non-reclaiming purposes, e.g., migration. -+ * See lru_gen_add_folio() and lru_gen_del_folio(). -+ * -+ * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice the -+ * number of categories of the active/inactive LRU when keeping track of -+ * accesses through page tables. This requires order_base_2(MAX_NR_GENS+1) bits -+ * in folio->flags. -+ */ -+#define MIN_NR_GENS 2U -+#define MAX_NR_GENS 4U -+ -+#ifndef __GENERATING_BOUNDS_H -+ -+struct lruvec; -+ -+#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) -+#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) -+ -+#ifdef CONFIG_LRU_GEN -+ -+enum { -+ LRU_GEN_ANON, -+ LRU_GEN_FILE, -+}; -+ -+/* -+ * The youngest generation number is stored in max_seq for both anon and file -+ * types as they are aged on an equal footing. The oldest generation numbers are -+ * stored in min_seq[] separately for anon and file types as clean file pages -+ * can be evicted regardless of swap constraints. -+ * -+ * Normally anon and file min_seq are in sync. But if swapping is constrained, -+ * e.g., out of swap space, file min_seq is allowed to advance and leave anon -+ * min_seq behind. -+ * -+ * The number of pages in each generation is eventually consistent and therefore -+ * can be transiently negative. 
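(Illustrative aside, not in the patch: given the "transiently negative" caveat just above, a reader of these counters might clamp at zero, e.g.

        long n = READ_ONCE(lrugen->nr_pages[gen][type][zone]);

        total += max(n, 0L);    /* a negative snapshot just means "empty" */

rather than treat the raw value as exact.)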
-+ */ -+struct lru_gen_struct { -+ /* the aging increments the youngest generation number */ -+ unsigned long max_seq; -+ /* the eviction increments the oldest generation numbers */ -+ unsigned long min_seq[ANON_AND_FILE]; -+ /* the multi-gen LRU lists, lazily sorted on eviction */ -+ struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; -+ /* the multi-gen LRU sizes, eventually consistent */ -+ long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; -+}; -+ -+void lru_gen_init_lruvec(struct lruvec *lruvec); -+ -+#ifdef CONFIG_MEMCG -+void lru_gen_init_memcg(struct mem_cgroup *memcg); -+void lru_gen_exit_memcg(struct mem_cgroup *memcg); -+#endif -+ -+#else /* !CONFIG_LRU_GEN */ -+ -+static inline void lru_gen_init_lruvec(struct lruvec *lruvec) -+{ -+} -+ -+#ifdef CONFIG_MEMCG -+static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) -+{ -+} -+ -+static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) -+{ -+} -+#endif -+ -+#endif /* CONFIG_LRU_GEN */ -+ - struct lruvec { - struct list_head lists[NR_LRU_LISTS]; - /* per lruvec lru_lock for memcg */ -@@ -331,6 +427,10 @@ struct lruvec { - unsigned long refaults[ANON_AND_FILE]; - /* Various lruvec state flags (enum lruvec_flags) */ - unsigned long flags; -+#ifdef CONFIG_LRU_GEN -+ /* evictable pages divided into generations */ -+ struct lru_gen_struct lrugen; -+#endif - #ifdef CONFIG_MEMCG - struct pglist_data *pgdat; - #endif -diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h -index ef1e3e736e14..240905407a18 100644 ---- a/include/linux/page-flags-layout.h -+++ b/include/linux/page-flags-layout.h -@@ -55,7 +55,8 @@ - #define SECTIONS_WIDTH 0 - #endif - --#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS -+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_SHIFT \ -+ <= BITS_PER_LONG - NR_PAGEFLAGS - #define NODES_WIDTH NODES_SHIFT - #elif defined(CONFIG_SPARSEMEM_VMEMMAP) - #error "Vmemmap: No space for nodes field in page flags" -@@ -89,8 +90,8 @@ - #define LAST_CPUPID_SHIFT 0 - #endif - --#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT \ -- <= BITS_PER_LONG - NR_PAGEFLAGS -+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \ -+ KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS - #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT - #else - #define LAST_CPUPID_WIDTH 0 -@@ -100,10 +101,12 @@ - #define LAST_CPUPID_NOT_IN_PAGE_FLAGS - #endif - --#if ZONES_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH \ -- > BITS_PER_LONG - NR_PAGEFLAGS -+#if ZONES_WIDTH + LRU_GEN_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \ -+ KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS - #error "Not enough bits in page flags" - #endif - -+#define LRU_REFS_WIDTH 0 -+ - #endif - #endif /* _LINUX_PAGE_FLAGS_LAYOUT */ -diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h -index e66f7aa3191d..8d466d724852 100644 ---- a/include/linux/page-flags.h -+++ b/include/linux/page-flags.h -@@ -1059,7 +1059,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) - 1UL << PG_private | 1UL << PG_private_2 | \ - 1UL << PG_writeback | 1UL << PG_reserved | \ - 1UL << PG_slab | 1UL << PG_active | \ -- 1UL << PG_unevictable | __PG_MLOCKED) -+ 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK) - - /* - * Flags checked when a page is prepped for return by the page allocator. 
-@@ -1070,7 +1070,7 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page) - * alloc-free cycle to prevent from reusing the page. - */ - #define PAGE_FLAGS_CHECK_AT_PREP \ -- (PAGEFLAGS_MASK & ~__PG_HWPOISON) -+ ((PAGEFLAGS_MASK & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK) - - #define PAGE_FLAGS_PRIVATE \ - (1UL << PG_private | 1UL << PG_private_2) -diff --git a/include/linux/sched.h b/include/linux/sched.h -index c46f3a63b758..744340a96ace 100644 ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -912,6 +912,10 @@ struct task_struct { - #ifdef CONFIG_MEMCG - unsigned in_user_fault:1; - #endif -+#ifdef CONFIG_LRU_GEN -+ /* whether the LRU algorithm may apply to this access */ -+ unsigned in_lru_fault:1; -+#endif - #ifdef CONFIG_COMPAT_BRK - unsigned brk_randomized:1; - #endif -diff --git a/kernel/bounds.c b/kernel/bounds.c -index 9795d75b09b2..5ee60777d8e4 100644 ---- a/kernel/bounds.c -+++ b/kernel/bounds.c -@@ -22,6 +22,11 @@ int main(void) - DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); - #endif - DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); -+#ifdef CONFIG_LRU_GEN -+ DEFINE(LRU_GEN_WIDTH, order_base_2(MAX_NR_GENS + 1)); -+#else -+ DEFINE(LRU_GEN_WIDTH, 0); -+#endif - /* End of constants */ - - return 0; -diff --git a/mm/Kconfig b/mm/Kconfig -index 169e64192e48..cee109f3128a 100644 ---- a/mm/Kconfig -+++ b/mm/Kconfig -@@ -1130,6 +1130,14 @@ config PTE_MARKER_UFFD_WP - purposes. It is required to enable userfaultfd write protection on - file-backed memory types like shmem and hugetlbfs. - -+config LRU_GEN -+ bool "Multi-Gen LRU" -+ depends on MMU -+ # make sure folio->flags has enough spare bits -+ depends on 64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP -+ help -+ A high performance LRU implementation to overcommit memory. 
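The "make sure folio->flags has enough spare bits" dependency above exists because the generation counter is packed into otherwise unused folio->flags bits, sized as order_base_2(MAX_NR_GENS + 1) in the kernel/bounds.c hunk. A standalone sketch of that budget check follows; the NR_PAGEFLAGS and zone/node/cpupid widths are assumed example values for a typical 64-bit build, not read from any real .config, and LRU_REFS_WIDTH is 0 at this point in the series, growing to MAX_NR_TIERS - 2 in the follow-up patch further down:

  /* illustrative arithmetic only; not part of the patch */
  #include <stdio.h>

  #define MAX_NR_GENS    4      /* from the patch */
  #define LRU_GEN_WIDTH  3      /* order_base_2(MAX_NR_GENS + 1) */
  #define LRU_REFS_WIDTH 2      /* MAX_NR_TIERS - 2, added later in the series */

  int main(void)
  {
          /* assumed example layout of a 64-bit flags word */
          int bits_per_long = 64, nr_pageflags = 27;
          int zones = 2, sections = 0, nodes = 6, kasan = 0, last_cpupid = 21;
          int used = nr_pageflags + zones + sections + nodes + kasan +
                     last_cpupid + LRU_GEN_WIDTH + LRU_REFS_WIDTH;

          printf("used %d of %d flag bits -> %s\n", used, bits_per_long,
                 used <= bits_per_long ? "fits" : "no spare room");
          return 0;
  }

Configurations where the section bits crowd out the spare room are what the 64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP dependency guards against; the authoritative check is the page-flags-layout.h hunk above.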
-+ - source "mm/damon/Kconfig" - - endmenu -diff --git a/mm/huge_memory.c b/mm/huge_memory.c -index 834f288b3769..5500583e35b8 100644 ---- a/mm/huge_memory.c -+++ b/mm/huge_memory.c -@@ -2370,7 +2370,8 @@ static void __split_huge_page_tail(struct page *head, int tail, - #ifdef CONFIG_64BIT - (1L << PG_arch_2) | - #endif -- (1L << PG_dirty))); -+ (1L << PG_dirty) | -+ LRU_GEN_MASK | LRU_REFS_MASK)); - - /* ->mapping in first tail page is compound_mapcount */ - VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, -diff --git a/mm/memcontrol.c b/mm/memcontrol.c -index 618c366a2f07..7d58e8a73ece 100644 ---- a/mm/memcontrol.c -+++ b/mm/memcontrol.c -@@ -5105,6 +5105,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) - - static void mem_cgroup_free(struct mem_cgroup *memcg) - { -+ lru_gen_exit_memcg(memcg); - memcg_wb_domain_exit(memcg); - __mem_cgroup_free(memcg); - } -@@ -5163,6 +5164,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) - memcg->deferred_split_queue.split_queue_len = 0; - #endif - idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); -+ lru_gen_init_memcg(memcg); - return memcg; - fail: - mem_cgroup_id_remove(memcg); -diff --git a/mm/memory.c b/mm/memory.c -index 49500390b91b..85d3961c2bd5 100644 ---- a/mm/memory.c -+++ b/mm/memory.c -@@ -5091,6 +5091,27 @@ static inline void mm_account_fault(struct pt_regs *regs, - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); - } - -+#ifdef CONFIG_LRU_GEN -+static void lru_gen_enter_fault(struct vm_area_struct *vma) -+{ -+ /* the LRU algorithm doesn't apply to sequential or random reads */ -+ current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ)); -+} -+ -+static void lru_gen_exit_fault(void) -+{ -+ current->in_lru_fault = false; -+} -+#else -+static void lru_gen_enter_fault(struct vm_area_struct *vma) -+{ -+} -+ -+static void lru_gen_exit_fault(void) -+{ -+} -+#endif /* CONFIG_LRU_GEN */ -+ - /* - * By the time we get here, we already hold the mm semaphore - * -@@ -5122,11 +5143,15 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, - if (flags & FAULT_FLAG_USER) - mem_cgroup_enter_user_fault(); - -+ lru_gen_enter_fault(vma); -+ - if (unlikely(is_vm_hugetlb_page(vma))) - ret = hugetlb_fault(vma->vm_mm, vma, address, flags); - else - ret = __handle_mm_fault(vma, address, flags); - -+ lru_gen_exit_fault(); -+ - if (flags & FAULT_FLAG_USER) { - mem_cgroup_exit_user_fault(); - /* -diff --git a/mm/mm_init.c b/mm/mm_init.c -index 9ddaf0e1b0ab..0d7b2bd2454a 100644 ---- a/mm/mm_init.c -+++ b/mm/mm_init.c -@@ -65,14 +65,16 @@ void __init mminit_verify_pageflags_layout(void) - - shift = 8 * sizeof(unsigned long); - width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH -- - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH; -+ - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH - LRU_GEN_WIDTH - LRU_REFS_WIDTH; - mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths", -- "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Flags %d\n", -+ "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Gen %d Tier %d Flags %d\n", - SECTIONS_WIDTH, - NODES_WIDTH, - ZONES_WIDTH, - LAST_CPUPID_WIDTH, - KASAN_TAG_WIDTH, -+ LRU_GEN_WIDTH, -+ LRU_REFS_WIDTH, - NR_PAGEFLAGS); - mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts", - "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d\n", -diff --git a/mm/mmzone.c b/mm/mmzone.c -index 0ae7571e35ab..68e1511be12d 100644 ---- a/mm/mmzone.c -+++ b/mm/mmzone.c -@@ -88,6 +88,8 @@ void lruvec_init(struct lruvec *lruvec) - * Poison its list head, so that any 
operations on it would crash. - */ - list_del(&lruvec->lists[LRU_UNEVICTABLE]); -+ -+ lru_gen_init_lruvec(lruvec); - } - - #if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) -diff --git a/mm/swap.c b/mm/swap.c -index 034bb24879a3..b062729b340f 100644 ---- a/mm/swap.c -+++ b/mm/swap.c -@@ -460,6 +460,11 @@ void folio_add_lru(struct folio *folio) - VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio); - VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); - -+ /* see the comment in lru_gen_add_folio() */ -+ if (lru_gen_enabled() && !folio_test_unevictable(folio) && -+ lru_gen_in_fault() && !(current->flags & PF_MEMALLOC)) -+ folio_set_active(folio); -+ - folio_get(folio); - local_lock(&lru_pvecs.lock); - pvec = this_cpu_ptr(&lru_pvecs.lru_add); -@@ -551,7 +556,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec) - - static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec) - { -- if (PageActive(page) && !PageUnevictable(page)) { -+ if (!PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) { - int nr_pages = thp_nr_pages(page); - - del_page_from_lru_list(page, lruvec); -@@ -666,7 +671,7 @@ void deactivate_file_folio(struct folio *folio) - */ - void deactivate_page(struct page *page) - { -- if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { -+ if (PageLRU(page) && !PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) { - struct pagevec *pvec; - - local_lock(&lru_pvecs.lock); -diff --git a/mm/vmscan.c b/mm/vmscan.c -index fddb9bd3c6c2..1fcc0feed985 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -2992,6 +2992,81 @@ static bool can_age_anon_pages(struct pglist_data *pgdat, - return can_demote(pgdat->node_id, sc); - } - -+#ifdef CONFIG_LRU_GEN -+ -+/****************************************************************************** -+ * shorthand helpers -+ ******************************************************************************/ -+ -+#define for_each_gen_type_zone(gen, type, zone) \ -+ for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \ -+ for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \ -+ for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++) -+ -+static struct lruvec __maybe_unused *get_lruvec(struct mem_cgroup *memcg, int nid) -+{ -+ struct pglist_data *pgdat = NODE_DATA(nid); -+ -+#ifdef CONFIG_MEMCG -+ if (memcg) { -+ struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec; -+ -+ /* for hotadd_new_pgdat() */ -+ if (!lruvec->pgdat) -+ lruvec->pgdat = pgdat; -+ -+ return lruvec; -+ } -+#endif -+ VM_WARN_ON_ONCE(!mem_cgroup_disabled()); -+ -+ return pgdat ? 
&pgdat->__lruvec : NULL; -+} -+ -+/****************************************************************************** -+ * initialization -+ ******************************************************************************/ -+ -+void lru_gen_init_lruvec(struct lruvec *lruvec) -+{ -+ int gen, type, zone; -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ -+ lrugen->max_seq = MIN_NR_GENS + 1; -+ -+ for_each_gen_type_zone(gen, type, zone) -+ INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]); -+} -+ -+#ifdef CONFIG_MEMCG -+void lru_gen_init_memcg(struct mem_cgroup *memcg) -+{ -+} -+ -+void lru_gen_exit_memcg(struct mem_cgroup *memcg) -+{ -+ int nid; -+ -+ for_each_node(nid) { -+ struct lruvec *lruvec = get_lruvec(memcg, nid); -+ -+ VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0, -+ sizeof(lruvec->lrugen.nr_pages))); -+ } -+} -+#endif -+ -+static int __init init_lru_gen(void) -+{ -+ BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS); -+ BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS); -+ -+ return 0; -+}; -+late_initcall(init_lru_gen); -+ -+#endif /* CONFIG_LRU_GEN */ -+ - static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) - { - unsigned long nr[NR_LRU_LISTS]; - -From patchwork Wed Jul 6 22:00:15 2022 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Patchwork-Submitter: Yu Zhao -X-Patchwork-Id: 12908704 -Return-Path: -X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on - aws-us-west-2-korg-lkml-1.web.codeaurora.org -Received: from kanga.kvack.org (kanga.kvack.org [205.233.56.17]) - by smtp.lore.kernel.org (Postfix) with ESMTP id BC3F5C43334 - for ; Wed, 6 Jul 2022 22:01:04 +0000 (UTC) -Received: by kanga.kvack.org (Postfix) - id 12E3C8E0002; Wed, 6 Jul 2022 18:01:02 -0400 (EDT) -Received: by kanga.kvack.org (Postfix, from userid 40) - id 0B84D8E0001; Wed, 6 Jul 2022 18:01:02 -0400 (EDT) -X-Delivered-To: int-list-linux-mm@kvack.org -Received: by kanga.kvack.org (Postfix, from userid 63042) - id DAFB38E0002; Wed, 6 Jul 2022 18:01:01 -0400 (EDT) -X-Delivered-To: linux-mm@kvack.org -Received: from relay.hostedemail.com (smtprelay0016.hostedemail.com - [216.40.44.16]) - by kanga.kvack.org (Postfix) with ESMTP id C1CFC8E0001 - for ; Wed, 6 Jul 2022 18:01:01 -0400 (EDT) -Received: from smtpin27.hostedemail.com (a10.router.float.18 [10.200.18.1]) - by unirelay02.hostedemail.com (Postfix) with ESMTP id 8F7AA33A54 - for ; Wed, 6 Jul 2022 22:01:01 +0000 (UTC) -X-FDA: 79658045922.27.E9F2FD6 -Received: from mail-yb1-f201.google.com (mail-yb1-f201.google.com - [209.85.219.201]) - by imf27.hostedemail.com (Postfix) with ESMTP id C38524001B - for ; Wed, 6 Jul 2022 22:01:00 +0000 (UTC) -Received: by mail-yb1-f201.google.com with SMTP id - u17-20020a258411000000b0066dfb22644eso11129264ybk.6 - for ; Wed, 06 Jul 2022 15:01:00 -0700 (PDT) -DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20210112; - h=date:in-reply-to:message-id:mime-version:references:subject:from:to - :cc:content-transfer-encoding; - bh=y0ZxSqiIOv2HRYm553wZrJx5fChLkGPPbLO1qwZgmyQ=; - b=k4czIYvx4CiuCTGm0ZE5CP3ROAwcGkVPLViBUVhaVvkR7uaNKMq35oiGoZrpr9wmyA - 3m25Gt55w07/Zl+RDxl25UcbFclUuv1IhW8RxSswLcgrHkQRPfvrY4sHXWvh8Zx9tcVy - 57vPZrwMAdg5KxxrjfPcq/qdHGTF/uyJnTdFe8v4GztZ5hfTrusX1wVVySS9zGZ/5Iow - Nd9yluqy3C3Vy/90KJx2guGDz9MOF3sU6l1ICpYZ9vNR6C8Rq/+pMVqKsY9lUtmogcQ9 - 4GYcy0Nvop1G8oE5zpjlPJBv9NQtnMO9nw2qaCn4RWoOH37nG4jPSXNMIBpa8zn061RW - FgQg== -X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=1e100.net; s=20210112; - 
h=x-gm-message-state:date:in-reply-to:message-id:mime-version - :references:subject:from:to:cc:content-transfer-encoding; - bh=y0ZxSqiIOv2HRYm553wZrJx5fChLkGPPbLO1qwZgmyQ=; - b=363aZyaQrxgNeIHPuTaRjAavjP4F8EO5sILVoIfz7F8ymEnUrJ32Mjc0rSEnA9jAM9 - iqLFSFIFzIMWUuTmljy1fFNDL7A2uNdDlNrJRCZ/gZAbXFjDT2j5Dl8E8XzpIRlJl7vn - ZRDzjDR9sVo08B1nyi1AhFP4nb1L2qE8qvTpXkzENYGDSZu7h/AALKU8/CiZpj9hkDwo - lzh5wc2ycnx5mXpDF9ieinUDPgG+YeeoSleAk0FgtV+zyB5xkdIruPNpSueQff+ct/G8 - 5c1VriWHEfaNMcJkuREKnILndD3W/O2w5mkcChSKsKDm9/nd8r2q/EJUgq2xca+u+CAl - jEVQ== -X-Gm-Message-State: AJIora/mFkYkqX5X5k4lKDoDCm8/beFZvhHqMOoV25In9oaB8n7Wpsnu - TVd/VrxMHywvnAlU0/ugaxgGqWgltsw= -X-Google-Smtp-Source: - AGRyM1sRDxtae2IlBgXPvJfXEts8Wxw8Va1kZtVIMGzblX4Mg8zS6Ie6RM5yT6WBMCN4GAE5u4jJ09Jf3oM= -X-Received: from yuzhao.bld.corp.google.com - ([2620:15c:183:200:b89c:e10a:466e:cf7d]) - (user=yuzhao job=sendgmr) by 2002:a81:ab4d:0:b0:31c:8655:2207 with SMTP id - d13-20020a81ab4d000000b0031c86552207mr26036050ywk.389.1657144860068; Wed, 06 - Jul 2022 15:01:00 -0700 (PDT) -Date: Wed, 6 Jul 2022 16:00:15 -0600 -In-Reply-To: <20220706220022.968789-1-yuzhao@google.com> -Message-Id: <20220706220022.968789-7-yuzhao@google.com> -Mime-Version: 1.0 -References: <20220706220022.968789-1-yuzhao@google.com> -X-Mailer: git-send-email 2.37.0.rc0.161.g10f37bed90-goog -Subject: [PATCH v13 06/14] mm: multi-gen LRU: minimal implementation -From: Yu Zhao -To: Andrew Morton -Cc: Andi Kleen , - Aneesh Kumar , - Catalin Marinas , - Dave Hansen , Hillf Danton , - Jens Axboe , Johannes Weiner , - Jonathan Corbet , - Linus Torvalds , - Matthew Wilcox , Mel Gorman , - Michael Larabel , - Michal Hocko , Mike Rapoport , - Peter Zijlstra , Tejun Heo , - Vlastimil Babka , Will Deacon , - linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org, - linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org, - page-reclaim@google.com, Yu Zhao , - Brian Geffon , - Jan Alexander Steffens , - Oleksandr Natalenko , - Steven Barrett , - Suleiman Souhlal , Daniel Byrne , - Donald Carr , - " =?utf-8?q?Holger_Hoffst=C3=A4tte?= " , - Konstantin Kharlamov , - Shuang Zhai , Sofia Trinh , - Vaibhav Jain -ARC-Seal: i=1; s=arc-20220608; d=hostedemail.com; t=1657144861; a=rsa-sha256; - cv=none; - b=oTd2WxyeO8ccfm0UBIeIMD+jFWftz29Vc+53VsIdewSZAb8/4ceMzzXxauEqqrmAUtsLQ4 - sWFeVaIcSbnT8ZbgPae4FumiKT2ISp4qcEqBL74ek6P+YSnzhBoTUB4RYYRJ4JqS5sa2rW - hk5QljWrRnJjE4lY/D16EloP8YSx7T8= -ARC-Authentication-Results: i=1; - imf27.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=k4czIYvx; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf27.hostedemail.com: domain of - 3HAbGYgYKCF0TPUC5JBJJBG9.7JHGDIPS-HHFQ57F.JMB@flex--yuzhao.bounces.google.com - designates 209.85.219.201 as permitted sender) - smtp.mailfrom=3HAbGYgYKCF0TPUC5JBJJBG9.7JHGDIPS-HHFQ57F.JMB@flex--yuzhao.bounces.google.com -ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; - d=hostedemail.com; - s=arc-20220608; t=1657144861; - h=from:from:sender:reply-to:subject:subject:date:date: - message-id:message-id:to:to:cc:cc:mime-version:mime-version: - content-type:content-type: - content-transfer-encoding:content-transfer-encoding: - in-reply-to:in-reply-to:references:references:dkim-signature; - bh=y0ZxSqiIOv2HRYm553wZrJx5fChLkGPPbLO1qwZgmyQ=; - b=wTs1b9ocf9FcHn9gYIlmuegnIgTo09PHZ8oYnB8j0wXjHhk0Al+NkNixxAvIfaCH4rMsxI - ErhpOzYCe9rwuJ5BAQvblyNUvN2Y5/i9ASXhp2bGy5PaMkTpI8OeOqjiGL9EQQonR3t7UB - j3QLmoVYs9VO0LxxgVoIQEv9nGf6zf8= -X-Stat-Signature: 653crzffxniht38wad94goaie9ebgi3z 
-X-Rspam-User: -X-Rspamd-Server: rspam12 -X-Rspamd-Queue-Id: C38524001B -Authentication-Results: imf27.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=k4czIYvx; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf27.hostedemail.com: domain of - 3HAbGYgYKCF0TPUC5JBJJBG9.7JHGDIPS-HHFQ57F.JMB@flex--yuzhao.bounces.google.com - designates 209.85.219.201 as permitted sender) - smtp.mailfrom=3HAbGYgYKCF0TPUC5JBJJBG9.7JHGDIPS-HHFQ57F.JMB@flex--yuzhao.bounces.google.com -X-HE-Tag: 1657144860-126552 -X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4 -Sender: owner-linux-mm@kvack.org -Precedence: bulk -X-Loop: owner-majordomo@kvack.org -List-ID: - -To avoid confusion, the terms "promotion" and "demotion" will be -applied to the multi-gen LRU, as a new convention; the terms -"activation" and "deactivation" will be applied to the active/inactive -LRU, as usual. - -The aging produces young generations. Given an lruvec, it increments -max_seq when max_seq-min_seq+1 approaches MIN_NR_GENS. The aging -promotes hot pages to the youngest generation when it finds them -accessed through page tables; the demotion of cold pages happens -consequently when it increments max_seq. Promotion in the aging path -does not involve any LRU list operations, only the updates of the gen -counter and lrugen->nr_pages[]; demotion, unless as the result of the -increment of max_seq, requires LRU list operations, e.g., -lru_deactivate_fn(). The aging has the complexity O(nr_hot_pages), -since it is only interested in hot pages. - -The eviction consumes old generations. Given an lruvec, it increments -min_seq when lrugen->lists[] indexed by min_seq%MAX_NR_GENS becomes -empty. A feedback loop modeled after the PID controller monitors -refaults over anon and file types and decides which type to evict when -both types are available from the same generation. - -The protection of pages accessed multiple times through file -descriptors takes place in the eviction path. Each generation is -divided into multiple tiers. A page accessed N times through file -descriptors is in tier order_base_2(N). Tiers do not have dedicated -lrugen->lists[], only bits in folio->flags. The aforementioned -feedback loop also monitors refaults over all tiers and decides when -to protect pages in which tiers (N>1), using the first tier (N=0,1) as -a baseline. The first tier contains single-use unmapped clean pages, -which are most likely the best choices. In contrast to promotion in -the aging path, the protection of a page in the eviction path is -achieved by moving this page to the next generation, i.e., min_seq+1, -if the feedback loop decides so. This approach has the following -advantages: -1. It removes the cost of activation in the buffered access path by - inferring whether pages accessed multiple times through file - descriptors are statistically hot and thus worth protecting in the - eviction path. -2. It takes pages accessed through page tables into account and avoids - overprotecting pages accessed multiple times through file - descriptors. (Pages accessed through page tables are in the first - tier, since N=0.) -3. More tiers provide better protection for pages accessed more than - twice through file descriptors, when under heavy buffered I/O - workloads. 
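Before the benchmark numbers, a standalone sketch of the index arithmetic just described: generations are ring-indexed by seq % MAX_NR_GENS, and a page accessed N times through file descriptors sits in tier order_base_2(N), mirroring lru_gen_from_seq() and lru_tier_from_refs() in the diff below. The program is illustrative only; the access counts fed in are made-up examples, and the real refs counter saturates at BIT(LRU_REFS_WIDTH), which the clamp here only approximates.

  #include <stdio.h>

  #define MAX_NR_GENS  4
  #define MAX_NR_TIERS 4

  /* smallest k with 2^k >= n, i.e. the kernel's order_base_2() */
  static int order_base_2(unsigned int n)
  {
          int k = 0;

          while ((1U << k) < n)
                  k++;
          return k;
  }

  /* cf. lru_gen_from_seq(): a sliding window of seqs maps onto 4 lists */
  static int gen_from_seq(unsigned long seq)
  {
          return seq % MAX_NR_GENS;
  }

  /* tier of a page accessed n times through file descriptors; n=0 and
   * n=1 both land in the first tier, further accesses move it up
   * logarithmically */
  static int tier_from_accesses(unsigned int n)
  {
          int tier = n ? order_base_2(n) : 0;

          return tier < MAX_NR_TIERS ? tier : MAX_NR_TIERS - 1;
  }

  int main(void)
  {
          unsigned long seq;
          unsigned int n;

          for (seq = 7; seq < 11; seq++)
                  printf("seq %lu -> gen %d\n", seq, gen_from_seq(seq));
          for (n = 0; n <= 8; n++)
                  printf("%u accesses -> tier %d\n", n, tier_from_accesses(n));
          return 0;
  }

Tiers have no dedicated lists; they live entirely in folio->flags, which is what keeps the buffered access path down to a couple of atomic bit operations.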
- -Server benchmark results: - Single workload: - fio (buffered I/O): +[30, 32]% - IOPS BW - 5.19-rc1: 2673k 10.2GiB/s - patch1-6: 3491k 13.3GiB/s - - Single workload: - memcached (anon): -[4, 6]% - Ops/sec KB/sec - 5.19-rc1: 1161501.04 45177.25 - patch1-6: 1106168.46 43025.04 - - Configurations: - CPU: two Xeon 6154 - Mem: total 256G - - Node 1 was only used as a ram disk to reduce the variance in the - results. - - patch drivers/block/brd.c < gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM | __GFP_THISNODE; - > page = alloc_pages_node(1, gfp_flags, 0); - EOF - - cat >>/etc/systemd/system.conf <>/etc/memcached.conf </sys/fs/cgroup/user.slice/test/memory.max - echo $$ >/sys/fs/cgroup/user.slice/test/cgroup.procs - fio -name=mglru --numjobs=72 --directory=/mnt --size=1408m \ - --buffered=1 --ioengine=io_uring --iodepth=128 \ - --iodepth_batch_submit=32 --iodepth_batch_complete=32 \ - --rw=randread --random_distribution=random --norandommap \ - --time_based --ramp_time=10m --runtime=5m --group_reporting - - cat memcached.sh - modprobe brd rd_nr=1 rd_size=113246208 - swapoff -a - mkswap /dev/ram0 - swapon /dev/ram0 - - memtier_benchmark -S /var/run/memcached/memcached.sock \ - -P memcache_binary -n allkeys --key-minimum=1 \ - --key-maximum=65000000 --key-pattern=P:P -c 1 -t 36 \ - --ratio 1:0 --pipeline 8 -d 2000 - - memtier_benchmark -S /var/run/memcached/memcached.sock \ - -P memcache_binary -n allkeys --key-minimum=1 \ - --key-maximum=65000000 --key-pattern=R:R -c 1 -t 36 \ - --ratio 0:1 --pipeline 8 --randomize --distinct-client-seed - -Client benchmark results: - kswapd profiles: - 5.19-rc1 - 40.33% page_vma_mapped_walk (overhead) - 21.80% lzo1x_1_do_compress (real work) - 7.53% do_raw_spin_lock - 3.95% _raw_spin_unlock_irq - 2.52% vma_interval_tree_iter_next - 2.37% folio_referenced_one - 2.28% vma_interval_tree_subtree_search - 1.97% anon_vma_interval_tree_iter_first - 1.60% ptep_clear_flush - 1.06% __zram_bvec_write - - patch1-6 - 39.03% lzo1x_1_do_compress (real work) - 18.47% page_vma_mapped_walk (overhead) - 6.74% _raw_spin_unlock_irq - 3.97% do_raw_spin_lock - 2.49% ptep_clear_flush - 2.48% anon_vma_interval_tree_iter_first - 1.92% folio_referenced_one - 1.88% __zram_bvec_write - 1.48% memmove - 1.31% vma_interval_tree_iter_next - - Configurations: - CPU: single Snapdragon 7c - Mem: total 4G - - Chrome OS MemoryPressure [1] - -[1] https://chromium.googlesource.com/chromiumos/platform/tast-tests/ - -Signed-off-by: Yu Zhao -Acked-by: Brian Geffon -Acked-by: Jan Alexander Steffens (heftig) -Acked-by: Oleksandr Natalenko -Acked-by: Steven Barrett -Acked-by: Suleiman Souhlal -Tested-by: Daniel Byrne -Tested-by: Donald Carr -Tested-by: Holger Hoffstätte -Tested-by: Konstantin Kharlamov -Tested-by: Shuang Zhai -Tested-by: Sofia Trinh -Tested-by: Vaibhav Jain ---- - include/linux/mm_inline.h | 36 ++ - include/linux/mmzone.h | 41 ++ - include/linux/page-flags-layout.h | 5 +- - kernel/bounds.c | 2 + - mm/Kconfig | 11 + - mm/swap.c | 39 ++ - mm/vmscan.c | 810 +++++++++++++++++++++++++++++- - mm/workingset.c | 110 +++- - 8 files changed, 1044 insertions(+), 10 deletions(-) - -diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h -index 2ff703900fd0..f2b2296a42f9 100644 ---- a/include/linux/mm_inline.h -+++ b/include/linux/mm_inline.h -@@ -121,6 +121,33 @@ static inline int lru_gen_from_seq(unsigned long seq) - return seq % MAX_NR_GENS; - } - -+static inline int lru_hist_from_seq(unsigned long seq) -+{ -+ return seq % NR_HIST_GENS; -+} -+ -+static inline int 
lru_tier_from_refs(int refs) -+{ -+ VM_WARN_ON_ONCE(refs > BIT(LRU_REFS_WIDTH)); -+ -+ /* see the comment in folio_lru_refs() */ -+ return order_base_2(refs + 1); -+} -+ -+static inline int folio_lru_refs(struct folio *folio) -+{ -+ unsigned long flags = READ_ONCE(folio->flags); -+ bool workingset = flags & BIT(PG_workingset); -+ -+ /* -+ * Return the number of accesses beyond PG_referenced, i.e., N-1 if the -+ * total number of accesses is N>1, since N=0,1 both map to the first -+ * tier. lru_tier_from_refs() will account for this off-by-one. Also see -+ * the comment on MAX_NR_TIERS. -+ */ -+ return ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + workingset; -+} -+ - static inline int folio_lru_gen(struct folio *folio) - { - unsigned long flags = READ_ONCE(folio->flags); -@@ -173,6 +200,15 @@ static inline void lru_gen_update_size(struct lruvec *lruvec, struct folio *foli - __update_lru_size(lruvec, lru, zone, -delta); - return; - } -+ -+ /* promotion */ -+ if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) { -+ __update_lru_size(lruvec, lru, zone, -delta); -+ __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta); -+ } -+ -+ /* demotion requires isolation, e.g., lru_deactivate_fn() */ -+ VM_WARN_ON_ONCE(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen)); - } - - static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, bool reclaiming) -diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index c90c2282044e..0d76222501ed 100644 ---- a/include/linux/mmzone.h -+++ b/include/linux/mmzone.h -@@ -347,6 +347,28 @@ enum lruvec_flags { - #define MIN_NR_GENS 2U - #define MAX_NR_GENS 4U - -+/* -+ * Each generation is divided into multiple tiers. A page accessed N times -+ * through file descriptors is in tier order_base_2(N). A page in the first tier -+ * (N=0,1) is marked by PG_referenced unless it was faulted in through page -+ * tables or read ahead. A page in any other tier (N>1) is marked by -+ * PG_referenced and PG_workingset. This implies a minimum of two tiers is -+ * supported without using additional bits in folio->flags. -+ * -+ * In contrast to moving across generations which requires the LRU lock, moving -+ * across tiers only involves atomic operations on folio->flags and therefore -+ * has a negligible cost in the buffered access path. In the eviction path, -+ * comparisons of refaulted/(evicted+protected) from the first tier and the -+ * rest infer whether pages accessed multiple times through file descriptors -+ * are statistically hot and thus worth protecting. -+ * -+ * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice the -+ * number of categories of the active/inactive LRU when keeping track of -+ * accesses through file descriptors. This uses MAX_NR_TIERS-2 spare bits in -+ * folio->flags. -+ */ -+#define MAX_NR_TIERS 4U -+ - #ifndef __GENERATING_BOUNDS_H - - struct lruvec; -@@ -361,6 +383,16 @@ enum { - LRU_GEN_FILE, - }; - -+#define MIN_LRU_BATCH BITS_PER_LONG -+#define MAX_LRU_BATCH (MIN_LRU_BATCH * 128) -+ -+/* whether to keep historical stats from evicted generations */ -+#ifdef CONFIG_LRU_GEN_STATS -+#define NR_HIST_GENS MAX_NR_GENS -+#else -+#define NR_HIST_GENS 1U -+#endif -+ - /* - * The youngest generation number is stored in max_seq for both anon and file - * types as they are aged on an equal footing. 
The oldest generation numbers are -@@ -383,6 +415,15 @@ struct lru_gen_struct { - struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; - /* the multi-gen LRU sizes, eventually consistent */ - long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; -+ /* the exponential moving average of refaulted */ -+ unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS]; -+ /* the exponential moving average of evicted+protected */ -+ unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS]; -+ /* the first tier doesn't need protection, hence the minus one */ -+ unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1]; -+ /* can be modified without holding the LRU lock */ -+ atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; -+ atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; - }; - - void lru_gen_init_lruvec(struct lruvec *lruvec); -diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h -index 240905407a18..7d79818dc065 100644 ---- a/include/linux/page-flags-layout.h -+++ b/include/linux/page-flags-layout.h -@@ -106,7 +106,10 @@ - #error "Not enough bits in page flags" - #endif - --#define LRU_REFS_WIDTH 0 -+/* see the comment on MAX_NR_TIERS */ -+#define LRU_REFS_WIDTH min(__LRU_REFS_WIDTH, BITS_PER_LONG - NR_PAGEFLAGS - \ -+ ZONES_WIDTH - LRU_GEN_WIDTH - SECTIONS_WIDTH - \ -+ NODES_WIDTH - KASAN_TAG_WIDTH - LAST_CPUPID_WIDTH) - - #endif - #endif /* _LINUX_PAGE_FLAGS_LAYOUT */ -diff --git a/kernel/bounds.c b/kernel/bounds.c -index 5ee60777d8e4..b529182e8b04 100644 ---- a/kernel/bounds.c -+++ b/kernel/bounds.c -@@ -24,8 +24,10 @@ int main(void) - DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); - #ifdef CONFIG_LRU_GEN - DEFINE(LRU_GEN_WIDTH, order_base_2(MAX_NR_GENS + 1)); -+ DEFINE(__LRU_REFS_WIDTH, MAX_NR_TIERS - 2); - #else - DEFINE(LRU_GEN_WIDTH, 0); -+ DEFINE(__LRU_REFS_WIDTH, 0); - #endif - /* End of constants */ - -diff --git a/mm/Kconfig b/mm/Kconfig -index cee109f3128a..a93478acf341 100644 ---- a/mm/Kconfig -+++ b/mm/Kconfig -@@ -1130,6 +1130,7 @@ config PTE_MARKER_UFFD_WP - purposes. It is required to enable userfaultfd write protection on - file-backed memory types like shmem and hugetlbfs. - -+# multi-gen LRU { - config LRU_GEN - bool "Multi-Gen LRU" - depends on MMU -@@ -1138,6 +1139,16 @@ config LRU_GEN - help - A high performance LRU implementation to overcommit memory. - -+config LRU_GEN_STATS -+ bool "Full stats for debugging" -+ depends on LRU_GEN -+ help -+ Do not enable this option unless you plan to look at historical stats -+ from evicted generations for debugging purpose. -+ -+ This option has a per-memcg and per-node memory overhead. 
-+# } -+ - source "mm/damon/Kconfig" - - endmenu -diff --git a/mm/swap.c b/mm/swap.c -index b062729b340f..67e7962fbacc 100644 ---- a/mm/swap.c -+++ b/mm/swap.c -@@ -405,6 +405,40 @@ static void __lru_cache_activate_folio(struct folio *folio) - local_unlock(&lru_pvecs.lock); - } - -+#ifdef CONFIG_LRU_GEN -+static void folio_inc_refs(struct folio *folio) -+{ -+ unsigned long new_flags, old_flags = READ_ONCE(folio->flags); -+ -+ if (folio_test_unevictable(folio)) -+ return; -+ -+ if (!folio_test_referenced(folio)) { -+ folio_set_referenced(folio); -+ return; -+ } -+ -+ if (!folio_test_workingset(folio)) { -+ folio_set_workingset(folio); -+ return; -+ } -+ -+ /* see the comment on MAX_NR_TIERS */ -+ do { -+ new_flags = old_flags & LRU_REFS_MASK; -+ if (new_flags == LRU_REFS_MASK) -+ break; -+ -+ new_flags += BIT(LRU_REFS_PGOFF); -+ new_flags |= old_flags & ~LRU_REFS_MASK; -+ } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags)); -+} -+#else -+static void folio_inc_refs(struct folio *folio) -+{ -+} -+#endif /* CONFIG_LRU_GEN */ -+ - /* - * Mark a page as having seen activity. - * -@@ -417,6 +451,11 @@ static void __lru_cache_activate_folio(struct folio *folio) - */ - void folio_mark_accessed(struct folio *folio) - { -+ if (lru_gen_enabled()) { -+ folio_inc_refs(folio); -+ return; -+ } -+ - if (!folio_test_referenced(folio)) { - folio_set_referenced(folio); - } else if (folio_test_unevictable(folio)) { -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 1fcc0feed985..f768d61e7b85 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -1273,9 +1273,11 @@ static int __remove_mapping(struct address_space *mapping, struct folio *folio, - - if (folio_test_swapcache(folio)) { - swp_entry_t swap = folio_swap_entry(folio); -- mem_cgroup_swapout(folio, swap); -+ -+ /* get a shadow entry before mem_cgroup_swapout() clears folio_memcg() */ - if (reclaimed && !mapping_exiting(mapping)) - shadow = workingset_eviction(folio, target_memcg); -+ mem_cgroup_swapout(folio, swap); - __delete_from_swap_cache(&folio->page, swap, shadow); - xa_unlock_irq(&mapping->i_pages); - put_swap_page(&folio->page, swap); -@@ -2675,6 +2677,9 @@ static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc) - unsigned long file; - struct lruvec *target_lruvec; - -+ if (lru_gen_enabled()) -+ return; -+ - target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); - - /* -@@ -2998,6 +3003,17 @@ static bool can_age_anon_pages(struct pglist_data *pgdat, - * shorthand helpers - ******************************************************************************/ - -+#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset)) -+ -+#define DEFINE_MAX_SEQ(lruvec) \ -+ unsigned long max_seq = READ_ONCE((lruvec)->lrugen.max_seq) -+ -+#define DEFINE_MIN_SEQ(lruvec) \ -+ unsigned long min_seq[ANON_AND_FILE] = { \ -+ READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_ANON]), \ -+ READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_FILE]), \ -+ } -+ - #define for_each_gen_type_zone(gen, type, zone) \ - for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \ - for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \ -@@ -3023,6 +3039,764 @@ static struct lruvec __maybe_unused *get_lruvec(struct mem_cgroup *memcg, int ni - return pgdat ? 
&pgdat->__lruvec : NULL; - } - -+static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc) -+{ -+ struct mem_cgroup *memcg = lruvec_memcg(lruvec); -+ struct pglist_data *pgdat = lruvec_pgdat(lruvec); -+ -+ if (!can_demote(pgdat->node_id, sc) && -+ mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH) -+ return 0; -+ -+ return mem_cgroup_swappiness(memcg); -+} -+ -+static int get_nr_gens(struct lruvec *lruvec, int type) -+{ -+ return lruvec->lrugen.max_seq - lruvec->lrugen.min_seq[type] + 1; -+} -+ -+static bool __maybe_unused seq_is_valid(struct lruvec *lruvec) -+{ -+ /* see the comment on lru_gen_struct */ -+ return get_nr_gens(lruvec, LRU_GEN_FILE) >= MIN_NR_GENS && -+ get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) && -+ get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS; -+} -+ -+/****************************************************************************** -+ * refault feedback loop -+ ******************************************************************************/ -+ -+/* -+ * A feedback loop based on Proportional-Integral-Derivative (PID) controller. -+ * -+ * The P term is refaulted/(evicted+protected) from a tier in the generation -+ * currently being evicted; the I term is the exponential moving average of the -+ * P term over the generations previously evicted, using the smoothing factor -+ * 1/2; the D term isn't supported. -+ * -+ * The setpoint (SP) is always the first tier of one type; the process variable -+ * (PV) is either any tier of the other type or any other tier of the same -+ * type. -+ * -+ * The error is the difference between the SP and the PV; the correction is to -+ * turn off protection when SP>PV or turn on protection when SPlrugen; -+ int hist = lru_hist_from_seq(lrugen->min_seq[type]); -+ -+ pos->refaulted = lrugen->avg_refaulted[type][tier] + -+ atomic_long_read(&lrugen->refaulted[hist][type][tier]); -+ pos->total = lrugen->avg_total[type][tier] + -+ atomic_long_read(&lrugen->evicted[hist][type][tier]); -+ if (tier) -+ pos->total += lrugen->protected[hist][type][tier - 1]; -+ pos->gain = gain; -+} -+ -+static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover) -+{ -+ int hist, tier; -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1; -+ unsigned long seq = carryover ? lrugen->min_seq[type] : lrugen->max_seq + 1; -+ -+ lockdep_assert_held(&lruvec->lru_lock); -+ -+ if (!carryover && !clear) -+ return; -+ -+ hist = lru_hist_from_seq(seq); -+ -+ for (tier = 0; tier < MAX_NR_TIERS; tier++) { -+ if (carryover) { -+ unsigned long sum; -+ -+ sum = lrugen->avg_refaulted[type][tier] + -+ atomic_long_read(&lrugen->refaulted[hist][type][tier]); -+ WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2); -+ -+ sum = lrugen->avg_total[type][tier] + -+ atomic_long_read(&lrugen->evicted[hist][type][tier]); -+ if (tier) -+ sum += lrugen->protected[hist][type][tier - 1]; -+ WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2); -+ } -+ -+ if (clear) { -+ atomic_long_set(&lrugen->refaulted[hist][type][tier], 0); -+ atomic_long_set(&lrugen->evicted[hist][type][tier], 0); -+ if (tier) -+ WRITE_ONCE(lrugen->protected[hist][type][tier - 1], 0); -+ } -+ } -+} -+ -+static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv) -+{ -+ /* -+ * Return true if the PV has a limited number of refaults or a lower -+ * refaulted/total than the SP. 
-+ */ -+ return pv->refaulted < MIN_LRU_BATCH || -+ pv->refaulted * (sp->total + MIN_LRU_BATCH) * sp->gain <= -+ (sp->refaulted + 1) * pv->total * pv->gain; -+} -+ -+/****************************************************************************** -+ * the aging -+ ******************************************************************************/ -+ -+/* protect pages accessed multiple times through file descriptors */ -+static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming) -+{ -+ int type = folio_is_file_lru(folio); -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]); -+ unsigned long new_flags, old_flags = READ_ONCE(folio->flags); -+ -+ VM_WARN_ON_ONCE_FOLIO(!(old_flags & LRU_GEN_MASK), folio); -+ -+ do { -+ new_gen = (old_gen + 1) % MAX_NR_GENS; -+ -+ new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS); -+ new_flags |= (new_gen + 1UL) << LRU_GEN_PGOFF; -+ /* for folio_end_writeback() */ -+ if (reclaiming) -+ new_flags |= BIT(PG_reclaim); -+ } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags)); -+ -+ lru_gen_update_size(lruvec, folio, old_gen, new_gen); -+ -+ return new_gen; -+} -+ -+static void inc_min_seq(struct lruvec *lruvec, int type) -+{ -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ -+ reset_ctrl_pos(lruvec, type, true); -+ WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1); -+} -+ -+static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) -+{ -+ int gen, type, zone; -+ bool success = false; -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ DEFINE_MIN_SEQ(lruvec); -+ -+ VM_WARN_ON_ONCE(!seq_is_valid(lruvec)); -+ -+ /* find the oldest populated generation */ -+ for (type = !can_swap; type < ANON_AND_FILE; type++) { -+ while (min_seq[type] + MIN_NR_GENS <= lrugen->max_seq) { -+ gen = lru_gen_from_seq(min_seq[type]); -+ -+ for (zone = 0; zone < MAX_NR_ZONES; zone++) { -+ if (!list_empty(&lrugen->lists[gen][type][zone])) -+ goto next; -+ } -+ -+ min_seq[type]++; -+ } -+next: -+ ; -+ } -+ -+ /* see the comment on lru_gen_struct */ -+ if (can_swap) { -+ min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]); -+ min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]); -+ } -+ -+ for (type = !can_swap; type < ANON_AND_FILE; type++) { -+ if (min_seq[type] == lrugen->min_seq[type]) -+ continue; -+ -+ reset_ctrl_pos(lruvec, type, true); -+ WRITE_ONCE(lrugen->min_seq[type], min_seq[type]); -+ success = true; -+ } -+ -+ return success; -+} -+ -+static void inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, bool can_swap) -+{ -+ int prev, next; -+ int type, zone; -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ -+ spin_lock_irq(&lruvec->lru_lock); -+ -+ VM_WARN_ON_ONCE(!seq_is_valid(lruvec)); -+ -+ if (max_seq != lrugen->max_seq) -+ goto unlock; -+ -+ for (type = 0; type < ANON_AND_FILE; type++) { -+ if (get_nr_gens(lruvec, type) != MAX_NR_GENS) -+ continue; -+ -+ VM_WARN_ON_ONCE(type == LRU_GEN_FILE || can_swap); -+ -+ inc_min_seq(lruvec, type); -+ } -+ -+ /* -+ * Update the active/inactive LRU sizes for compatibility. Both sides of -+ * the current max_seq need to be covered, since max_seq+1 can overlap -+ * with min_seq[LRU_GEN_ANON] if swapping is constrained. And if they do -+ * overlap, cold/hot inversion happens. 
-+ */ -+ prev = lru_gen_from_seq(lrugen->max_seq - 1); -+ next = lru_gen_from_seq(lrugen->max_seq + 1); -+ -+ for (type = 0; type < ANON_AND_FILE; type++) { -+ for (zone = 0; zone < MAX_NR_ZONES; zone++) { -+ enum lru_list lru = type * LRU_INACTIVE_FILE; -+ long delta = lrugen->nr_pages[prev][type][zone] - -+ lrugen->nr_pages[next][type][zone]; -+ -+ if (!delta) -+ continue; -+ -+ __update_lru_size(lruvec, lru, zone, delta); -+ __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, -delta); -+ } -+ } -+ -+ for (type = 0; type < ANON_AND_FILE; type++) -+ reset_ctrl_pos(lruvec, type, false); -+ -+ /* make sure preceding modifications appear */ -+ smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1); -+unlock: -+ spin_unlock_irq(&lruvec->lru_lock); -+} -+ -+static unsigned long get_nr_evictable(struct lruvec *lruvec, unsigned long max_seq, -+ unsigned long *min_seq, bool can_swap, bool *need_aging) -+{ -+ int gen, type, zone; -+ unsigned long old = 0; -+ unsigned long young = 0; -+ unsigned long total = 0; -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ -+ for (type = !can_swap; type < ANON_AND_FILE; type++) { -+ unsigned long seq; -+ -+ for (seq = min_seq[type]; seq <= max_seq; seq++) { -+ unsigned long size = 0; -+ -+ gen = lru_gen_from_seq(seq); -+ -+ for (zone = 0; zone < MAX_NR_ZONES; zone++) -+ size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L); -+ -+ total += size; -+ if (seq == max_seq) -+ young += size; -+ if (seq + MIN_NR_GENS == max_seq) -+ old += size; -+ } -+ } -+ -+ /* -+ * The aging tries to be lazy to reduce the overhead. On the other hand, -+ * the eviction stalls when the number of generations reaches -+ * MIN_NR_GENS. So ideally, there should be MIN_NR_GENS+1 generations, -+ * hence the first two if's. -+ * -+ * Also it's ideal to spread pages out evenly, meaning 1/(MIN_NR_GENS+1) -+ * of the total number of pages for each generation. A reasonable range -+ * for this average portion is [1/MIN_NR_GENS, 1/(MIN_NR_GENS+2)]. The -+ * eviction cares about the lower bound of cold pages, whereas the aging -+ * cares about the upper bound of hot pages. -+ */ -+ if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) -+ *need_aging = true; -+ else if (min_seq[!can_swap] + MIN_NR_GENS < max_seq) -+ *need_aging = false; -+ else if (young * MIN_NR_GENS > total) -+ *need_aging = true; -+ else if (old * (MIN_NR_GENS + 2) < total) -+ *need_aging = true; -+ else -+ *need_aging = false; -+ -+ return total; -+} -+ -+static void age_lruvec(struct lruvec *lruvec, struct scan_control *sc) -+{ -+ bool need_aging; -+ unsigned long nr_to_scan; -+ int swappiness = get_swappiness(lruvec, sc); -+ struct mem_cgroup *memcg = lruvec_memcg(lruvec); -+ DEFINE_MAX_SEQ(lruvec); -+ DEFINE_MIN_SEQ(lruvec); -+ -+ VM_WARN_ON_ONCE(sc->memcg_low_reclaim); -+ -+ mem_cgroup_calculate_protection(NULL, memcg); -+ -+ if (mem_cgroup_below_min(memcg)) -+ return; -+ -+ nr_to_scan = get_nr_evictable(lruvec, max_seq, min_seq, swappiness, &need_aging); -+ if (!nr_to_scan) -+ return; -+ -+ nr_to_scan >>= mem_cgroup_online(memcg) ? 
sc->priority : 0; -+ -+ if (nr_to_scan && need_aging) -+ inc_max_seq(lruvec, max_seq, swappiness); -+} -+ -+static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) -+{ -+ struct mem_cgroup *memcg; -+ -+ VM_WARN_ON_ONCE(!current_is_kswapd()); -+ -+ memcg = mem_cgroup_iter(NULL, NULL, NULL); -+ do { -+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); -+ -+ age_lruvec(lruvec, sc); -+ -+ cond_resched(); -+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); -+} -+ -+/****************************************************************************** -+ * the eviction -+ ******************************************************************************/ -+ -+static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) -+{ -+ bool success; -+ int gen = folio_lru_gen(folio); -+ int type = folio_is_file_lru(folio); -+ int zone = folio_zonenum(folio); -+ int delta = folio_nr_pages(folio); -+ int refs = folio_lru_refs(folio); -+ int tier = lru_tier_from_refs(refs); -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ -+ VM_WARN_ON_ONCE_FOLIO(gen >= MAX_NR_GENS, folio); -+ -+ /* unevictable */ -+ if (!folio_evictable(folio)) { -+ success = lru_gen_del_folio(lruvec, folio, true); -+ VM_WARN_ON_ONCE_FOLIO(!success, folio); -+ folio_set_unevictable(folio); -+ lruvec_add_folio(lruvec, folio); -+ __count_vm_events(UNEVICTABLE_PGCULLED, delta); -+ return true; -+ } -+ -+ /* dirty lazyfree */ -+ if (type == LRU_GEN_FILE && folio_test_anon(folio) && folio_test_dirty(folio)) { -+ success = lru_gen_del_folio(lruvec, folio, true); -+ VM_WARN_ON_ONCE_FOLIO(!success, folio); -+ folio_set_swapbacked(folio); -+ lruvec_add_folio_tail(lruvec, folio); -+ return true; -+ } -+ -+ /* protected */ -+ if (tier > tier_idx) { -+ int hist = lru_hist_from_seq(lrugen->min_seq[type]); -+ -+ gen = folio_inc_gen(lruvec, folio, false); -+ list_move_tail(&folio->lru, &lrugen->lists[gen][type][zone]); -+ -+ WRITE_ONCE(lrugen->protected[hist][type][tier - 1], -+ lrugen->protected[hist][type][tier - 1] + delta); -+ __mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta); -+ return true; -+ } -+ -+ /* waiting for writeback */ -+ if (folio_test_locked(folio) || folio_test_writeback(folio) || -+ (type == LRU_GEN_FILE && folio_test_dirty(folio))) { -+ gen = folio_inc_gen(lruvec, folio, true); -+ list_move(&folio->lru, &lrugen->lists[gen][type][zone]); -+ return true; -+ } -+ -+ return false; -+} -+ -+static bool isolate_folio(struct lruvec *lruvec, struct folio *folio, struct scan_control *sc) -+{ -+ bool success; -+ -+ /* unmapping inhibited */ -+ if (!sc->may_unmap && folio_mapped(folio)) -+ return false; -+ -+ /* swapping inhibited */ -+ if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) && -+ (folio_test_dirty(folio) || -+ (folio_test_anon(folio) && !folio_test_swapcache(folio)))) -+ return false; -+ -+ /* raced with release_pages() */ -+ if (!folio_try_get(folio)) -+ return false; -+ -+ /* raced with another isolation */ -+ if (!folio_test_clear_lru(folio)) { -+ folio_put(folio); -+ return false; -+ } -+ -+ /* see the comment on MAX_NR_TIERS */ -+ if (!folio_test_referenced(folio)) -+ set_mask_bits(&folio->flags, LRU_REFS_MASK | LRU_REFS_FLAGS, 0); -+ -+ /* for shrink_page_list() */ -+ folio_clear_reclaim(folio); -+ folio_clear_referenced(folio); -+ -+ success = lru_gen_del_folio(lruvec, folio, true); -+ VM_WARN_ON_ONCE_FOLIO(!success, folio); -+ -+ return true; -+} -+ -+static int scan_folios(struct lruvec *lruvec, struct scan_control *sc, -+ int type, int tier, struct 
list_head *list) -+{ -+ int gen, zone; -+ enum vm_event_item item; -+ int sorted = 0; -+ int scanned = 0; -+ int isolated = 0; -+ int remaining = MAX_LRU_BATCH; -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ struct mem_cgroup *memcg = lruvec_memcg(lruvec); -+ -+ VM_WARN_ON_ONCE(!list_empty(list)); -+ -+ if (get_nr_gens(lruvec, type) == MIN_NR_GENS) -+ return 0; -+ -+ gen = lru_gen_from_seq(lrugen->min_seq[type]); -+ -+ for (zone = sc->reclaim_idx; zone >= 0; zone--) { -+ LIST_HEAD(moved); -+ int skipped = 0; -+ struct list_head *head = &lrugen->lists[gen][type][zone]; -+ -+ while (!list_empty(head)) { -+ struct folio *folio = lru_to_folio(head); -+ int delta = folio_nr_pages(folio); -+ -+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio); -+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio); -+ VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio); -+ VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio); -+ -+ scanned += delta; -+ -+ if (sort_folio(lruvec, folio, tier)) -+ sorted += delta; -+ else if (isolate_folio(lruvec, folio, sc)) { -+ list_add(&folio->lru, list); -+ isolated += delta; -+ } else { -+ list_move(&folio->lru, &moved); -+ skipped += delta; -+ } -+ -+ if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH) -+ break; -+ } -+ -+ if (skipped) { -+ list_splice(&moved, head); -+ __count_zid_vm_events(PGSCAN_SKIP, zone, skipped); -+ } -+ -+ if (!remaining || isolated >= MIN_LRU_BATCH) -+ break; -+ } -+ -+ item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT; -+ if (!cgroup_reclaim(sc)) { -+ __count_vm_events(item, isolated); -+ __count_vm_events(PGREFILL, sorted); -+ } -+ __count_memcg_events(memcg, item, isolated); -+ __count_memcg_events(memcg, PGREFILL, sorted); -+ __count_vm_events(PGSCAN_ANON + type, isolated); -+ -+ /* -+ * There might not be eligible pages due to reclaim_idx, may_unmap and -+ * may_writepage. Check the remaining to prevent livelock if it's not -+ * making progress. -+ */ -+ return isolated || !remaining ? scanned : 0; -+} -+ -+static int get_tier_idx(struct lruvec *lruvec, int type) -+{ -+ int tier; -+ struct ctrl_pos sp, pv; -+ -+ /* -+ * To leave a margin for fluctuations, use a larger gain factor (1:2). -+ * This value is chosen because any other tier would have at least twice -+ * as many refaults as the first tier. -+ */ -+ read_ctrl_pos(lruvec, type, 0, 1, &sp); -+ for (tier = 1; tier < MAX_NR_TIERS; tier++) { -+ read_ctrl_pos(lruvec, type, tier, 2, &pv); -+ if (!positive_ctrl_err(&sp, &pv)) -+ break; -+ } -+ -+ return tier - 1; -+} -+ -+static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_idx) -+{ -+ int type, tier; -+ struct ctrl_pos sp, pv; -+ int gain[ANON_AND_FILE] = { swappiness, 200 - swappiness }; -+ -+ /* -+ * Compare the first tier of anon with that of file to determine which -+ * type to scan. Also need to compare other tiers of the selected type -+ * with the first tier of the other type to determine the last tier (of -+ * the selected type) to evict. 
-+ */ -+ read_ctrl_pos(lruvec, LRU_GEN_ANON, 0, gain[LRU_GEN_ANON], &sp); -+ read_ctrl_pos(lruvec, LRU_GEN_FILE, 0, gain[LRU_GEN_FILE], &pv); -+ type = positive_ctrl_err(&sp, &pv); -+ -+ read_ctrl_pos(lruvec, !type, 0, gain[!type], &sp); -+ for (tier = 1; tier < MAX_NR_TIERS; tier++) { -+ read_ctrl_pos(lruvec, type, tier, gain[type], &pv); -+ if (!positive_ctrl_err(&sp, &pv)) -+ break; -+ } -+ -+ *tier_idx = tier - 1; -+ -+ return type; -+} -+ -+static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness, -+ int *type_scanned, struct list_head *list) -+{ -+ int i; -+ int type; -+ int scanned; -+ int tier = -1; -+ DEFINE_MIN_SEQ(lruvec); -+ -+ /* -+ * Try to make the obvious choice first. When anon and file are both -+ * available from the same generation, interpret swappiness 1 as file -+ * first and 200 as anon first. -+ */ -+ if (!swappiness) -+ type = LRU_GEN_FILE; -+ else if (min_seq[LRU_GEN_ANON] < min_seq[LRU_GEN_FILE]) -+ type = LRU_GEN_ANON; -+ else if (swappiness == 1) -+ type = LRU_GEN_FILE; -+ else if (swappiness == 200) -+ type = LRU_GEN_ANON; -+ else -+ type = get_type_to_scan(lruvec, swappiness, &tier); -+ -+ for (i = !swappiness; i < ANON_AND_FILE; i++) { -+ if (tier < 0) -+ tier = get_tier_idx(lruvec, type); -+ -+ scanned = scan_folios(lruvec, sc, type, tier, list); -+ if (scanned) -+ break; -+ -+ type = !type; -+ tier = -1; -+ } -+ -+ *type_scanned = type; -+ -+ return scanned; -+} -+ -+static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness) -+{ -+ int type; -+ int scanned; -+ int reclaimed; -+ LIST_HEAD(list); -+ struct folio *folio; -+ enum vm_event_item item; -+ struct reclaim_stat stat; -+ struct mem_cgroup *memcg = lruvec_memcg(lruvec); -+ struct pglist_data *pgdat = lruvec_pgdat(lruvec); -+ -+ spin_lock_irq(&lruvec->lru_lock); -+ -+ scanned = isolate_folios(lruvec, sc, swappiness, &type, &list); -+ -+ scanned += try_to_inc_min_seq(lruvec, swappiness); -+ -+ if (get_nr_gens(lruvec, !swappiness) == MIN_NR_GENS) -+ scanned = 0; -+ -+ spin_unlock_irq(&lruvec->lru_lock); -+ -+ if (list_empty(&list)) -+ return scanned; -+ -+ reclaimed = shrink_page_list(&list, pgdat, sc, &stat, false); -+ -+ list_for_each_entry(folio, &list, lru) { -+ /* restore LRU_REFS_FLAGS cleared by isolate_folio() */ -+ if (folio_test_workingset(folio)) -+ folio_set_referenced(folio); -+ -+ /* don't add rejected pages to the oldest generation */ -+ if (folio_test_reclaim(folio) && -+ (folio_test_dirty(folio) || folio_test_writeback(folio))) -+ folio_clear_active(folio); -+ else -+ folio_set_active(folio); -+ } -+ -+ spin_lock_irq(&lruvec->lru_lock); -+ -+ move_pages_to_lru(lruvec, &list); -+ -+ item = current_is_kswapd() ? 
PGSTEAL_KSWAPD : PGSTEAL_DIRECT; -+ if (!cgroup_reclaim(sc)) -+ __count_vm_events(item, reclaimed); -+ __count_memcg_events(memcg, item, reclaimed); -+ __count_vm_events(PGSTEAL_ANON + type, reclaimed); -+ -+ spin_unlock_irq(&lruvec->lru_lock); -+ -+ mem_cgroup_uncharge_list(&list); -+ free_unref_page_list(&list); -+ -+ sc->nr_reclaimed += reclaimed; -+ -+ return scanned; -+} -+ -+static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, -+ bool can_swap, unsigned long reclaimed) -+{ -+ int priority; -+ bool need_aging; -+ unsigned long nr_to_scan; -+ struct mem_cgroup *memcg = lruvec_memcg(lruvec); -+ DEFINE_MAX_SEQ(lruvec); -+ DEFINE_MIN_SEQ(lruvec); -+ -+ if (mem_cgroup_below_min(memcg) || -+ (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim)) -+ return 0; -+ -+ nr_to_scan = get_nr_evictable(lruvec, max_seq, min_seq, can_swap, &need_aging); -+ if (!nr_to_scan) -+ return 0; -+ -+ /* adjust priority if memcg is offline or the target is met */ -+ if (!mem_cgroup_online(memcg)) -+ priority = 0; -+ else if (sc->nr_reclaimed - reclaimed >= sc->nr_to_reclaim) -+ priority = DEF_PRIORITY; -+ else -+ priority = sc->priority; -+ -+ nr_to_scan >>= priority; -+ if (!nr_to_scan) -+ return 0; -+ -+ if (!need_aging) -+ return nr_to_scan; -+ -+ /* skip the aging path at the default priority */ -+ if (priority == DEF_PRIORITY) -+ goto done; -+ -+ /* leave the work to lru_gen_age_node() */ -+ if (current_is_kswapd()) -+ return 0; -+ -+ inc_max_seq(lruvec, max_seq, can_swap); -+done: -+ return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0; -+} -+ -+static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) -+{ -+ struct blk_plug plug; -+ unsigned long scanned = 0; -+ unsigned long reclaimed = sc->nr_reclaimed; -+ -+ lru_add_drain(); -+ -+ blk_start_plug(&plug); -+ -+ while (true) { -+ int delta; -+ int swappiness; -+ unsigned long nr_to_scan; -+ -+ if (sc->may_swap) -+ swappiness = get_swappiness(lruvec, sc); -+ else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc)) -+ swappiness = 1; -+ else -+ swappiness = 0; -+ -+ nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, reclaimed); -+ if (!nr_to_scan) -+ break; -+ -+ delta = evict_folios(lruvec, sc, swappiness); -+ if (!delta) -+ break; -+ -+ scanned += delta; -+ if (scanned >= nr_to_scan) -+ break; -+ -+ cond_resched(); -+ } -+ -+ blk_finish_plug(&plug); -+} -+ - /****************************************************************************** - * initialization - ******************************************************************************/ -@@ -3065,6 +3839,16 @@ static int __init init_lru_gen(void) - }; - late_initcall(init_lru_gen); - -+#else /* !CONFIG_LRU_GEN */ -+ -+static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) -+{ -+} -+ -+static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) -+{ -+} -+ - #endif /* CONFIG_LRU_GEN */ - - static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) -@@ -3078,6 +3862,11 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) - struct blk_plug plug; - bool scan_adjusted; - -+ if (lru_gen_enabled()) { -+ lru_gen_shrink_lruvec(lruvec, sc); -+ return; -+ } -+ - get_scan_count(lruvec, sc, nr); - - /* Record the original scan target for proportional adjustments later */ -@@ -3582,6 +4371,9 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat) - struct lruvec *target_lruvec; - unsigned long refaults; - -+ if (lru_gen_enabled()) 
-+ return; -+ - target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat); - refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON); - target_lruvec->refaults[0] = refaults; -@@ -3946,12 +4738,17 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, - } - #endif - --static void age_active_anon(struct pglist_data *pgdat, -+static void kswapd_age_node(struct pglist_data *pgdat, - struct scan_control *sc) - { - struct mem_cgroup *memcg; - struct lruvec *lruvec; - -+ if (lru_gen_enabled()) { -+ lru_gen_age_node(pgdat, sc); -+ return; -+ } -+ - if (!can_age_anon_pages(pgdat, sc)) - return; - -@@ -4271,12 +5068,11 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx) - sc.may_swap = !nr_boost_reclaim; - - /* -- * Do some background aging of the anon list, to give -- * pages a chance to be referenced before reclaiming. All -- * pages are rotated regardless of classzone as this is -- * about consistent aging. -+ * Do some background aging, to give pages a chance to be -+ * referenced before reclaiming. All pages are rotated -+ * regardless of classzone as this is about consistent aging. - */ -- age_active_anon(pgdat, &sc); -+ kswapd_age_node(pgdat, &sc); - - /* - * If we're getting trouble reclaiming, start doing writepage -diff --git a/mm/workingset.c b/mm/workingset.c -index 592569a8974c..84a9e0ab04ad 100644 ---- a/mm/workingset.c -+++ b/mm/workingset.c -@@ -187,7 +187,6 @@ static unsigned int bucket_order __read_mostly; - static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction, - bool workingset) - { -- eviction >>= bucket_order; - eviction &= EVICTION_MASK; - eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; - eviction = (eviction << NODES_SHIFT) | pgdat->node_id; -@@ -212,10 +211,107 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, - - *memcgidp = memcgid; - *pgdat = NODE_DATA(nid); -- *evictionp = entry << bucket_order; -+ *evictionp = entry; - *workingsetp = workingset; - } - -+#ifdef CONFIG_LRU_GEN -+ -+static void *lru_gen_eviction(struct folio *folio) -+{ -+ int hist; -+ unsigned long token; -+ unsigned long min_seq; -+ struct lruvec *lruvec; -+ struct lru_gen_struct *lrugen; -+ int type = folio_is_file_lru(folio); -+ int delta = folio_nr_pages(folio); -+ int refs = folio_lru_refs(folio); -+ int tier = lru_tier_from_refs(refs); -+ struct mem_cgroup *memcg = folio_memcg(folio); -+ struct pglist_data *pgdat = folio_pgdat(folio); -+ -+ BUILD_BUG_ON(LRU_GEN_WIDTH + LRU_REFS_WIDTH > BITS_PER_LONG - EVICTION_SHIFT); -+ -+ lruvec = mem_cgroup_lruvec(memcg, pgdat); -+ lrugen = &lruvec->lrugen; -+ min_seq = READ_ONCE(lrugen->min_seq[type]); -+ token = (min_seq << LRU_REFS_WIDTH) | max(refs - 1, 0); -+ -+ hist = lru_hist_from_seq(min_seq); -+ atomic_long_add(delta, &lrugen->evicted[hist][type][tier]); -+ -+ return pack_shadow(mem_cgroup_id(memcg), pgdat, token, refs); -+} -+ -+static void lru_gen_refault(struct folio *folio, void *shadow) -+{ -+ int hist, tier, refs; -+ int memcg_id; -+ bool workingset; -+ unsigned long token; -+ unsigned long min_seq; -+ struct lruvec *lruvec; -+ struct lru_gen_struct *lrugen; -+ struct mem_cgroup *memcg; -+ struct pglist_data *pgdat; -+ int type = folio_is_file_lru(folio); -+ int delta = folio_nr_pages(folio); -+ -+ unpack_shadow(shadow, &memcg_id, &pgdat, &token, &workingset); -+ -+ if (pgdat != folio_pgdat(folio)) -+ return; -+ -+ rcu_read_lock(); -+ -+ memcg = folio_memcg_rcu(folio); -+ if (memcg_id != mem_cgroup_id(memcg)) -+ goto unlock; -+ -+ 
lruvec = mem_cgroup_lruvec(memcg, pgdat); -+ lrugen = &lruvec->lrugen; -+ -+ min_seq = READ_ONCE(lrugen->min_seq[type]); -+ if ((token >> LRU_REFS_WIDTH) != (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH))) -+ goto unlock; -+ -+ hist = lru_hist_from_seq(min_seq); -+ /* see the comment in folio_lru_refs() */ -+ refs = (token & (BIT(LRU_REFS_WIDTH) - 1)) + workingset; -+ tier = lru_tier_from_refs(refs); -+ -+ atomic_long_add(delta, &lrugen->refaulted[hist][type][tier]); -+ mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + type, delta); -+ -+ /* -+ * Count the following two cases as stalls: -+ * 1. For pages accessed through page tables, hotter pages pushed out -+ * hot pages which refaulted immediately. -+ * 2. For pages accessed multiple times through file descriptors, -+ * numbers of accesses might have been out of the range. -+ */ -+ if (lru_gen_in_fault() || refs == BIT(LRU_REFS_WIDTH)) { -+ folio_set_workingset(folio); -+ mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta); -+ } -+unlock: -+ rcu_read_unlock(); -+} -+ -+#else /* !CONFIG_LRU_GEN */ -+ -+static void *lru_gen_eviction(struct folio *folio) -+{ -+ return NULL; -+} -+ -+static void lru_gen_refault(struct folio *folio, void *shadow) -+{ -+} -+ -+#endif /* CONFIG_LRU_GEN */ -+ - /** - * workingset_age_nonresident - age non-resident entries as LRU ages - * @lruvec: the lruvec that was aged -@@ -264,10 +360,14 @@ void *workingset_eviction(struct folio *folio, struct mem_cgroup *target_memcg) - VM_BUG_ON_FOLIO(folio_ref_count(folio), folio); - VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - -+ if (lru_gen_enabled()) -+ return lru_gen_eviction(folio); -+ - lruvec = mem_cgroup_lruvec(target_memcg, pgdat); - /* XXX: target_memcg can be NULL, go through lruvec */ - memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); - eviction = atomic_long_read(&lruvec->nonresident_age); -+ eviction >>= bucket_order; - workingset_age_nonresident(lruvec, folio_nr_pages(folio)); - return pack_shadow(memcgid, pgdat, eviction, - folio_test_workingset(folio)); -@@ -298,7 +398,13 @@ void workingset_refault(struct folio *folio, void *shadow) - int memcgid; - long nr; - -+ if (lru_gen_enabled()) { -+ lru_gen_refault(folio, shadow); -+ return; -+ } -+ - unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset); -+ eviction <<= bucket_order; - - rcu_read_lock(); - /* - -From patchwork Wed Jul 6 22:00:16 2022 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Patchwork-Submitter: Yu Zhao -X-Patchwork-Id: 12908705 -Return-Path: -X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on - aws-us-west-2-korg-lkml-1.web.codeaurora.org -Received: from kanga.kvack.org (kanga.kvack.org [205.233.56.17]) - by smtp.lore.kernel.org (Postfix) with ESMTP id 14A6EC433EF - for ; Wed, 6 Jul 2022 22:01:08 +0000 (UTC) -Received: by kanga.kvack.org (Postfix) - id B6A108E0003; Wed, 6 Jul 2022 18:01:04 -0400 (EDT) -Received: by kanga.kvack.org (Postfix, from userid 40) - id AA0BB8E0001; Wed, 6 Jul 2022 18:01:04 -0400 (EDT) -X-Delivered-To: int-list-linux-mm@kvack.org -Received: by kanga.kvack.org (Postfix, from userid 63042) - id 91B3F8E0003; Wed, 6 Jul 2022 18:01:04 -0400 (EDT) -X-Delivered-To: linux-mm@kvack.org -Received: from relay.hostedemail.com (smtprelay0015.hostedemail.com - [216.40.44.15]) - by kanga.kvack.org (Postfix) with ESMTP id 7CF7B8E0001 - for ; Wed, 6 Jul 2022 18:01:04 -0400 (EDT) -Received: from smtpin10.hostedemail.com (a10.router.float.18 [10.200.18.1]) - by unirelay01.hostedemail.com (Postfix) 
with ESMTP id 3A6B760CA7 - for ; Wed, 6 Jul 2022 22:01:04 +0000 (UTC) -X-FDA: 79658046048.10.E729B6A -Received: from mail-io1-f73.google.com (mail-io1-f73.google.com - [209.85.166.73]) - by imf12.hostedemail.com (Postfix) with ESMTP id 654DC40027 - for ; Wed, 6 Jul 2022 22:01:02 +0000 (UTC) -Received: by mail-io1-f73.google.com with SMTP id - k1-20020a5d8741000000b00678ad1103e7so2775981iol.21 - for ; Wed, 06 Jul 2022 15:01:02 -0700 (PDT) -DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20210112; - h=date:in-reply-to:message-id:mime-version:references:subject:from:to - :cc:content-transfer-encoding; - bh=XKxfVy7bUFxJEpcf3Ov7jSHxjMmYVb5gb0hRKtMPzZA=; - b=KBFBaieE6U899pZedfVW186dkzrS93jrjdIku8VfT3EELG4tmwSu4pbA8t8KgFrkX7 - PNLYIcjPRwCltpKZ41cpDa72lID2PMQjd0C5UzA1EP6Fozv39FS8efLoCNj0H5GROfCg - QMeGKWc2c6xuBh73e/hz1kG0ddQk8uDEqQzdd1hwg6GKOeAe0e98I4co7JiaxOzZQyVa - H3rcYT5ECNNWjJIqW6rJYkUeALUPQkQ6SiSCuxFVIHVt/LqtAYlBu4IfaEL80m1SvJmZ - XAzuOW4B/+BDlzSPXhxDXR3iWNFF0evXZaEn2Xyp6i9pgpXVhqsXUcbDrh/yv+aznbGp - vfZw== -X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=1e100.net; s=20210112; - h=x-gm-message-state:date:in-reply-to:message-id:mime-version - :references:subject:from:to:cc:content-transfer-encoding; - bh=XKxfVy7bUFxJEpcf3Ov7jSHxjMmYVb5gb0hRKtMPzZA=; - b=wEDdfX1Ca1wRAghCjDtN/218Ar7YzHk1SswV9PWlO2rWdVGpEgzcJ4I72IScKmI1ak - byWww5GGZKkp2et7daVUaRaYUyNsN+JvzNTS4ZkA1+KUJp4sBdk6TL4F/+sKxhpfeXRW - 4O7rUvWmL/AcEhzhNNzOrk8NgMwLZJxEbmWumi3GZaaNwvSzvi/ZnQ4iy7QczcGNuoOD - vHTTjgYxcTl+FO55iQfNa8RXZ2EK68p8Q3s6TnE+enNd0MlFosYB8z+tz3T9tsSSAk7D - L5g2KMDag6shNFwrkU67N/AO/rmf0tvQvfgZZtmfM+fNoppSNpfEruaUg6sf+al7cZaq - Y9Jg== -X-Gm-Message-State: AJIora9J02U2PbT6ikgYVNH5lZ4bF/dPn/RAZ8KsVxnRgQbrFGcLQrwU - eoZuUGfIebQ11JrsGluVoFQjXZwzJak= -X-Google-Smtp-Source: - AGRyM1sRwdKObor4YUQFmQ9ta0XlmIxSt4ZUo1xITqK6sTS54jUHQ2ZrB8LtbtvMYYrpomN4w49bZKlSJRk= -X-Received: from yuzhao.bld.corp.google.com - ([2620:15c:183:200:b89c:e10a:466e:cf7d]) - (user=yuzhao job=sendgmr) by 2002:a05:6e02:148c:b0:2dc:38ae:5c6a with SMTP id - n12-20020a056e02148c00b002dc38ae5c6amr2363805ilk.115.1657144861728; Wed, 06 - Jul 2022 15:01:01 -0700 (PDT) -Date: Wed, 6 Jul 2022 16:00:16 -0600 -In-Reply-To: <20220706220022.968789-1-yuzhao@google.com> -Message-Id: <20220706220022.968789-8-yuzhao@google.com> -Mime-Version: 1.0 -References: <20220706220022.968789-1-yuzhao@google.com> -X-Mailer: git-send-email 2.37.0.rc0.161.g10f37bed90-goog -Subject: [PATCH v13 07/14] mm: multi-gen LRU: exploit locality in rmap -From: Yu Zhao -To: Andrew Morton -Cc: Andi Kleen , - Aneesh Kumar , - Catalin Marinas , - Dave Hansen , Hillf Danton , - Jens Axboe , Johannes Weiner , - Jonathan Corbet , - Linus Torvalds , - Matthew Wilcox , Mel Gorman , - Michael Larabel , - Michal Hocko , Mike Rapoport , - Peter Zijlstra , Tejun Heo , - Vlastimil Babka , Will Deacon , - linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org, - linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org, - page-reclaim@google.com, Yu Zhao , - Barry Song , Brian Geffon , - Jan Alexander Steffens , - Oleksandr Natalenko , - Steven Barrett , - Suleiman Souhlal , Daniel Byrne , - Donald Carr , - " =?utf-8?q?Holger_Hoffst=C3=A4tte?= " , - Konstantin Kharlamov , - Shuang Zhai , Sofia Trinh , - Vaibhav Jain -ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; - d=hostedemail.com; - s=arc-20220608; t=1657144862; - h=from:from:sender:reply-to:subject:subject:date:date: - message-id:message-id:to:to:cc:cc:mime-version:mime-version: - 
content-type:content-type: - content-transfer-encoding:content-transfer-encoding: - in-reply-to:in-reply-to:references:references:dkim-signature; - bh=XKxfVy7bUFxJEpcf3Ov7jSHxjMmYVb5gb0hRKtMPzZA=; - b=78gzoYQUEpZ/3nhPL81S9IoTS+tamEn/8D7ioIFwlboSYOhcwIufnOyPh57lBQoFdANuof - SnLww4J7TveiCJa5kFHPwj8xzXM0ANKbJmf4o4cLIVitPhVH7z6V5EFfj457OWAKTjIo6b - NZ86RpRkjWLByNbszbZPPLUZQi27u4U= -ARC-Seal: i=1; s=arc-20220608; d=hostedemail.com; t=1657144862; a=rsa-sha256; - cv=none; - b=gbyvyPJZ1QVIBcx+YUE+JKq+Cj69MF+XU4E+AoEjDiVevGW0fLXZdcgIYKhIeTQ4VReTbP - TYy+UJJ7mp48jYOJ43EUlODLaxeez62GyJ6+OwE5GLOHlgIg1MIlrrlMmwrW3B3t4byGOx - 58gkmlSjFRcnFO6DMq3ACgJdURFm7Fo= -ARC-Authentication-Results: i=1; - imf12.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=KBFBaieE; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf12.hostedemail.com: domain of - 3HQbGYgYKCF4UQVD6KCKKCHA.8KIHEJQT-IIGR68G.KNC@flex--yuzhao.bounces.google.com - designates 209.85.166.73 as permitted sender) - smtp.mailfrom=3HQbGYgYKCF4UQVD6KCKKCHA.8KIHEJQT-IIGR68G.KNC@flex--yuzhao.bounces.google.com -X-Rspam-User: -Authentication-Results: imf12.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=KBFBaieE; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf12.hostedemail.com: domain of - 3HQbGYgYKCF4UQVD6KCKKCHA.8KIHEJQT-IIGR68G.KNC@flex--yuzhao.bounces.google.com - designates 209.85.166.73 as permitted sender) - smtp.mailfrom=3HQbGYgYKCF4UQVD6KCKKCHA.8KIHEJQT-IIGR68G.KNC@flex--yuzhao.bounces.google.com -X-Rspamd-Server: rspam06 -X-Rspamd-Queue-Id: 654DC40027 -X-Stat-Signature: upc9kmwzt4t6z55coguwsrw11efy9iir -X-HE-Tag: 1657144862-661235 -X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4 -Sender: owner-linux-mm@kvack.org -Precedence: bulk -X-Loop: owner-majordomo@kvack.org -List-ID: - -Searching the rmap for PTEs mapping each page on an LRU list (to test -and clear the accessed bit) can be expensive because pages from -different VMAs (PA space) are not cache friendly to the rmap (VA -space). For workloads mostly using mapped pages, searching the rmap -can incur the highest CPU cost in the reclaim path. - -This patch exploits spatial locality to reduce the trips into the -rmap. When shrink_page_list() walks the rmap and finds a young PTE, a -new function lru_gen_look_around() scans at most BITS_PER_LONG-1 -adjacent PTEs. On finding another young PTE, it clears the accessed -bit and updates the gen counter of the page mapped by this PTE to -(max_seq%MAX_NR_GENS)+1. 
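For scale, assuming 4 KiB base pages on a 64-bit kernel (neither is spelled out in this paragraph): a single rmap hit lets lru_gen_look_around() examine up to BITS_PER_LONG - 1 = 63 neighbouring PTEs, i.e. roughly 63 * 4 KiB, about 252 KiB of contiguous virtual address space, for the price of one page_vma_mapped_walk(). The "+1" in (max_seq%MAX_NR_GENS)+1 reflects the encoding used by folio_update_gen() further down in this patch: generation numbers are stored in the page flags offset by one, so that a value of zero can mean the folio is not on a multi-gen LRU list.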
- -Server benchmark results: - Single workload: - fio (buffered I/O): no change - - Single workload: - memcached (anon): +[3, 5]% - Ops/sec KB/sec - patch1-6: 1106168.46 43025.04 - patch1-7: 1147696.57 44640.29 - - Configurations: - no change - -Client benchmark results: - kswapd profiles: - patch1-6 - 39.03% lzo1x_1_do_compress (real work) - 18.47% page_vma_mapped_walk (overhead) - 6.74% _raw_spin_unlock_irq - 3.97% do_raw_spin_lock - 2.49% ptep_clear_flush - 2.48% anon_vma_interval_tree_iter_first - 1.92% folio_referenced_one - 1.88% __zram_bvec_write - 1.48% memmove - 1.31% vma_interval_tree_iter_next - - patch1-7 - 48.16% lzo1x_1_do_compress (real work) - 8.20% page_vma_mapped_walk (overhead) - 7.06% _raw_spin_unlock_irq - 2.92% ptep_clear_flush - 2.53% __zram_bvec_write - 2.11% do_raw_spin_lock - 2.02% memmove - 1.93% lru_gen_look_around - 1.56% free_unref_page_list - 1.40% memset - - Configurations: - no change - -Signed-off-by: Yu Zhao -Acked-by: Barry Song -Acked-by: Brian Geffon -Acked-by: Jan Alexander Steffens (heftig) -Acked-by: Oleksandr Natalenko -Acked-by: Steven Barrett -Acked-by: Suleiman Souhlal -Tested-by: Daniel Byrne -Tested-by: Donald Carr -Tested-by: Holger Hoffstätte -Tested-by: Konstantin Kharlamov -Tested-by: Shuang Zhai -Tested-by: Sofia Trinh -Tested-by: Vaibhav Jain ---- - include/linux/memcontrol.h | 31 +++++++ - include/linux/mm.h | 5 + - include/linux/mmzone.h | 6 ++ - mm/internal.h | 1 + - mm/memcontrol.c | 1 + - mm/rmap.c | 6 ++ - mm/swap.c | 4 +- - mm/vmscan.c | 184 +++++++++++++++++++++++++++++++++++++ - 8 files changed, 236 insertions(+), 2 deletions(-) - -diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h -index 9ecead1042b9..9d0fea17f9ef 100644 ---- a/include/linux/memcontrol.h -+++ b/include/linux/memcontrol.h -@@ -444,6 +444,7 @@ static inline struct obj_cgroup *__folio_objcg(struct folio *folio) - * - LRU isolation - * - lock_page_memcg() - * - exclusive reference -+ * - mem_cgroup_trylock_pages() - * - * For a kmem folio a caller should hold an rcu read lock to protect memcg - * associated with a kmem folio from being released. -@@ -505,6 +506,7 @@ static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio) - * - LRU isolation - * - lock_page_memcg() - * - exclusive reference -+ * - mem_cgroup_trylock_pages() - * - * For a kmem page a caller should hold an rcu read lock to protect memcg - * associated with a kmem page from being released. 
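A hedged sketch of how the new helper is meant to be paired with its unlock counterpart; this mirrors the caller added later in this patch (lru_gen_look_around()), with the batching details elided and the variable names used only as placeholders:

	/* Stabilize folio_memcg() for a batch of pages, then update them. */
	if (!mem_cgroup_trylock_pages(memcg))
		return;		/* a memcg move is in flight; give up or retry later */

	spin_lock_irq(&lruvec->lru_lock);
	/* ... walk the batch; skip any folio whose folio_memcg_rcu() != memcg ... */
	spin_unlock_irq(&lruvec->lru_lock);

	mem_cgroup_unlock_pages();	/* drops the RCU read lock taken by the trylock */
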
-@@ -950,6 +952,23 @@ void unlock_page_memcg(struct page *page); - - void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val); - -+/* try to stablize folio_memcg() for all the pages in a memcg */ -+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) -+{ -+ rcu_read_lock(); -+ -+ if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account)) -+ return true; -+ -+ rcu_read_unlock(); -+ return false; -+} -+ -+static inline void mem_cgroup_unlock_pages(void) -+{ -+ rcu_read_unlock(); -+} -+ - /* idx can be of type enum memcg_stat_item or node_stat_item */ - static inline void mod_memcg_state(struct mem_cgroup *memcg, - int idx, int val) -@@ -1401,6 +1420,18 @@ static inline void folio_memcg_unlock(struct folio *folio) - { - } - -+static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) -+{ -+ /* to match folio_memcg_rcu() */ -+ rcu_read_lock(); -+ return true; -+} -+ -+static inline void mem_cgroup_unlock_pages(void) -+{ -+ rcu_read_unlock(); -+} -+ - static inline void mem_cgroup_handle_over_high(void) - { - } -diff --git a/include/linux/mm.h b/include/linux/mm.h -index ed5393e5930d..981b2e447936 100644 ---- a/include/linux/mm.h -+++ b/include/linux/mm.h -@@ -1523,6 +1523,11 @@ static inline unsigned long folio_pfn(struct folio *folio) - return page_to_pfn(&folio->page); - } - -+static inline struct folio *pfn_folio(unsigned long pfn) -+{ -+ return page_folio(pfn_to_page(pfn)); -+} -+ - static inline atomic_t *folio_pincount_ptr(struct folio *folio) - { - return &folio_page(folio, 1)->compound_pincount; -diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index 0d76222501ed..4fd7fc16eeb4 100644 ---- a/include/linux/mmzone.h -+++ b/include/linux/mmzone.h -@@ -372,6 +372,7 @@ enum lruvec_flags { - #ifndef __GENERATING_BOUNDS_H - - struct lruvec; -+struct page_vma_mapped_walk; - - #define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) - #define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) -@@ -427,6 +428,7 @@ struct lru_gen_struct { - }; - - void lru_gen_init_lruvec(struct lruvec *lruvec); -+void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); - - #ifdef CONFIG_MEMCG - void lru_gen_init_memcg(struct mem_cgroup *memcg); -@@ -439,6 +441,10 @@ static inline void lru_gen_init_lruvec(struct lruvec *lruvec) - { - } - -+static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) -+{ -+} -+ - #ifdef CONFIG_MEMCG - static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) - { -diff --git a/mm/internal.h b/mm/internal.h -index c0f8fbe0445b..3d070582052e 100644 ---- a/mm/internal.h -+++ b/mm/internal.h -@@ -83,6 +83,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf); - void folio_rotate_reclaimable(struct folio *folio); - bool __folio_end_writeback(struct folio *folio); - void deactivate_file_folio(struct folio *folio); -+void folio_activate(struct folio *folio); - - void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, - unsigned long floor, unsigned long ceiling); -diff --git a/mm/memcontrol.c b/mm/memcontrol.c -index 7d58e8a73ece..743f8513f1c3 100644 ---- a/mm/memcontrol.c -+++ b/mm/memcontrol.c -@@ -2777,6 +2777,7 @@ static void commit_charge(struct folio *folio, struct mem_cgroup *memcg) - * - LRU isolation - * - lock_page_memcg() - * - exclusive reference -+ * - mem_cgroup_trylock_pages() - */ - folio->memcg_data = (unsigned long)memcg; - } -diff --git a/mm/rmap.c b/mm/rmap.c -index 5bcb334cd6f2..dce1a56b02f8 100644 ---- a/mm/rmap.c -+++ b/mm/rmap.c -@@ -830,6 +830,12 @@ 
static bool folio_referenced_one(struct folio *folio, - } - - if (pvmw.pte) { -+ if (lru_gen_enabled() && pte_young(*pvmw.pte) && -+ !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) { -+ lru_gen_look_around(&pvmw); -+ referenced++; -+ } -+ - if (ptep_clear_flush_young_notify(vma, address, - pvmw.pte)) { - /* -diff --git a/mm/swap.c b/mm/swap.c -index 67e7962fbacc..131fc76242a3 100644 ---- a/mm/swap.c -+++ b/mm/swap.c -@@ -342,7 +342,7 @@ static bool need_activate_page_drain(int cpu) - return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0; - } - --static void folio_activate(struct folio *folio) -+void folio_activate(struct folio *folio) - { - if (folio_test_lru(folio) && !folio_test_active(folio) && - !folio_test_unevictable(folio)) { -@@ -362,7 +362,7 @@ static inline void activate_page_drain(int cpu) - { - } - --static void folio_activate(struct folio *folio) -+void folio_activate(struct folio *folio) - { - struct lruvec *lruvec; - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index f768d61e7b85..ec786fc556a7 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -1574,6 +1574,11 @@ static unsigned int shrink_page_list(struct list_head *page_list, - if (!sc->may_unmap && folio_mapped(folio)) - goto keep_locked; - -+ /* folio_update_gen() tried to promote this page? */ -+ if (lru_gen_enabled() && !ignore_references && -+ folio_mapped(folio) && folio_test_referenced(folio)) -+ goto keep_locked; -+ - /* - * The number of dirty pages determines if a node is marked - * reclaim_congested. kswapd will stall and start writing -@@ -3161,6 +3166,29 @@ static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv) - * the aging - ******************************************************************************/ - -+/* promote pages accessed through page tables */ -+static int folio_update_gen(struct folio *folio, int gen) -+{ -+ unsigned long new_flags, old_flags = READ_ONCE(folio->flags); -+ -+ VM_WARN_ON_ONCE(gen >= MAX_NR_GENS); -+ VM_WARN_ON_ONCE(!rcu_read_lock_held()); -+ -+ do { -+ /* lru_gen_del_folio() has isolated this page? */ -+ if (!(old_flags & LRU_GEN_MASK)) { -+ /* for shrink_page_list() */ -+ new_flags = old_flags | BIT(PG_referenced); -+ continue; -+ } -+ -+ new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS); -+ new_flags |= (gen + 1UL) << LRU_GEN_PGOFF; -+ } while (!try_cmpxchg(&folio->flags, &old_flags, new_flags)); -+ -+ return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; -+} -+ - /* protect pages accessed multiple times through file descriptors */ - static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclaiming) - { -@@ -3172,6 +3200,11 @@ static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclai - VM_WARN_ON_ONCE_FOLIO(!(old_flags & LRU_GEN_MASK), folio); - - do { -+ new_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; -+ /* folio_update_gen() has promoted this page? 
*/ -+ if (new_gen >= 0 && new_gen != old_gen) -+ return new_gen; -+ - new_gen = (old_gen + 1) % MAX_NR_GENS; - - new_flags = old_flags & ~(LRU_GEN_MASK | LRU_REFS_MASK | LRU_REFS_FLAGS); -@@ -3186,6 +3219,43 @@ static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclai - return new_gen; - } - -+static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr) -+{ -+ unsigned long pfn = pte_pfn(pte); -+ -+ VM_WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end); -+ -+ if (!pte_present(pte) || is_zero_pfn(pfn)) -+ return -1; -+ -+ if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte))) -+ return -1; -+ -+ if (WARN_ON_ONCE(!pfn_valid(pfn))) -+ return -1; -+ -+ return pfn; -+} -+ -+static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg, -+ struct pglist_data *pgdat) -+{ -+ struct folio *folio; -+ -+ /* try to avoid unnecessary memory loads */ -+ if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) -+ return NULL; -+ -+ folio = pfn_folio(pfn); -+ if (folio_nid(folio) != pgdat->node_id) -+ return NULL; -+ -+ if (folio_memcg_rcu(folio) != memcg) -+ return NULL; -+ -+ return folio; -+} -+ - static void inc_min_seq(struct lruvec *lruvec, int type) - { - struct lru_gen_struct *lrugen = &lruvec->lrugen; -@@ -3387,6 +3457,114 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) - } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); - } - -+/* -+ * This function exploits spatial locality when shrink_page_list() walks the -+ * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages. -+ */ -+void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) -+{ -+ int i; -+ pte_t *pte; -+ unsigned long start; -+ unsigned long end; -+ unsigned long addr; -+ unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {}; -+ struct folio *folio = pfn_folio(pvmw->pfn); -+ struct mem_cgroup *memcg = folio_memcg(folio); -+ struct pglist_data *pgdat = folio_pgdat(folio); -+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); -+ DEFINE_MAX_SEQ(lruvec); -+ int old_gen, new_gen = lru_gen_from_seq(max_seq); -+ -+ lockdep_assert_held(pvmw->ptl); -+ VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio); -+ -+ if (spin_is_contended(pvmw->ptl)) -+ return; -+ -+ start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start); -+ end = min(pvmw->address | ~PMD_MASK, pvmw->vma->vm_end - 1) + 1; -+ -+ if (end - start > MIN_LRU_BATCH * PAGE_SIZE) { -+ if (pvmw->address - start < MIN_LRU_BATCH * PAGE_SIZE / 2) -+ end = start + MIN_LRU_BATCH * PAGE_SIZE; -+ else if (end - pvmw->address < MIN_LRU_BATCH * PAGE_SIZE / 2) -+ start = end - MIN_LRU_BATCH * PAGE_SIZE; -+ else { -+ start = pvmw->address - MIN_LRU_BATCH * PAGE_SIZE / 2; -+ end = pvmw->address + MIN_LRU_BATCH * PAGE_SIZE / 2; -+ } -+ } -+ -+ pte = pvmw->pte - (pvmw->address - start) / PAGE_SIZE; -+ -+ rcu_read_lock(); -+ arch_enter_lazy_mmu_mode(); -+ -+ for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) { -+ unsigned long pfn; -+ -+ pfn = get_pte_pfn(pte[i], pvmw->vma, addr); -+ if (pfn == -1) -+ continue; -+ -+ if (!pte_young(pte[i])) -+ continue; -+ -+ folio = get_pfn_folio(pfn, memcg, pgdat); -+ if (!folio) -+ continue; -+ -+ if (!ptep_test_and_clear_young(pvmw->vma, addr, pte + i)) -+ continue; -+ -+ if (pte_dirty(pte[i]) && !folio_test_dirty(folio) && -+ !(folio_test_anon(folio) && folio_test_swapbacked(folio) && -+ !folio_test_swapcache(folio))) -+ folio_mark_dirty(folio); -+ -+ old_gen = folio_lru_gen(folio); -+ if (old_gen < 0) -+ 
folio_set_referenced(folio); -+ else if (old_gen != new_gen) -+ __set_bit(i, bitmap); -+ } -+ -+ arch_leave_lazy_mmu_mode(); -+ rcu_read_unlock(); -+ -+ if (bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) { -+ for_each_set_bit(i, bitmap, MIN_LRU_BATCH) { -+ folio = pfn_folio(pte_pfn(pte[i])); -+ folio_activate(folio); -+ } -+ return; -+ } -+ -+ /* folio_update_gen() requires stable folio_memcg() */ -+ if (!mem_cgroup_trylock_pages(memcg)) -+ return; -+ -+ spin_lock_irq(&lruvec->lru_lock); -+ new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq); -+ -+ for_each_set_bit(i, bitmap, MIN_LRU_BATCH) { -+ folio = pfn_folio(pte_pfn(pte[i])); -+ if (folio_memcg_rcu(folio) != memcg) -+ continue; -+ -+ old_gen = folio_update_gen(folio, new_gen); -+ if (old_gen < 0 || old_gen == new_gen) -+ continue; -+ -+ lru_gen_update_size(lruvec, folio, old_gen, new_gen); -+ } -+ -+ spin_unlock_irq(&lruvec->lru_lock); -+ -+ mem_cgroup_unlock_pages(); -+} -+ - /****************************************************************************** - * the eviction - ******************************************************************************/ -@@ -3423,6 +3601,12 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, int tier_idx) - return true; - } - -+ /* promoted */ -+ if (gen != lru_gen_from_seq(lrugen->min_seq[type])) { -+ list_move(&folio->lru, &lrugen->lists[gen][type][zone]); -+ return true; -+ } -+ - /* protected */ - if (tier > tier_idx) { - int hist = lru_hist_from_seq(lrugen->min_seq[type]); - -From patchwork Wed Jul 6 22:00:17 2022 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Patchwork-Submitter: Yu Zhao -X-Patchwork-Id: 12908709 -Return-Path: -X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on - aws-us-west-2-korg-lkml-1.web.codeaurora.org -Received: from kanga.kvack.org (kanga.kvack.org [205.233.56.17]) - by smtp.lore.kernel.org (Postfix) with ESMTP id 2F4F3C433EF - for ; Wed, 6 Jul 2022 22:01:21 +0000 (UTC) -Received: by kanga.kvack.org (Postfix) - id B67CF8E0008; Wed, 6 Jul 2022 18:01:11 -0400 (EDT) -Received: by kanga.kvack.org (Postfix, from userid 40) - id A53EA8E0001; Wed, 6 Jul 2022 18:01:11 -0400 (EDT) -X-Delivered-To: int-list-linux-mm@kvack.org -Received: by kanga.kvack.org (Postfix, from userid 63042) - id 7BC5B8E0008; Wed, 6 Jul 2022 18:01:11 -0400 (EDT) -X-Delivered-To: linux-mm@kvack.org -Received: from relay.hostedemail.com (smtprelay0011.hostedemail.com - [216.40.44.11]) - by kanga.kvack.org (Postfix) with ESMTP id 613D28E0001 - for ; Wed, 6 Jul 2022 18:01:11 -0400 (EDT) -Received: from smtpin31.hostedemail.com (a10.router.float.18 [10.200.18.1]) - by unirelay02.hostedemail.com (Postfix) with ESMTP id 3AABF33A6A - for ; Wed, 6 Jul 2022 22:01:11 +0000 (UTC) -X-FDA: 79658046342.31.25FB448 -Received: from mail-yw1-f202.google.com (mail-yw1-f202.google.com - [209.85.128.202]) - by imf01.hostedemail.com (Postfix) with ESMTP id E1CB840019 - for ; Wed, 6 Jul 2022 22:01:04 +0000 (UTC) -Received: by mail-yw1-f202.google.com with SMTP id - 00721157ae682-31814f7654dso116292467b3.15 - for ; Wed, 06 Jul 2022 15:01:04 -0700 (PDT) -DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20210112; - h=date:in-reply-to:message-id:mime-version:references:subject:from:to - :cc:content-transfer-encoding; - bh=QRwOHdNvCJsdEWcZ8PiNBBmz8P6BxE21GfKaWd62Hcs=; - b=B0sFmQhPneIOAV1YVS6vP1oEsRe/BTwVflV6UdX0rzJbZE3r0MadNNURWxHDBukL1I - ELnHPWwed0WOAIrL8nftaw9ahABsSVQtJZPifYycg6l36RW7IRVZKE/FLzqQbao5lQVp - 
2lyTvVaA0fwTYrrOAkppMHFJS9NhtOwiPWkN8qczgGMF/wfTpEMLT1c3gwH7x0wTp0CA - bmGxHDwTUBtMQvnhn6ZHsn3tW2Mue+sW/jt4FZPTcsu1wgfJSmRfIgRB/FRZTem/MRn1 - s04RGx0yhTSGEtt8gc/smm1CW1G6xElKiEo1r8zVeztvFvFMntvooTqGlsQvsu1rVxNL - nxIA== -X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=1e100.net; s=20210112; - h=x-gm-message-state:date:in-reply-to:message-id:mime-version - :references:subject:from:to:cc:content-transfer-encoding; - bh=QRwOHdNvCJsdEWcZ8PiNBBmz8P6BxE21GfKaWd62Hcs=; - b=c4bdMkLhxyvEnutnBA5XUiYftxDhTV6M0oPnBxTEWM2+ScjFG2RdUzrhfOZMxurWrf - sKZIm+7oIW+QIFcYwXv79hTW6tBrlW/YZfAQk5To3Rg3HYz9y6EONeJBeRq7VD3s7cDK - yCD5V7/rn1zPfpa7e5FCEQ3uaAAzJpmXH0yzMlJovObkLUf5/2H61vCu5Ss3s0nyUzu5 - PmBA7cbVlJg8w2iHFXSYVvkQw+nwkfZPYiZf6a7C4b5cBaaqSjFwp9R1Dj4Dmt/hyfqL - 9aSikv3Dqd00tRhmEqz7CFDN0nFe0RCoyF/1imT4h/wLfpY/PfqAblpzKLs1DVaRiIpK - 0dcg== -X-Gm-Message-State: AJIora/sVJDwRZqeywVvAnGpxiHdOH6QHZPRRxUF3BgVCxqUwhhWX/Sv - qRTmED85dcbhYCcgcB7NXYcxpzqLgic= -X-Google-Smtp-Source: - AGRyM1uf52B2LApundNg2J5h3sPxkMm3CEPkOlcVUbZUCRfPLLNdJaJY9XTnmessJ0elI3BxSTXeuJSaFn0= -X-Received: from yuzhao.bld.corp.google.com - ([2620:15c:183:200:b89c:e10a:466e:cf7d]) - (user=yuzhao job=sendgmr) by 2002:a81:2f4c:0:b0:31c:2bee:dfa4 with SMTP id - v73-20020a812f4c000000b0031c2beedfa4mr47320138ywv.483.1657144863343; Wed, 06 - Jul 2022 15:01:03 -0700 (PDT) -Date: Wed, 6 Jul 2022 16:00:17 -0600 -In-Reply-To: <20220706220022.968789-1-yuzhao@google.com> -Message-Id: <20220706220022.968789-9-yuzhao@google.com> -Mime-Version: 1.0 -References: <20220706220022.968789-1-yuzhao@google.com> -X-Mailer: git-send-email 2.37.0.rc0.161.g10f37bed90-goog -Subject: [PATCH v13 08/14] mm: multi-gen LRU: support page table walks -From: Yu Zhao -To: Andrew Morton -Cc: Andi Kleen , - Aneesh Kumar , - Catalin Marinas , - Dave Hansen , Hillf Danton , - Jens Axboe , Johannes Weiner , - Jonathan Corbet , - Linus Torvalds , - Matthew Wilcox , Mel Gorman , - Michael Larabel , - Michal Hocko , Mike Rapoport , - Peter Zijlstra , Tejun Heo , - Vlastimil Babka , Will Deacon , - linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org, - linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org, - page-reclaim@google.com, Yu Zhao , - Brian Geffon , - Jan Alexander Steffens , - Oleksandr Natalenko , - Steven Barrett , - Suleiman Souhlal , Daniel Byrne , - Donald Carr , - " =?utf-8?q?Holger_Hoffst=C3=A4tte?= " , - Konstantin Kharlamov , - Shuang Zhai , Sofia Trinh , - Vaibhav Jain -ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; - d=hostedemail.com; - s=arc-20220608; t=1657144870; - h=from:from:sender:reply-to:subject:subject:date:date: - message-id:message-id:to:to:cc:cc:mime-version:mime-version: - content-type:content-type: - content-transfer-encoding:content-transfer-encoding: - in-reply-to:in-reply-to:references:references:dkim-signature; - bh=QRwOHdNvCJsdEWcZ8PiNBBmz8P6BxE21GfKaWd62Hcs=; - b=yTdhrGd2Yn7SlvL67mHmk0coJxZY8xT17lW/ewc4fNMOsnnVS1sKnIvZPTnTcn0Fe+dccs - i2sOOxXkGXEhgV1hMozofaMLxhLPzFCWAEqHzOEcXyOK4AUM8ZYrXZOlIFqaID1et19+VY - 9DG+lIYPEo08J5Ku8PkMzTbLZN1d/1w= -ARC-Authentication-Results: i=1; - imf01.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=B0sFmQhP; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf01.hostedemail.com: domain of - 3HwbGYgYKCGAWSXF8MEMMEJC.AMKJGLSV-KKIT8AI.MPE@flex--yuzhao.bounces.google.com - designates 209.85.128.202 as permitted sender) - smtp.mailfrom=3HwbGYgYKCGAWSXF8MEMMEJC.AMKJGLSV-KKIT8AI.MPE@flex--yuzhao.bounces.google.com -ARC-Seal: 
i=1; s=arc-20220608; d=hostedemail.com; t=1657144870; a=rsa-sha256; - cv=none; - b=AqNzfeMgehbGAF0NaBoToCygtio3p/CKcEQ2XvVEAyt3GUO/NWzgGf3L1H/PYlFzCPzE7Z - USY6Zs44Owz7ybSkwmXNxexJwWitplxX8dRNKKzWXbZkJ3+tWRbprGyp/NLFp9NFcFhMkz - orvBVvz7eGVPFZ1+kb859dx9H/Ub2G4= -X-Rspam-User: -X-Rspamd-Server: rspam07 -Authentication-Results: imf01.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=B0sFmQhP; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf01.hostedemail.com: domain of - 3HwbGYgYKCGAWSXF8MEMMEJC.AMKJGLSV-KKIT8AI.MPE@flex--yuzhao.bounces.google.com - designates 209.85.128.202 as permitted sender) - smtp.mailfrom=3HwbGYgYKCGAWSXF8MEMMEJC.AMKJGLSV-KKIT8AI.MPE@flex--yuzhao.bounces.google.com -X-Stat-Signature: z89omp4mfbgn9jqrf7gixf63n1ypp6j5 -X-Rspamd-Queue-Id: E1CB840019 -X-HE-Tag: 1657144864-40541 -X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4 -Sender: owner-linux-mm@kvack.org -Precedence: bulk -X-Loop: owner-majordomo@kvack.org -List-ID: - -To further exploit spatial locality, the aging prefers to walk page -tables to search for young PTEs and promote hot pages. A kill switch -will be added in the next patch to disable this behavior. When -disabled, the aging relies on the rmap only. - -NB: this behavior has nothing similar with the page table scanning in -the 2.4 kernel [1], which searches page tables for old PTEs, adds cold -pages to swapcache and unmaps them. - -To avoid confusion, the term "iteration" specifically means the -traversal of an entire mm_struct list; the term "walk" will be applied -to page tables and the rmap, as usual. - -An mm_struct list is maintained for each memcg, and an mm_struct -follows its owner task to the new memcg when this task is migrated. -Given an lruvec, the aging iterates lruvec_memcg()->mm_list and calls -walk_page_range() with each mm_struct on this list to promote hot -pages before it increments max_seq. - -When multiple page table walkers iterate the same list, each of them -gets a unique mm_struct; therefore they can run concurrently. Page -table walkers ignore any misplaced pages, e.g., if an mm_struct was -migrated, pages it left in the previous memcg will not be promoted -when its current memcg is under reclaim. Similarly, page table walkers -will not promote pages from nodes other than the one under reclaim. - -This patch uses the following optimizations when walking page tables: -1. It tracks the usage of mm_struct's between context switches so that - page table walkers can skip processes that have been sleeping since - the last iteration. -2. It uses generational Bloom filters to record populated branches so - that page table walkers can reduce their search space based on the - query results, e.g., to skip page tables containing mostly holes or - misplaced pages. -3. It takes advantage of the accessed bit in non-leaf PMD entries when - CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y. -4. It does not zigzag between a PGD table and the same PMD table - spanning multiple VMAs. IOW, it finishes all the VMAs within the - range of the same PMD table before it returns to a PGD table. This - improves the cache performance for workloads that have large - numbers of tiny VMAs [2], especially when CONFIG_PGTABLE_LEVELS=5. 
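As a sanity check on the Bloom filters mentioned in item 2: the false-positive figures quoted in the code comment added further down (m = 1<<15 bits, k = 2 hash functions) follow from the standard estimate p ~ (1 - e^(-kn/m))^k. For n = 10,000 inserted items this gives (1 - e^(-20000/32768))^2 ~ 0.21, about 1/5; for n = 20,000 it gives (1 - e^(-40000/32768))^2 ~ 0.50, about 1/2.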
- -Server benchmark results: - Single workload: - fio (buffered I/O): no change - - Single workload: - memcached (anon): +[8, 10]% - Ops/sec KB/sec - patch1-7: 1147696.57 44640.29 - patch1-8: 1245274.91 48435.66 - - Configurations: - no change - -Client benchmark results: - kswapd profiles: - patch1-7 - 48.16% lzo1x_1_do_compress (real work) - 8.20% page_vma_mapped_walk (overhead) - 7.06% _raw_spin_unlock_irq - 2.92% ptep_clear_flush - 2.53% __zram_bvec_write - 2.11% do_raw_spin_lock - 2.02% memmove - 1.93% lru_gen_look_around - 1.56% free_unref_page_list - 1.40% memset - - patch1-8 - 49.44% lzo1x_1_do_compress (real work) - 6.19% page_vma_mapped_walk (overhead) - 5.97% _raw_spin_unlock_irq - 3.13% get_pfn_folio - 2.85% ptep_clear_flush - 2.42% __zram_bvec_write - 2.08% do_raw_spin_lock - 1.92% memmove - 1.44% alloc_zspage - 1.36% memset - - Configurations: - no change - -Thanks to the following developers for their efforts [3]. - kernel test robot - -[1] https://lwn.net/Articles/23732/ -[2] https://llvm.org/docs/ScudoHardenedAllocator.html -[3] https://lore.kernel.org/r/202204160827.ekEARWQo-lkp@intel.com/ - -Signed-off-by: Yu Zhao -Acked-by: Brian Geffon -Acked-by: Jan Alexander Steffens (heftig) -Acked-by: Oleksandr Natalenko -Acked-by: Steven Barrett -Acked-by: Suleiman Souhlal -Tested-by: Daniel Byrne -Tested-by: Donald Carr -Tested-by: Holger Hoffstätte -Tested-by: Konstantin Kharlamov -Tested-by: Shuang Zhai -Tested-by: Sofia Trinh -Tested-by: Vaibhav Jain ---- - fs/exec.c | 2 + - include/linux/memcontrol.h | 5 + - include/linux/mm_types.h | 77 +++ - include/linux/mmzone.h | 56 +- - include/linux/swap.h | 4 + - kernel/exit.c | 1 + - kernel/fork.c | 9 + - kernel/sched/core.c | 1 + - mm/memcontrol.c | 25 + - mm/vmscan.c | 1000 +++++++++++++++++++++++++++++++++++- - 10 files changed, 1163 insertions(+), 17 deletions(-) - -diff --git a/fs/exec.c b/fs/exec.c -index 0989fb8472a1..b1fda634e01a 100644 ---- a/fs/exec.c -+++ b/fs/exec.c -@@ -1015,6 +1015,7 @@ static int exec_mmap(struct mm_struct *mm) - active_mm = tsk->active_mm; - tsk->active_mm = mm; - tsk->mm = mm; -+ lru_gen_add_mm(mm); - /* - * This prevents preemption while active_mm is being loaded and - * it and mm are being updated, which could cause problems for -@@ -1030,6 +1031,7 @@ static int exec_mmap(struct mm_struct *mm) - tsk->mm->vmacache_seqnum = 0; - vmacache_flush(tsk); - task_unlock(tsk); -+ lru_gen_use_mm(mm); - if (old_mm) { - mmap_read_unlock(old_mm); - BUG_ON(active_mm != old_mm); -diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h -index 9d0fea17f9ef..eca62345fdd5 100644 ---- a/include/linux/memcontrol.h -+++ b/include/linux/memcontrol.h -@@ -350,6 +350,11 @@ struct mem_cgroup { - struct deferred_split deferred_split_queue; - #endif - -+#ifdef CONFIG_LRU_GEN -+ /* per-memcg mm_struct list */ -+ struct lru_gen_mm_list mm_list; -+#endif -+ - struct mem_cgroup_per_node *nodeinfo[]; - }; - -diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h -index c29ab4c0cd5c..7db51151a28b 100644 ---- a/include/linux/mm_types.h -+++ b/include/linux/mm_types.h -@@ -3,6 +3,7 @@ - #define _LINUX_MM_TYPES_H - - #include -+#include - - #include - #include -@@ -17,6 +18,7 @@ - #include - #include - #include -+#include - - #include - -@@ -667,6 +669,22 @@ struct mm_struct { - */ - unsigned long ksm_merging_pages; - #endif -+#ifdef CONFIG_LRU_GEN -+ struct { -+ /* this mm_struct is on lru_gen_mm_list */ -+ struct list_head list; -+ /* -+ * Set when switching to this mm_struct, as a hint of -+ * whether it has 
been used since the last time per-node -+ * page table walkers cleared the corresponding bits. -+ */ -+ unsigned long bitmap; -+#ifdef CONFIG_MEMCG -+ /* points to the memcg of "owner" above */ -+ struct mem_cgroup *memcg; -+#endif -+ } lru_gen; -+#endif /* CONFIG_LRU_GEN */ - } __randomize_layout; - - /* -@@ -693,6 +711,65 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) - return (struct cpumask *)&mm->cpu_bitmap; - } - -+#ifdef CONFIG_LRU_GEN -+ -+struct lru_gen_mm_list { -+ /* mm_struct list for page table walkers */ -+ struct list_head fifo; -+ /* protects the list above */ -+ spinlock_t lock; -+}; -+ -+void lru_gen_add_mm(struct mm_struct *mm); -+void lru_gen_del_mm(struct mm_struct *mm); -+#ifdef CONFIG_MEMCG -+void lru_gen_migrate_mm(struct mm_struct *mm); -+#endif -+ -+static inline void lru_gen_init_mm(struct mm_struct *mm) -+{ -+ INIT_LIST_HEAD(&mm->lru_gen.list); -+ mm->lru_gen.bitmap = 0; -+#ifdef CONFIG_MEMCG -+ mm->lru_gen.memcg = NULL; -+#endif -+} -+ -+static inline void lru_gen_use_mm(struct mm_struct *mm) -+{ -+ /* unlikely but not a bug when racing with lru_gen_migrate_mm() */ -+ VM_WARN_ON_ONCE(list_empty(&mm->lru_gen.list)); -+ -+ if (!(current->flags & PF_KTHREAD)) -+ WRITE_ONCE(mm->lru_gen.bitmap, -1); -+} -+ -+#else /* !CONFIG_LRU_GEN */ -+ -+static inline void lru_gen_add_mm(struct mm_struct *mm) -+{ -+} -+ -+static inline void lru_gen_del_mm(struct mm_struct *mm) -+{ -+} -+ -+#ifdef CONFIG_MEMCG -+static inline void lru_gen_migrate_mm(struct mm_struct *mm) -+{ -+} -+#endif -+ -+static inline void lru_gen_init_mm(struct mm_struct *mm) -+{ -+} -+ -+static inline void lru_gen_use_mm(struct mm_struct *mm) -+{ -+} -+ -+#endif /* CONFIG_LRU_GEN */ -+ - struct mmu_gather; - extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); - extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); -diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index 4fd7fc16eeb4..0cf0856b484a 100644 ---- a/include/linux/mmzone.h -+++ b/include/linux/mmzone.h -@@ -405,7 +405,7 @@ enum { - * min_seq behind. - * - * The number of pages in each generation is eventually consistent and therefore -- * can be transiently negative. -+ * can be transiently negative when reset_batch_size() is pending. 
- */ - struct lru_gen_struct { - /* the aging increments the youngest generation number */ -@@ -427,6 +427,53 @@ struct lru_gen_struct { - atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; - }; - -+enum { -+ MM_LEAF_TOTAL, /* total leaf entries */ -+ MM_LEAF_OLD, /* old leaf entries */ -+ MM_LEAF_YOUNG, /* young leaf entries */ -+ MM_NONLEAF_TOTAL, /* total non-leaf entries */ -+ MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */ -+ MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */ -+ NR_MM_STATS -+}; -+ -+/* double-buffering Bloom filters */ -+#define NR_BLOOM_FILTERS 2 -+ -+struct lru_gen_mm_state { -+ /* set to max_seq after each iteration */ -+ unsigned long seq; -+ /* where the current iteration continues (inclusive) */ -+ struct list_head *head; -+ /* where the last iteration ended (exclusive) */ -+ struct list_head *tail; -+ /* to wait for the last page table walker to finish */ -+ struct wait_queue_head wait; -+ /* Bloom filters flip after each iteration */ -+ unsigned long *filters[NR_BLOOM_FILTERS]; -+ /* the mm stats for debugging */ -+ unsigned long stats[NR_HIST_GENS][NR_MM_STATS]; -+ /* the number of concurrent page table walkers */ -+ int nr_walkers; -+}; -+ -+struct lru_gen_mm_walk { -+ /* the lruvec under reclaim */ -+ struct lruvec *lruvec; -+ /* unstable max_seq from lru_gen_struct */ -+ unsigned long max_seq; -+ /* the next address within an mm to scan */ -+ unsigned long next_addr; -+ /* to batch promoted pages */ -+ int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; -+ /* to batch the mm stats */ -+ int mm_stats[NR_MM_STATS]; -+ /* total batched items */ -+ int batched; -+ bool can_swap; -+ bool force_scan; -+}; -+ - void lru_gen_init_lruvec(struct lruvec *lruvec); - void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); - -@@ -477,6 +524,8 @@ struct lruvec { - #ifdef CONFIG_LRU_GEN - /* evictable pages divided into generations */ - struct lru_gen_struct lrugen; -+ /* to concurrently iterate lru_gen_mm_list */ -+ struct lru_gen_mm_state mm_state; - #endif - #ifdef CONFIG_MEMCG - struct pglist_data *pgdat; -@@ -1070,6 +1119,11 @@ typedef struct pglist_data { - - unsigned long flags; - -+#ifdef CONFIG_LRU_GEN -+ /* kswap mm walk data */ -+ struct lru_gen_mm_walk mm_walk; -+#endif -+ - ZONE_PADDING(_pad2_) - - /* Per-node vmstats */ -diff --git a/include/linux/swap.h b/include/linux/swap.h -index 0c0fed1b348f..b66cbc7ea93c 100644 ---- a/include/linux/swap.h -+++ b/include/linux/swap.h -@@ -162,6 +162,10 @@ union swap_header { - */ - struct reclaim_state { - unsigned long reclaimed_slab; -+#ifdef CONFIG_LRU_GEN -+ /* per-thread mm walk data */ -+ struct lru_gen_mm_walk *mm_walk; -+#endif - }; - - #ifdef __KERNEL__ -diff --git a/kernel/exit.c b/kernel/exit.c -index f072959fcab7..f2d4d48ea790 100644 ---- a/kernel/exit.c -+++ b/kernel/exit.c -@@ -466,6 +466,7 @@ void mm_update_next_owner(struct mm_struct *mm) - goto retry; - } - WRITE_ONCE(mm->owner, c); -+ lru_gen_migrate_mm(mm); - task_unlock(c); - put_task_struct(c); - } -diff --git a/kernel/fork.c b/kernel/fork.c -index 9d44f2d46c69..67b7666d7321 100644 ---- a/kernel/fork.c -+++ b/kernel/fork.c -@@ -1152,6 +1152,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, - goto fail_nocontext; - - mm->user_ns = get_user_ns(user_ns); -+ lru_gen_init_mm(mm); - return mm; - - fail_nocontext: -@@ -1194,6 +1195,7 @@ static inline void __mmput(struct mm_struct *mm) - } - if (mm->binfmt) - module_put(mm->binfmt->module); -+ lru_gen_del_mm(mm); - 
mmdrop(mm); - } - -@@ -2676,6 +2678,13 @@ pid_t kernel_clone(struct kernel_clone_args *args) - get_task_struct(p); - } - -+ if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) { -+ /* lock the task to synchronize with memcg migration */ -+ task_lock(p); -+ lru_gen_add_mm(p->mm); -+ task_unlock(p); -+ } -+ - wake_up_new_task(p); - - /* forking complete and child started to run, tell ptracer */ -diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index da0bf6fe9ecd..320d82697037 100644 ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -5130,6 +5130,7 @@ context_switch(struct rq *rq, struct task_struct *prev, - * finish_task_switch()'s mmdrop(). - */ - switch_mm_irqs_off(prev->active_mm, next->mm, next); -+ lru_gen_use_mm(next->mm); - - if (!prev->mm) { // from kernel - /* will mmdrop() in finish_task_switch(). */ -diff --git a/mm/memcontrol.c b/mm/memcontrol.c -index 743f8513f1c3..84f3707667bc 100644 ---- a/mm/memcontrol.c -+++ b/mm/memcontrol.c -@@ -6133,6 +6133,30 @@ static void mem_cgroup_move_task(void) - } - #endif - -+#ifdef CONFIG_LRU_GEN -+static void mem_cgroup_attach(struct cgroup_taskset *tset) -+{ -+ struct task_struct *task; -+ struct cgroup_subsys_state *css; -+ -+ /* find the first leader if there is any */ -+ cgroup_taskset_for_each_leader(task, css, tset) -+ break; -+ -+ if (!task) -+ return; -+ -+ task_lock(task); -+ if (task->mm && task->mm->owner == task) -+ lru_gen_migrate_mm(task->mm); -+ task_unlock(task); -+} -+#else -+static void mem_cgroup_attach(struct cgroup_taskset *tset) -+{ -+} -+#endif /* CONFIG_LRU_GEN */ -+ - static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value) - { - if (value == PAGE_COUNTER_MAX) -@@ -6536,6 +6560,7 @@ struct cgroup_subsys memory_cgrp_subsys = { - .css_reset = mem_cgroup_css_reset, - .css_rstat_flush = mem_cgroup_css_rstat_flush, - .can_attach = mem_cgroup_can_attach, -+ .attach = mem_cgroup_attach, - .cancel_attach = mem_cgroup_cancel_attach, - .post_attach = mem_cgroup_move_task, - .dfl_cftypes = memory_files, -diff --git a/mm/vmscan.c b/mm/vmscan.c -index ec786fc556a7..8e55a1ce1ae0 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -50,6 +50,8 @@ - #include - #include - #include -+#include -+#include - - #include - #include -@@ -3024,7 +3026,7 @@ static bool can_age_anon_pages(struct pglist_data *pgdat, - for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \ - for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++) - --static struct lruvec __maybe_unused *get_lruvec(struct mem_cgroup *memcg, int nid) -+static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid) - { - struct pglist_data *pgdat = NODE_DATA(nid); - -@@ -3069,6 +3071,372 @@ static bool __maybe_unused seq_is_valid(struct lruvec *lruvec) - get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS; - } - -+/****************************************************************************** -+ * mm_struct list -+ ******************************************************************************/ -+ -+static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg) -+{ -+ static struct lru_gen_mm_list mm_list = { -+ .fifo = LIST_HEAD_INIT(mm_list.fifo), -+ .lock = __SPIN_LOCK_UNLOCKED(mm_list.lock), -+ }; -+ -+#ifdef CONFIG_MEMCG -+ if (memcg) -+ return &memcg->mm_list; -+#endif -+ VM_WARN_ON_ONCE(!mem_cgroup_disabled()); -+ -+ return &mm_list; -+} -+ -+void lru_gen_add_mm(struct mm_struct *mm) -+{ -+ int nid; -+ struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm); -+ struct lru_gen_mm_list *mm_list = get_mm_list(memcg); -+ -+ 
VM_WARN_ON_ONCE(!list_empty(&mm->lru_gen.list)); -+#ifdef CONFIG_MEMCG -+ VM_WARN_ON_ONCE(mm->lru_gen.memcg); -+ mm->lru_gen.memcg = memcg; -+#endif -+ spin_lock(&mm_list->lock); -+ -+ for_each_node_state(nid, N_MEMORY) { -+ struct lruvec *lruvec = get_lruvec(memcg, nid); -+ -+ if (!lruvec) -+ continue; -+ -+ /* the first addition since the last iteration */ -+ if (lruvec->mm_state.tail == &mm_list->fifo) -+ lruvec->mm_state.tail = &mm->lru_gen.list; -+ } -+ -+ list_add_tail(&mm->lru_gen.list, &mm_list->fifo); -+ -+ spin_unlock(&mm_list->lock); -+} -+ -+void lru_gen_del_mm(struct mm_struct *mm) -+{ -+ int nid; -+ struct lru_gen_mm_list *mm_list; -+ struct mem_cgroup *memcg = NULL; -+ -+ if (list_empty(&mm->lru_gen.list)) -+ return; -+ -+#ifdef CONFIG_MEMCG -+ memcg = mm->lru_gen.memcg; -+#endif -+ mm_list = get_mm_list(memcg); -+ -+ spin_lock(&mm_list->lock); -+ -+ for_each_node(nid) { -+ struct lruvec *lruvec = get_lruvec(memcg, nid); -+ -+ if (!lruvec) -+ continue; -+ -+ /* where the last iteration ended (exclusive) */ -+ if (lruvec->mm_state.tail == &mm->lru_gen.list) -+ lruvec->mm_state.tail = lruvec->mm_state.tail->next; -+ -+ /* where the current iteration continues (inclusive) */ -+ if (lruvec->mm_state.head != &mm->lru_gen.list) -+ continue; -+ -+ lruvec->mm_state.head = lruvec->mm_state.head->next; -+ /* the deletion ends the current iteration */ -+ if (lruvec->mm_state.head == &mm_list->fifo) -+ WRITE_ONCE(lruvec->mm_state.seq, lruvec->mm_state.seq + 1); -+ } -+ -+ list_del_init(&mm->lru_gen.list); -+ -+ spin_unlock(&mm_list->lock); -+ -+#ifdef CONFIG_MEMCG -+ mem_cgroup_put(mm->lru_gen.memcg); -+ mm->lru_gen.memcg = NULL; -+#endif -+} -+ -+#ifdef CONFIG_MEMCG -+void lru_gen_migrate_mm(struct mm_struct *mm) -+{ -+ struct mem_cgroup *memcg; -+ -+ lockdep_assert_held(&mm->owner->alloc_lock); -+ -+ /* for mm_update_next_owner() */ -+ if (mem_cgroup_disabled()) -+ return; -+ -+ rcu_read_lock(); -+ memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); -+ rcu_read_unlock(); -+ if (memcg == mm->lru_gen.memcg) -+ return; -+ -+ VM_WARN_ON_ONCE(!mm->lru_gen.memcg); -+ VM_WARN_ON_ONCE(list_empty(&mm->lru_gen.list)); -+ -+ lru_gen_del_mm(mm); -+ lru_gen_add_mm(mm); -+} -+#endif -+ -+/* -+ * Bloom filters with m=1<<15, k=2 and the false positive rates of ~1/5 when -+ * n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of -+ * bits in a bitmap, k is the number of hash functions and n is the number of -+ * inserted items. -+ * -+ * Page table walkers use one of the two filters to reduce their search space. -+ * To get rid of non-leaf entries that no longer have enough leaf entries, the -+ * aging uses the double-buffering technique to flip to the other filter each -+ * time it produces a new generation. For non-leaf entries that have enough -+ * leaf entries, the aging carries them over to the next generation in -+ * walk_pmd_range(); the eviction also report them when walking the rmap -+ * in lru_gen_look_around(). -+ * -+ * For future optimizations: -+ * 1. It's not necessary to keep both filters all the time. The spare one can be -+ * freed after the RCU grace period and reallocated if needed again. -+ * 2. And when reallocating, it's worth scaling its size according to the number -+ * of inserted entries in the other filter, to reduce the memory overhead on -+ * small systems and false positives on large systems. -+ * 3. Jenkins' hash function is an alternative to Knuth's. 
-+ */ -+#define BLOOM_FILTER_SHIFT 15 -+ -+static inline int filter_gen_from_seq(unsigned long seq) -+{ -+ return seq % NR_BLOOM_FILTERS; -+} -+ -+static void get_item_key(void *item, int *key) -+{ -+ u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2); -+ -+ BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32)); -+ -+ key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1); -+ key[1] = hash >> BLOOM_FILTER_SHIFT; -+} -+ -+static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq) -+{ -+ unsigned long *filter; -+ int gen = filter_gen_from_seq(seq); -+ -+ filter = lruvec->mm_state.filters[gen]; -+ if (filter) { -+ bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT)); -+ return; -+ } -+ -+ filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT), -+ __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); -+ WRITE_ONCE(lruvec->mm_state.filters[gen], filter); -+} -+ -+static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item) -+{ -+ int key[2]; -+ unsigned long *filter; -+ int gen = filter_gen_from_seq(seq); -+ -+ filter = READ_ONCE(lruvec->mm_state.filters[gen]); -+ if (!filter) -+ return; -+ -+ get_item_key(item, key); -+ -+ if (!test_bit(key[0], filter)) -+ set_bit(key[0], filter); -+ if (!test_bit(key[1], filter)) -+ set_bit(key[1], filter); -+} -+ -+static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item) -+{ -+ int key[2]; -+ unsigned long *filter; -+ int gen = filter_gen_from_seq(seq); -+ -+ filter = READ_ONCE(lruvec->mm_state.filters[gen]); -+ if (!filter) -+ return true; -+ -+ get_item_key(item, key); -+ -+ return test_bit(key[0], filter) && test_bit(key[1], filter); -+} -+ -+static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last) -+{ -+ int i; -+ int hist; -+ -+ lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock); -+ -+ if (walk) { -+ hist = lru_hist_from_seq(walk->max_seq); -+ -+ for (i = 0; i < NR_MM_STATS; i++) { -+ WRITE_ONCE(lruvec->mm_state.stats[hist][i], -+ lruvec->mm_state.stats[hist][i] + walk->mm_stats[i]); -+ walk->mm_stats[i] = 0; -+ } -+ } -+ -+ if (NR_HIST_GENS > 1 && last) { -+ hist = lru_hist_from_seq(lruvec->mm_state.seq + 1); -+ -+ for (i = 0; i < NR_MM_STATS; i++) -+ WRITE_ONCE(lruvec->mm_state.stats[hist][i], 0); -+ } -+} -+ -+static bool should_skip_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk) -+{ -+ int type; -+ unsigned long size = 0; -+ struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec); -+ int key = pgdat->node_id % BITS_PER_TYPE(mm->lru_gen.bitmap); -+ -+ if (!walk->force_scan && !test_bit(key, &mm->lru_gen.bitmap)) -+ return true; -+ -+ clear_bit(key, &mm->lru_gen.bitmap); -+ -+ for (type = !walk->can_swap; type < ANON_AND_FILE; type++) { -+ size += type ? get_mm_counter(mm, MM_FILEPAGES) : -+ get_mm_counter(mm, MM_ANONPAGES) + -+ get_mm_counter(mm, MM_SHMEMPAGES); -+ } -+ -+ if (size < MIN_LRU_BATCH) -+ return true; -+ -+ if (test_bit(MMF_OOM_REAP_QUEUED, &mm->flags)) -+ return true; -+ -+ return !mmget_not_zero(mm); -+} -+ -+static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, -+ struct mm_struct **iter) -+{ -+ bool first = false; -+ bool last = true; -+ struct mm_struct *mm = NULL; -+ struct mem_cgroup *memcg = lruvec_memcg(lruvec); -+ struct lru_gen_mm_list *mm_list = get_mm_list(memcg); -+ struct lru_gen_mm_state *mm_state = &lruvec->mm_state; -+ -+ /* -+ * There are four interesting cases for this page table walker: -+ * 1. It tries to start a new iteration of mm_list with a stale max_seq; -+ * there is nothing left to do. 
-+ * 2. It's the first of the current generation, and it needs to reset -+ * the Bloom filter for the next generation. -+ * 3. It reaches the end of mm_list, and it needs to increment -+ * mm_state->seq; the iteration is done. -+ * 4. It's the last of the current generation, and it needs to reset the -+ * mm stats counters for the next generation. -+ */ -+ spin_lock(&mm_list->lock); -+ -+ VM_WARN_ON_ONCE(mm_state->seq + 1 < walk->max_seq); -+ VM_WARN_ON_ONCE(*iter && mm_state->seq > walk->max_seq); -+ VM_WARN_ON_ONCE(*iter && !mm_state->nr_walkers); -+ -+ if (walk->max_seq <= mm_state->seq) { -+ if (!*iter) -+ last = false; -+ goto done; -+ } -+ -+ if (!mm_state->nr_walkers) { -+ VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo); -+ -+ mm_state->head = mm_list->fifo.next; -+ first = true; -+ } -+ -+ while (!mm && mm_state->head != &mm_list->fifo) { -+ mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list); -+ -+ mm_state->head = mm_state->head->next; -+ -+ /* force scan for those added after the last iteration */ -+ if (!mm_state->tail || mm_state->tail == &mm->lru_gen.list) { -+ mm_state->tail = mm_state->head; -+ walk->force_scan = true; -+ } -+ -+ if (should_skip_mm(mm, walk)) -+ mm = NULL; -+ } -+ -+ if (mm_state->head == &mm_list->fifo) -+ WRITE_ONCE(mm_state->seq, mm_state->seq + 1); -+done: -+ if (*iter && !mm) -+ mm_state->nr_walkers--; -+ if (!*iter && mm) -+ mm_state->nr_walkers++; -+ -+ if (mm_state->nr_walkers) -+ last = false; -+ -+ if (*iter || last) -+ reset_mm_stats(lruvec, walk, last); -+ -+ spin_unlock(&mm_list->lock); -+ -+ if (mm && first) -+ reset_bloom_filter(lruvec, walk->max_seq + 1); -+ -+ if (*iter) -+ mmput_async(*iter); -+ -+ *iter = mm; -+ -+ return last; -+} -+ -+static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq) -+{ -+ bool success = false; -+ struct mem_cgroup *memcg = lruvec_memcg(lruvec); -+ struct lru_gen_mm_list *mm_list = get_mm_list(memcg); -+ struct lru_gen_mm_state *mm_state = &lruvec->mm_state; -+ -+ spin_lock(&mm_list->lock); -+ -+ VM_WARN_ON_ONCE(mm_state->seq + 1 < max_seq); -+ -+ if (max_seq > mm_state->seq && !mm_state->nr_walkers) { -+ VM_WARN_ON_ONCE(mm_state->head && mm_state->head != &mm_list->fifo); -+ -+ WRITE_ONCE(mm_state->seq, mm_state->seq + 1); -+ reset_mm_stats(lruvec, NULL, true); -+ success = true; -+ } -+ -+ spin_unlock(&mm_list->lock); -+ -+ return success; -+} -+ - /****************************************************************************** - * refault feedback loop - ******************************************************************************/ -@@ -3219,6 +3587,118 @@ static int folio_inc_gen(struct lruvec *lruvec, struct folio *folio, bool reclai - return new_gen; - } - -+static void update_batch_size(struct lru_gen_mm_walk *walk, struct folio *folio, -+ int old_gen, int new_gen) -+{ -+ int type = folio_is_file_lru(folio); -+ int zone = folio_zonenum(folio); -+ int delta = folio_nr_pages(folio); -+ -+ VM_WARN_ON_ONCE(old_gen >= MAX_NR_GENS); -+ VM_WARN_ON_ONCE(new_gen >= MAX_NR_GENS); -+ -+ walk->batched++; -+ -+ walk->nr_pages[old_gen][type][zone] -= delta; -+ walk->nr_pages[new_gen][type][zone] += delta; -+} -+ -+static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk) -+{ -+ int gen, type, zone; -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ -+ walk->batched = 0; -+ -+ for_each_gen_type_zone(gen, type, zone) { -+ enum lru_list lru = type * LRU_INACTIVE_FILE; -+ int delta = walk->nr_pages[gen][type][zone]; -+ -+ if (!delta) -+ 
continue; -+ -+ walk->nr_pages[gen][type][zone] = 0; -+ WRITE_ONCE(lrugen->nr_pages[gen][type][zone], -+ lrugen->nr_pages[gen][type][zone] + delta); -+ -+ if (lru_gen_is_active(lruvec, gen)) -+ lru += LRU_ACTIVE; -+ __update_lru_size(lruvec, lru, zone, delta); -+ } -+} -+ -+static int should_skip_vma(unsigned long start, unsigned long end, struct mm_walk *args) -+{ -+ struct address_space *mapping; -+ struct vm_area_struct *vma = args->vma; -+ struct lru_gen_mm_walk *walk = args->private; -+ -+ if (!vma_is_accessible(vma)) -+ return true; -+ -+ if (is_vm_hugetlb_page(vma)) -+ return true; -+ -+ if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL | VM_SEQ_READ | VM_RAND_READ)) -+ return true; -+ -+ if (vma == get_gate_vma(vma->vm_mm)) -+ return true; -+ -+ if (vma_is_anonymous(vma)) -+ return !walk->can_swap; -+ -+ if (WARN_ON_ONCE(!vma->vm_file || !vma->vm_file->f_mapping)) -+ return true; -+ -+ mapping = vma->vm_file->f_mapping; -+ if (mapping_unevictable(mapping)) -+ return true; -+ -+ if (shmem_mapping(mapping)) -+ return !walk->can_swap; -+ -+ /* to exclude special mappings like dax, etc. */ -+ return !mapping->a_ops->read_folio; -+} -+ -+/* -+ * Some userspace memory allocators map many single-page VMAs. Instead of -+ * returning back to the PGD table for each of such VMAs, finish an entire PMD -+ * table to reduce zigzags and improve cache performance. -+ */ -+static bool get_next_vma(unsigned long mask, unsigned long size, struct mm_walk *args, -+ unsigned long *vm_start, unsigned long *vm_end) -+{ -+ unsigned long start = round_up(*vm_end, size); -+ unsigned long end = (start | ~mask) + 1; -+ -+ VM_WARN_ON_ONCE(mask & size); -+ VM_WARN_ON_ONCE((start & mask) != (*vm_start & mask)); -+ -+ while (args->vma) { -+ if (start >= args->vma->vm_end) { -+ args->vma = args->vma->vm_next; -+ continue; -+ } -+ -+ if (end && end <= args->vma->vm_start) -+ return false; -+ -+ if (should_skip_vma(args->vma->vm_start, args->vma->vm_end, args)) { -+ args->vma = args->vma->vm_next; -+ continue; -+ } -+ -+ *vm_start = max(start, args->vma->vm_start); -+ *vm_end = min(end - 1, args->vma->vm_end - 1) + 1; -+ -+ return true; -+ } -+ -+ return false; -+} -+ - static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr) - { - unsigned long pfn = pte_pfn(pte); -@@ -3237,8 +3717,28 @@ static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned - return pfn; - } - -+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) -+static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr) -+{ -+ unsigned long pfn = pmd_pfn(pmd); -+ -+ VM_WARN_ON_ONCE(addr < vma->vm_start || addr >= vma->vm_end); -+ -+ if (!pmd_present(pmd) || is_huge_zero_pmd(pmd)) -+ return -1; -+ -+ if (WARN_ON_ONCE(pmd_devmap(pmd))) -+ return -1; -+ -+ if (WARN_ON_ONCE(!pfn_valid(pfn))) -+ return -1; -+ -+ return pfn; -+} -+#endif -+ - static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg, -- struct pglist_data *pgdat) -+ struct pglist_data *pgdat, bool can_swap) - { - struct folio *folio; - -@@ -3253,9 +3753,371 @@ static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg, - if (folio_memcg_rcu(folio) != memcg) - return NULL; - -+ /* file VMAs can contain anon pages from COW */ -+ if (!folio_is_file_lru(folio) && !can_swap) -+ return NULL; -+ - return folio; - } - -+static bool suitable_to_scan(int total, int young) -+{ -+ int n = clamp_t(int, cache_line_size() / sizeof(pte_t), 2, 8); -+ -+ /* 
suitable if the average number of young PTEs per cacheline is >=1 */ -+ return young * n >= total; -+} -+ -+static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, -+ struct mm_walk *args) -+{ -+ int i; -+ pte_t *pte; -+ spinlock_t *ptl; -+ unsigned long addr; -+ int total = 0; -+ int young = 0; -+ struct lru_gen_mm_walk *walk = args->private; -+ struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec); -+ struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec); -+ int old_gen, new_gen = lru_gen_from_seq(walk->max_seq); -+ -+ VM_WARN_ON_ONCE(pmd_leaf(*pmd)); -+ -+ ptl = pte_lockptr(args->mm, pmd); -+ if (!spin_trylock(ptl)) -+ return false; -+ -+ arch_enter_lazy_mmu_mode(); -+ -+ pte = pte_offset_map(pmd, start & PMD_MASK); -+restart: -+ for (i = pte_index(start), addr = start; addr != end; i++, addr += PAGE_SIZE) { -+ unsigned long pfn; -+ struct folio *folio; -+ -+ total++; -+ walk->mm_stats[MM_LEAF_TOTAL]++; -+ -+ pfn = get_pte_pfn(pte[i], args->vma, addr); -+ if (pfn == -1) -+ continue; -+ -+ if (!pte_young(pte[i])) { -+ walk->mm_stats[MM_LEAF_OLD]++; -+ continue; -+ } -+ -+ folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap); -+ if (!folio) -+ continue; -+ -+ if (!ptep_test_and_clear_young(args->vma, addr, pte + i)) -+ continue; -+ -+ young++; -+ walk->mm_stats[MM_LEAF_YOUNG]++; -+ -+ if (pte_dirty(pte[i]) && !folio_test_dirty(folio) && -+ !(folio_test_anon(folio) && folio_test_swapbacked(folio) && -+ !folio_test_swapcache(folio))) -+ folio_mark_dirty(folio); -+ -+ old_gen = folio_update_gen(folio, new_gen); -+ if (old_gen >= 0 && old_gen != new_gen) -+ update_batch_size(walk, folio, old_gen, new_gen); -+ } -+ -+ if (i < PTRS_PER_PTE && get_next_vma(PMD_MASK, PAGE_SIZE, args, &start, &end)) -+ goto restart; -+ -+ pte_unmap(pte); -+ -+ arch_leave_lazy_mmu_mode(); -+ spin_unlock(ptl); -+ -+ return suitable_to_scan(total, young); -+} -+ -+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) -+static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma, -+ struct mm_walk *args, unsigned long *bitmap, unsigned long *start) -+{ -+ int i; -+ pmd_t *pmd; -+ spinlock_t *ptl; -+ struct lru_gen_mm_walk *walk = args->private; -+ struct mem_cgroup *memcg = lruvec_memcg(walk->lruvec); -+ struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec); -+ int old_gen, new_gen = lru_gen_from_seq(walk->max_seq); -+ -+ VM_WARN_ON_ONCE(pud_leaf(*pud)); -+ -+ /* try to batch at most 1+MIN_LRU_BATCH+1 entries */ -+ if (*start == -1) { -+ *start = next; -+ return; -+ } -+ -+ i = next == -1 ? 0 : pmd_index(next) - pmd_index(*start); -+ if (i && i <= MIN_LRU_BATCH) { -+ __set_bit(i - 1, bitmap); -+ return; -+ } -+ -+ pmd = pmd_offset(pud, *start); -+ -+ ptl = pmd_lockptr(args->mm, pmd); -+ if (!spin_trylock(ptl)) -+ goto done; -+ -+ arch_enter_lazy_mmu_mode(); -+ -+ do { -+ unsigned long pfn; -+ struct folio *folio; -+ unsigned long addr = i ? 
(*start & PMD_MASK) + i * PMD_SIZE : *start; -+ -+ pfn = get_pmd_pfn(pmd[i], vma, addr); -+ if (pfn == -1) -+ goto next; -+ -+ if (!pmd_trans_huge(pmd[i])) { -+ if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)) -+ pmdp_test_and_clear_young(vma, addr, pmd + i); -+ goto next; -+ } -+ -+ folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap); -+ if (!folio) -+ goto next; -+ -+ if (!pmdp_test_and_clear_young(vma, addr, pmd + i)) -+ goto next; -+ -+ walk->mm_stats[MM_LEAF_YOUNG]++; -+ -+ if (pmd_dirty(pmd[i]) && !folio_test_dirty(folio) && -+ !(folio_test_anon(folio) && folio_test_swapbacked(folio) && -+ !folio_test_swapcache(folio))) -+ folio_mark_dirty(folio); -+ -+ old_gen = folio_update_gen(folio, new_gen); -+ if (old_gen >= 0 && old_gen != new_gen) -+ update_batch_size(walk, folio, old_gen, new_gen); -+next: -+ i = i > MIN_LRU_BATCH ? 0 : find_next_bit(bitmap, MIN_LRU_BATCH, i) + 1; -+ } while (i <= MIN_LRU_BATCH); -+ -+ arch_leave_lazy_mmu_mode(); -+ spin_unlock(ptl); -+done: -+ *start = -1; -+ bitmap_zero(bitmap, MIN_LRU_BATCH); -+} -+#else -+static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma, -+ struct mm_walk *args, unsigned long *bitmap, unsigned long *start) -+{ -+} -+#endif -+ -+static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, -+ struct mm_walk *args) -+{ -+ int i; -+ pmd_t *pmd; -+ unsigned long next; -+ unsigned long addr; -+ struct vm_area_struct *vma; -+ unsigned long pos = -1; -+ struct lru_gen_mm_walk *walk = args->private; -+ unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {}; -+ -+ VM_WARN_ON_ONCE(pud_leaf(*pud)); -+ -+ /* -+ * Finish an entire PMD in two passes: the first only reaches to PTE -+ * tables to avoid taking the PMD lock; the second, if necessary, takes -+ * the PMD lock to clear the accessed bit in PMD entries. 
-+ */ -+ pmd = pmd_offset(pud, start & PUD_MASK); -+restart: -+ /* walk_pte_range() may call get_next_vma() */ -+ vma = args->vma; -+ for (i = pmd_index(start), addr = start; addr != end; i++, addr = next) { -+ pmd_t val = pmd_read_atomic(pmd + i); -+ -+ /* for pmd_read_atomic() */ -+ barrier(); -+ -+ next = pmd_addr_end(addr, end); -+ -+ if (!pmd_present(val) || is_huge_zero_pmd(val)) { -+ walk->mm_stats[MM_LEAF_TOTAL]++; -+ continue; -+ } -+ -+#ifdef CONFIG_TRANSPARENT_HUGEPAGE -+ if (pmd_trans_huge(val)) { -+ unsigned long pfn = pmd_pfn(val); -+ struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec); -+ -+ walk->mm_stats[MM_LEAF_TOTAL]++; -+ -+ if (!pmd_young(val)) { -+ walk->mm_stats[MM_LEAF_OLD]++; -+ continue; -+ } -+ -+ /* try to avoid unnecessary memory loads */ -+ if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) -+ continue; -+ -+ walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos); -+ continue; -+ } -+#endif -+ walk->mm_stats[MM_NONLEAF_TOTAL]++; -+ -+#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG -+ if (!pmd_young(val)) -+ continue; -+ -+ walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos); -+#endif -+ if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) -+ continue; -+ -+ walk->mm_stats[MM_NONLEAF_FOUND]++; -+ -+ if (!walk_pte_range(&val, addr, next, args)) -+ continue; -+ -+ walk->mm_stats[MM_NONLEAF_ADDED]++; -+ -+ /* carry over to the next generation */ -+ update_bloom_filter(walk->lruvec, walk->max_seq + 1, pmd + i); -+ } -+ -+ walk_pmd_range_locked(pud, -1, vma, args, bitmap, &pos); -+ -+ if (i < PTRS_PER_PMD && get_next_vma(PUD_MASK, PMD_SIZE, args, &start, &end)) -+ goto restart; -+} -+ -+static int walk_pud_range(p4d_t *p4d, unsigned long start, unsigned long end, -+ struct mm_walk *args) -+{ -+ int i; -+ pud_t *pud; -+ unsigned long addr; -+ unsigned long next; -+ struct lru_gen_mm_walk *walk = args->private; -+ -+ VM_WARN_ON_ONCE(p4d_leaf(*p4d)); -+ -+ pud = pud_offset(p4d, start & P4D_MASK); -+restart: -+ for (i = pud_index(start), addr = start; addr != end; i++, addr = next) { -+ pud_t val = READ_ONCE(pud[i]); -+ -+ next = pud_addr_end(addr, end); -+ -+ if (!pud_present(val) || WARN_ON_ONCE(pud_leaf(val))) -+ continue; -+ -+ walk_pmd_range(&val, addr, next, args); -+ -+ if (walk->batched >= MAX_LRU_BATCH) { -+ end = (addr | ~PUD_MASK) + 1; -+ goto done; -+ } -+ } -+ -+ if (i < PTRS_PER_PUD && get_next_vma(P4D_MASK, PUD_SIZE, args, &start, &end)) -+ goto restart; -+ -+ end = round_up(end, P4D_SIZE); -+done: -+ if (!end || !args->vma) -+ return 1; -+ -+ walk->next_addr = max(end, args->vma->vm_start); -+ -+ return -EAGAIN; -+} -+ -+static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_mm_walk *walk) -+{ -+ static const struct mm_walk_ops mm_walk_ops = { -+ .test_walk = should_skip_vma, -+ .p4d_entry = walk_pud_range, -+ }; -+ -+ int err; -+ struct mem_cgroup *memcg = lruvec_memcg(lruvec); -+ -+ walk->next_addr = FIRST_USER_ADDRESS; -+ -+ do { -+ err = -EBUSY; -+ -+ /* folio_update_gen() requires stable folio_memcg() */ -+ if (!mem_cgroup_trylock_pages(memcg)) -+ break; -+ -+ /* the caller might be holding the lock for write */ -+ if (mmap_read_trylock(mm)) { -+ err = walk_page_range(mm, walk->next_addr, ULONG_MAX, &mm_walk_ops, walk); -+ -+ mmap_read_unlock(mm); -+ } -+ -+ mem_cgroup_unlock_pages(); -+ -+ if (walk->batched) { -+ spin_lock_irq(&lruvec->lru_lock); -+ reset_batch_size(lruvec, walk); -+ spin_unlock_irq(&lruvec->lru_lock); -+ } -+ -+ cond_resched(); -+ } while (err == -EAGAIN); 
-+} -+ -+static struct lru_gen_mm_walk *set_mm_walk(struct pglist_data *pgdat) -+{ -+ struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk; -+ -+ if (pgdat && current_is_kswapd()) { -+ VM_WARN_ON_ONCE(walk); -+ -+ walk = &pgdat->mm_walk; -+ } else if (!pgdat && !walk) { -+ VM_WARN_ON_ONCE(current_is_kswapd()); -+ -+ walk = kzalloc(sizeof(*walk), __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); -+ } -+ -+ current->reclaim_state->mm_walk = walk; -+ -+ return walk; -+} -+ -+static void clear_mm_walk(void) -+{ -+ struct lru_gen_mm_walk *walk = current->reclaim_state->mm_walk; -+ -+ VM_WARN_ON_ONCE(walk && memchr_inv(walk->nr_pages, 0, sizeof(walk->nr_pages))); -+ VM_WARN_ON_ONCE(walk && memchr_inv(walk->mm_stats, 0, sizeof(walk->mm_stats))); -+ -+ current->reclaim_state->mm_walk = NULL; -+ -+ if (!current_is_kswapd()) -+ kfree(walk); -+} -+ - static void inc_min_seq(struct lruvec *lruvec, int type) - { - struct lru_gen_struct *lrugen = &lruvec->lrugen; -@@ -3307,7 +4169,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) - return success; - } - --static void inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, bool can_swap) -+static void inc_max_seq(struct lruvec *lruvec, bool can_swap) - { - int prev, next; - int type, zone; -@@ -3317,9 +4179,6 @@ static void inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, bool can_s - - VM_WARN_ON_ONCE(!seq_is_valid(lruvec)); - -- if (max_seq != lrugen->max_seq) -- goto unlock; -- - for (type = 0; type < ANON_AND_FILE; type++) { - if (get_nr_gens(lruvec, type) != MAX_NR_GENS) - continue; -@@ -3357,10 +4216,76 @@ static void inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, bool can_s - - /* make sure preceding modifications appear */ - smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1); --unlock: -+ - spin_unlock_irq(&lruvec->lru_lock); - } - -+static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, -+ struct scan_control *sc, bool can_swap) -+{ -+ bool success; -+ struct lru_gen_mm_walk *walk; -+ struct mm_struct *mm = NULL; -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ -+ VM_WARN_ON_ONCE(max_seq > READ_ONCE(lrugen->max_seq)); -+ -+ /* see the comment in iterate_mm_list() */ -+ if (max_seq <= READ_ONCE(lruvec->mm_state.seq)) { -+ success = false; -+ goto done; -+ } -+ -+ /* -+ * If the hardware doesn't automatically set the accessed bit, fallback -+ * to lru_gen_look_around(), which only clears the accessed bit in a -+ * handful of PTEs. Spreading the work out over a period of time usually -+ * is less efficient, but it avoids bursty page faults. 
-+ */ -+ if (!arch_has_hw_pte_young()) { -+ success = iterate_mm_list_nowalk(lruvec, max_seq); -+ goto done; -+ } -+ -+ walk = set_mm_walk(NULL); -+ if (!walk) { -+ success = iterate_mm_list_nowalk(lruvec, max_seq); -+ goto done; -+ } -+ -+ walk->lruvec = lruvec; -+ walk->max_seq = max_seq; -+ walk->can_swap = can_swap; -+ walk->force_scan = false; -+ -+ do { -+ success = iterate_mm_list(lruvec, walk, &mm); -+ if (mm) -+ walk_mm(lruvec, mm, walk); -+ -+ cond_resched(); -+ } while (mm); -+done: -+ if (!success) { -+ if (!current_is_kswapd() && !sc->priority) -+ wait_event_killable(lruvec->mm_state.wait, -+ max_seq < READ_ONCE(lrugen->max_seq)); -+ -+ return max_seq < READ_ONCE(lrugen->max_seq); -+ } -+ -+ VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq)); -+ -+ inc_max_seq(lruvec, can_swap); -+ /* either this sees any waiters or they will see updated max_seq */ -+ if (wq_has_sleeper(&lruvec->mm_state.wait)) -+ wake_up_all(&lruvec->mm_state.wait); -+ -+ wakeup_flusher_threads(WB_REASON_VMSCAN); -+ -+ return true; -+} -+ - static unsigned long get_nr_evictable(struct lruvec *lruvec, unsigned long max_seq, - unsigned long *min_seq, bool can_swap, bool *need_aging) - { -@@ -3438,7 +4363,7 @@ static void age_lruvec(struct lruvec *lruvec, struct scan_control *sc) - nr_to_scan >>= mem_cgroup_online(memcg) ? sc->priority : 0; - - if (nr_to_scan && need_aging) -- inc_max_seq(lruvec, max_seq, swappiness); -+ try_to_inc_max_seq(lruvec, max_seq, sc, swappiness); - } - - static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) -@@ -3447,6 +4372,8 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) - - VM_WARN_ON_ONCE(!current_is_kswapd()); - -+ set_mm_walk(pgdat); -+ - memcg = mem_cgroup_iter(NULL, NULL, NULL); - do { - struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); -@@ -3455,11 +4382,16 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) - - cond_resched(); - } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); -+ -+ clear_mm_walk(); - } - - /* - * This function exploits spatial locality when shrink_page_list() walks the -- * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages. -+ * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages. If -+ * the scan was done cacheline efficiently, it adds the PMD entry pointing to -+ * the PTE table to the Bloom filter. This forms a feedback loop between the -+ * eviction and the aging. - */ - void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) - { -@@ -3468,6 +4400,8 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) - unsigned long start; - unsigned long end; - unsigned long addr; -+ struct lru_gen_mm_walk *walk; -+ int young = 0; - unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {}; - struct folio *folio = pfn_folio(pvmw->pfn); - struct mem_cgroup *memcg = folio_memcg(folio); -@@ -3497,6 +4431,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) - } - - pte = pvmw->pte - (pvmw->address - start) / PAGE_SIZE; -+ walk = current->reclaim_state ? 
current->reclaim_state->mm_walk : NULL; - - rcu_read_lock(); - arch_enter_lazy_mmu_mode(); -@@ -3511,13 +4446,15 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) - if (!pte_young(pte[i])) - continue; - -- folio = get_pfn_folio(pfn, memcg, pgdat); -+ folio = get_pfn_folio(pfn, memcg, pgdat, !walk || walk->can_swap); - if (!folio) - continue; - - if (!ptep_test_and_clear_young(pvmw->vma, addr, pte + i)) - continue; - -+ young++; -+ - if (pte_dirty(pte[i]) && !folio_test_dirty(folio) && - !(folio_test_anon(folio) && folio_test_swapbacked(folio) && - !folio_test_swapcache(folio))) -@@ -3533,7 +4470,11 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) - arch_leave_lazy_mmu_mode(); - rcu_read_unlock(); - -- if (bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) { -+ /* feedback from rmap walkers to page table walkers */ -+ if (suitable_to_scan(i, young)) -+ update_bloom_filter(lruvec, max_seq, pvmw->pmd); -+ -+ if (!walk && bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) { - for_each_set_bit(i, bitmap, MIN_LRU_BATCH) { - folio = pfn_folio(pte_pfn(pte[i])); - folio_activate(folio); -@@ -3545,8 +4486,10 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) - if (!mem_cgroup_trylock_pages(memcg)) - return; - -- spin_lock_irq(&lruvec->lru_lock); -- new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq); -+ if (!walk) { -+ spin_lock_irq(&lruvec->lru_lock); -+ new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq); -+ } - - for_each_set_bit(i, bitmap, MIN_LRU_BATCH) { - folio = pfn_folio(pte_pfn(pte[i])); -@@ -3557,10 +4500,14 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) - if (old_gen < 0 || old_gen == new_gen) - continue; - -- lru_gen_update_size(lruvec, folio, old_gen, new_gen); -+ if (walk) -+ update_batch_size(walk, folio, old_gen, new_gen); -+ else -+ lru_gen_update_size(lruvec, folio, old_gen, new_gen); - } - -- spin_unlock_irq(&lruvec->lru_lock); -+ if (!walk) -+ spin_unlock_irq(&lruvec->lru_lock); - - mem_cgroup_unlock_pages(); - } -@@ -3843,6 +4790,7 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap - struct folio *folio; - enum vm_event_item item; - struct reclaim_stat stat; -+ struct lru_gen_mm_walk *walk; - struct mem_cgroup *memcg = lruvec_memcg(lruvec); - struct pglist_data *pgdat = lruvec_pgdat(lruvec); - -@@ -3879,6 +4827,10 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap - - move_pages_to_lru(lruvec, &list); - -+ walk = current->reclaim_state->mm_walk; -+ if (walk && walk->batched) -+ reset_batch_size(lruvec, walk); -+ - item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT; - if (!cgroup_reclaim(sc)) - __count_vm_events(item, reclaimed); -@@ -3936,7 +4888,8 @@ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control * - if (current_is_kswapd()) - return 0; - -- inc_max_seq(lruvec, max_seq, can_swap); -+ if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap)) -+ return nr_to_scan; - done: - return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? 
nr_to_scan : 0; - } -@@ -3951,6 +4904,8 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc - - blk_start_plug(&plug); - -+ set_mm_walk(lruvec_pgdat(lruvec)); -+ - while (true) { - int delta; - int swappiness; -@@ -3978,6 +4933,8 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc - cond_resched(); - } - -+ clear_mm_walk(); -+ - blk_finish_plug(&plug); - } - -@@ -3994,15 +4951,21 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) - - for_each_gen_type_zone(gen, type, zone) - INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]); -+ -+ lruvec->mm_state.seq = MIN_NR_GENS; -+ init_waitqueue_head(&lruvec->mm_state.wait); - } - - #ifdef CONFIG_MEMCG - void lru_gen_init_memcg(struct mem_cgroup *memcg) - { -+ INIT_LIST_HEAD(&memcg->mm_list.fifo); -+ spin_lock_init(&memcg->mm_list.lock); - } - - void lru_gen_exit_memcg(struct mem_cgroup *memcg) - { -+ int i; - int nid; - - for_each_node(nid) { -@@ -4010,6 +4973,11 @@ void lru_gen_exit_memcg(struct mem_cgroup *memcg) - - VM_WARN_ON_ONCE(memchr_inv(lruvec->lrugen.nr_pages, 0, - sizeof(lruvec->lrugen.nr_pages))); -+ -+ for (i = 0; i < NR_BLOOM_FILTERS; i++) { -+ bitmap_free(lruvec->mm_state.filters[i]); -+ lruvec->mm_state.filters[i] = NULL; -+ } - } - } - #endif - -From patchwork Wed Jul 6 22:00:18 2022 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Patchwork-Submitter: Yu Zhao -X-Patchwork-Id: 12908706 -Return-Path: -X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on - aws-us-west-2-korg-lkml-1.web.codeaurora.org -Received: from kanga.kvack.org (kanga.kvack.org [205.233.56.17]) - by smtp.lore.kernel.org (Postfix) with ESMTP id 4EB4BCCA480 - for ; Wed, 6 Jul 2022 22:01:11 +0000 (UTC) -Received: by kanga.kvack.org (Postfix) - id BDBC98E0005; Wed, 6 Jul 2022 18:01:06 -0400 (EDT) -Received: by kanga.kvack.org (Postfix, from userid 40) - id B8B188E0001; Wed, 6 Jul 2022 18:01:06 -0400 (EDT) -X-Delivered-To: int-list-linux-mm@kvack.org -Received: by kanga.kvack.org (Postfix, from userid 63042) - id A05548E0005; Wed, 6 Jul 2022 18:01:06 -0400 (EDT) -X-Delivered-To: linux-mm@kvack.org -Received: from relay.hostedemail.com (smtprelay0011.hostedemail.com - [216.40.44.11]) - by kanga.kvack.org (Postfix) with ESMTP id 8B02E8E0001 - for ; Wed, 6 Jul 2022 18:01:06 -0400 (EDT) -Received: from smtpin07.hostedemail.com (a10.router.float.18 [10.200.18.1]) - by unirelay08.hostedemail.com (Postfix) with ESMTP id 5322D218C7 - for ; Wed, 6 Jul 2022 22:01:06 +0000 (UTC) -X-FDA: 79658046132.07.9DE38CF -Received: from mail-yw1-f201.google.com (mail-yw1-f201.google.com - [209.85.128.201]) - by imf28.hostedemail.com (Postfix) with ESMTP id 97908C0054 - for ; Wed, 6 Jul 2022 22:01:05 +0000 (UTC) -Received: by mail-yw1-f201.google.com with SMTP id - 00721157ae682-31c8c7138ebso70710887b3.17 - for ; Wed, 06 Jul 2022 15:01:05 -0700 (PDT) -DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20210112; - h=date:in-reply-to:message-id:mime-version:references:subject:from:to - :cc:content-transfer-encoding; - bh=ZS7i+zWbsuFYZiRlenI/F/Y7PzZj3Cv3ABmuogIV+d0=; - b=Is9nnwDLdoF8cmdhQhl8FEZEIPLZOTCQNPziPrZ3WCv4Hkh+8SM7Qirn2/JzlJe5Qt - IMzoKhGVVu62zPGO2f8uqvwVO7ZBpwGEu3Y0nx+xsR+UR6rSMs9BgDYfSl6hxumhEzXQ - AVU29P45SCq1drQE+AuDu2NsKyQ+R9NLi2XNN7GjQzGIS59mnKnciabxZ70kUwocqXEh - TsuagDSQmmH5SjPkOzOUNm6Sk8f3JEhf7X8a1bPpbg+ozA3KspzkTBjkMrHomLe9ffcm - BFgwNEyH9XBgnj0m4gnfT2SYRWWY1k3MsXJMQ+zIJmqc6vDRB4WpYW/qGMJadOFCZfMM - nXgA== -X-Google-DKIM-Signature: v=1; 
a=rsa-sha256; c=relaxed/relaxed; - d=1e100.net; s=20210112; - h=x-gm-message-state:date:in-reply-to:message-id:mime-version - :references:subject:from:to:cc:content-transfer-encoding; - bh=ZS7i+zWbsuFYZiRlenI/F/Y7PzZj3Cv3ABmuogIV+d0=; - b=RCS2awk0QPdb9ZH7wNOyKeKXpab1x67IIAx6j5zq56jXzEwz7GXRfLumdFrGKXCGb4 - Ni9Rp5dJZYzq8bxw94GX64zJKMTSoJ4V+UAQj4zSBLEczJlyZKy/yWOit9ZSOEb0iMvR - 4+5mHFOU3YS4X1wdcqI8B4TLEOn1Me9mvfoveMAhSf8VHEhQ3neJrM0mY0hrTiGCSK5A - J4de3u3SGUe8nuXf8ZpGc38zd9x6oFOeBCUdjka6ao55yXPRLAwyLVnsLaRAKC6md3fT - VHJAQ+yfE2vhF9ga9RIwItKKGhAOJVHWoDcC3pQb9GfR0/p6eeP/23lc7iaFdQYywDcG - ckOQ== -X-Gm-Message-State: AJIora9+K81MvAVQpqZj+MVKlE2AQLbnYpnFjbMqoGoc4IJHROymV58Z - hBHBT8xaW7KW2eD0IR1+YvgbPP1Vazg= -X-Google-Smtp-Source: - AGRyM1uwFcCEi/xhgZ0h4sIMbRvSkHm3hRssR6SxZ63hO5m3+xAhe4vRctYY9iQ6nmc/njn1u9BXzW/MiYM= -X-Received: from yuzhao.bld.corp.google.com - ([2620:15c:183:200:b89c:e10a:466e:cf7d]) - (user=yuzhao job=sendgmr) by 2002:a25:abb3:0:b0:66e:2f9a:4201 with SMTP id - v48-20020a25abb3000000b0066e2f9a4201mr26479914ybi.125.1657144864829; Wed, 06 - Jul 2022 15:01:04 -0700 (PDT) -Date: Wed, 6 Jul 2022 16:00:18 -0600 -In-Reply-To: <20220706220022.968789-1-yuzhao@google.com> -Message-Id: <20220706220022.968789-10-yuzhao@google.com> -Mime-Version: 1.0 -References: <20220706220022.968789-1-yuzhao@google.com> -X-Mailer: git-send-email 2.37.0.rc0.161.g10f37bed90-goog -Subject: [PATCH v13 09/14] mm: multi-gen LRU: optimize multiple memcgs -From: Yu Zhao -To: Andrew Morton -Cc: Andi Kleen , - Aneesh Kumar , - Catalin Marinas , - Dave Hansen , Hillf Danton , - Jens Axboe , Johannes Weiner , - Jonathan Corbet , - Linus Torvalds , - Matthew Wilcox , Mel Gorman , - Michael Larabel , - Michal Hocko , Mike Rapoport , - Peter Zijlstra , Tejun Heo , - Vlastimil Babka , Will Deacon , - linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org, - linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org, - page-reclaim@google.com, Yu Zhao , - Brian Geffon , - Jan Alexander Steffens , - Oleksandr Natalenko , - Steven Barrett , - Suleiman Souhlal , Daniel Byrne , - Donald Carr , - " =?utf-8?q?Holger_Hoffst=C3=A4tte?= " , - Konstantin Kharlamov , - Shuang Zhai , Sofia Trinh , - Vaibhav Jain -ARC-Seal: i=1; s=arc-20220608; d=hostedemail.com; t=1657144865; a=rsa-sha256; - cv=none; - b=q3V8GcW5a7gLdEkDRvzjbN1oCqdl/PcNPOBK/4yn5O6DZ96fhshx4mHlVqwifd6VM/h6DQ - bokhlmu8Wk2Z61Pnli47ITBMWiwbyG5GqreBCrRln1NfcGXS0mPhEW7lQeDU+ca5XNHA6R - 1crW4jbDQ+Ez8oz8X4F5X5OxrI6ddoA= -ARC-Authentication-Results: i=1; - imf28.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=Is9nnwDL; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf28.hostedemail.com: domain of - 3IAbGYgYKCGEXTYG9NFNNFKD.BNLKHMTW-LLJU9BJ.NQF@flex--yuzhao.bounces.google.com - designates 209.85.128.201 as permitted sender) - smtp.mailfrom=3IAbGYgYKCGEXTYG9NFNNFKD.BNLKHMTW-LLJU9BJ.NQF@flex--yuzhao.bounces.google.com -ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; - d=hostedemail.com; - s=arc-20220608; t=1657144865; - h=from:from:sender:reply-to:subject:subject:date:date: - message-id:message-id:to:to:cc:cc:mime-version:mime-version: - content-type:content-type: - content-transfer-encoding:content-transfer-encoding: - in-reply-to:in-reply-to:references:references:dkim-signature; - bh=ZS7i+zWbsuFYZiRlenI/F/Y7PzZj3Cv3ABmuogIV+d0=; - b=fen/7hqEjb4DZZ+j8OhMLxCTQaBQg1nYoyqyzaLOG2yllRdoXgC8upN+NQjx2/OsQCSFdY - tIpgzM/hy9Vb35EZoTXPI0b6U/1kGDSWHJyLMU3CkJKzSF9lMbdGm9UdAtXJ/1dnB/CxlX - 
R3DdBW8MzocpYgtBQDvKhNaIKlqhdRg= -X-Rspamd-Server: rspam08 -X-Rspamd-Queue-Id: 97908C0054 -X-Rspam-User: -Authentication-Results: imf28.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=Is9nnwDL; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf28.hostedemail.com: domain of - 3IAbGYgYKCGEXTYG9NFNNFKD.BNLKHMTW-LLJU9BJ.NQF@flex--yuzhao.bounces.google.com - designates 209.85.128.201 as permitted sender) - smtp.mailfrom=3IAbGYgYKCGEXTYG9NFNNFKD.BNLKHMTW-LLJU9BJ.NQF@flex--yuzhao.bounces.google.com -X-Stat-Signature: xt8apxhnez18ydabrirx1u5kimzk5obt -X-HE-Tag: 1657144865-519413 -X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4 -Sender: owner-linux-mm@kvack.org -Precedence: bulk -X-Loop: owner-majordomo@kvack.org -List-ID: - -When multiple memcgs are available, it is possible to make better -choices based on generations and tiers and therefore improve the -overall performance under global memory pressure. This patch adds a -rudimentary optimization to select memcgs that can drop single-use -unmapped clean pages first. Doing so reduces the chance of going into -the aging path or swapping. These two decisions can be costly. - -A typical example that benefits from this optimization is a server -running mixed types of workloads, e.g., heavy anon workload in one -memcg and heavy buffered I/O workload in the other. - -Though this optimization can be applied to both kswapd and direct -reclaim, it is only added to kswapd to keep the patchset manageable. -Later improvements will cover the direct reclaim path. - -Server benchmark results: - Mixed workloads: - fio (buffered I/O): +[19, 21]% - IOPS BW - patch1-8: 1880k 7343MiB/s - patch1-9: 2252k 8796MiB/s - - memcached (anon): +[119, 123]% - Ops/sec KB/sec - patch1-8: 862768.65 33514.68 - patch1-9: 1911022.12 74234.54 - - Mixed workloads: - fio (buffered I/O): +[75, 77]% - IOPS BW - 5.19-rc1: 1279k 4996MiB/s - patch1-9: 2252k 8796MiB/s - - memcached (anon): +[13, 15]% - Ops/sec KB/sec - 5.19-rc1: 1673524.04 65008.87 - patch1-9: 1911022.12 74234.54 - - Configurations: - (changes since patch 6) - - cat mixed.sh - modprobe brd rd_nr=2 rd_size=56623104 - - swapoff -a - mkswap /dev/ram0 - swapon /dev/ram0 - - mkfs.ext4 /dev/ram1 - mount -t ext4 /dev/ram1 /mnt - - memtier_benchmark -S /var/run/memcached/memcached.sock \ - -P memcache_binary -n allkeys --key-minimum=1 \ - --key-maximum=50000000 --key-pattern=P:P -c 1 -t 36 \ - --ratio 1:0 --pipeline 8 -d 2000 - - fio -name=mglru --numjobs=36 --directory=/mnt --size=1408m \ - --buffered=1 --ioengine=io_uring --iodepth=128 \ - --iodepth_batch_submit=32 --iodepth_batch_complete=32 \ - --rw=randread --random_distribution=random --norandommap \ - --time_based --ramp_time=10m --runtime=90m --group_reporting & - pid=$! 
- - sleep 200 - - memtier_benchmark -S /var/run/memcached/memcached.sock \ - -P memcache_binary -n allkeys --key-minimum=1 \ - --key-maximum=50000000 --key-pattern=R:R -c 1 -t 36 \ - --ratio 0:1 --pipeline 8 --randomize --distinct-client-seed - - kill -INT $pid - wait - -Client benchmark results: - no change (CONFIG_MEMCG=n) - -Signed-off-by: Yu Zhao -Acked-by: Brian Geffon -Acked-by: Jan Alexander Steffens (heftig) -Acked-by: Oleksandr Natalenko -Acked-by: Steven Barrett -Acked-by: Suleiman Souhlal -Tested-by: Daniel Byrne -Tested-by: Donald Carr -Tested-by: Holger Hoffstätte -Tested-by: Konstantin Kharlamov -Tested-by: Shuang Zhai -Tested-by: Sofia Trinh -Tested-by: Vaibhav Jain ---- - mm/vmscan.c | 55 ++++++++++++++++++++++++++++++++++++++++++++--------- - 1 file changed, 46 insertions(+), 9 deletions(-) - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 8e55a1ce1ae0..f469a2740835 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -129,6 +129,13 @@ struct scan_control { - /* Always discard instead of demoting to lower tier memory */ - unsigned int no_demotion:1; - -+#ifdef CONFIG_LRU_GEN -+ /* help make better choices when multiple memcgs are available */ -+ unsigned int memcgs_need_aging:1; -+ unsigned int memcgs_need_swapping:1; -+ unsigned int memcgs_avoid_swapping:1; -+#endif -+ - /* Allocation order */ - s8 order; - -@@ -4372,6 +4379,22 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) - - VM_WARN_ON_ONCE(!current_is_kswapd()); - -+ /* -+ * To reduce the chance of going into the aging path or swapping, which -+ * can be costly, optimistically skip them unless their corresponding -+ * flags were cleared in the eviction path. This improves the overall -+ * performance when multiple memcgs are available. -+ */ -+ if (!sc->memcgs_need_aging) { -+ sc->memcgs_need_aging = true; -+ sc->memcgs_avoid_swapping = !sc->memcgs_need_swapping; -+ sc->memcgs_need_swapping = true; -+ return; -+ } -+ -+ sc->memcgs_need_swapping = true; -+ sc->memcgs_avoid_swapping = true; -+ - set_mm_walk(pgdat); - - memcg = mem_cgroup_iter(NULL, NULL, NULL); -@@ -4781,7 +4804,8 @@ static int isolate_folios(struct lruvec *lruvec, struct scan_control *sc, int sw - return scanned; - } - --static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness) -+static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swappiness, -+ bool *need_swapping) - { - int type; - int scanned; -@@ -4844,14 +4868,16 @@ static int evict_folios(struct lruvec *lruvec, struct scan_control *sc, int swap - - sc->nr_reclaimed += reclaimed; - -+ if (type == LRU_GEN_ANON && need_swapping) -+ *need_swapping = true; -+ - return scanned; - } - - static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, -- bool can_swap, unsigned long reclaimed) -+ bool can_swap, unsigned long reclaimed, bool *need_aging) - { - int priority; -- bool need_aging; - unsigned long nr_to_scan; - struct mem_cgroup *memcg = lruvec_memcg(lruvec); - DEFINE_MAX_SEQ(lruvec); -@@ -4861,7 +4887,7 @@ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control * - (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim)) - return 0; - -- nr_to_scan = get_nr_evictable(lruvec, max_seq, min_seq, can_swap, &need_aging); -+ nr_to_scan = get_nr_evictable(lruvec, max_seq, min_seq, can_swap, need_aging); - if (!nr_to_scan) - return 0; - -@@ -4877,7 +4903,7 @@ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control * - if (!nr_to_scan) - return 0; - -- 
if (!need_aging) -+ if (!*need_aging) - return nr_to_scan; - - /* skip the aging path at the default priority */ -@@ -4897,6 +4923,8 @@ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control * - static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) - { - struct blk_plug plug; -+ bool need_aging = false; -+ bool need_swapping = false; - unsigned long scanned = 0; - unsigned long reclaimed = sc->nr_reclaimed; - -@@ -4918,21 +4946,30 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc - else - swappiness = 0; - -- nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, reclaimed); -+ nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, reclaimed, &need_aging); - if (!nr_to_scan) -- break; -+ goto done; - -- delta = evict_folios(lruvec, sc, swappiness); -+ delta = evict_folios(lruvec, sc, swappiness, &need_swapping); - if (!delta) -- break; -+ goto done; - - scanned += delta; - if (scanned >= nr_to_scan) - break; - -+ if (sc->memcgs_avoid_swapping && swappiness < 200 && need_swapping) -+ break; -+ - cond_resched(); - } - -+ /* see the comment in lru_gen_age_node() */ -+ if (!need_aging) -+ sc->memcgs_need_aging = false; -+ if (!need_swapping) -+ sc->memcgs_need_swapping = false; -+done: - clear_mm_walk(); - - blk_finish_plug(&plug); - -From patchwork Wed Jul 6 22:00:19 2022 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Patchwork-Submitter: Yu Zhao -X-Patchwork-Id: 12908707 -Return-Path: -X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on - aws-us-west-2-korg-lkml-1.web.codeaurora.org -Received: from kanga.kvack.org (kanga.kvack.org [205.233.56.17]) - by smtp.lore.kernel.org (Postfix) with ESMTP id 8DE21CCA480 - for ; Wed, 6 Jul 2022 22:01:14 +0000 (UTC) -Received: by kanga.kvack.org (Postfix) - id D162E8E0006; Wed, 6 Jul 2022 18:01:07 -0400 (EDT) -Received: by kanga.kvack.org (Postfix, from userid 40) - id C9D778E0001; Wed, 6 Jul 2022 18:01:07 -0400 (EDT) -X-Delivered-To: int-list-linux-mm@kvack.org -Received: by kanga.kvack.org (Postfix, from userid 63042) - id B177F8E0006; Wed, 6 Jul 2022 18:01:07 -0400 (EDT) -X-Delivered-To: linux-mm@kvack.org -Received: from relay.hostedemail.com (smtprelay0014.hostedemail.com - [216.40.44.14]) - by kanga.kvack.org (Postfix) with ESMTP id 9D5508E0001 - for ; Wed, 6 Jul 2022 18:01:07 -0400 (EDT) -Received: from smtpin29.hostedemail.com (a10.router.float.18 [10.200.18.1]) - by unirelay12.hostedemail.com (Postfix) with ESMTP id 5B55512053A - for ; Wed, 6 Jul 2022 22:01:07 +0000 (UTC) -X-FDA: 79658046174.29.1D659FF -Received: from mail-yw1-f201.google.com (mail-yw1-f201.google.com - [209.85.128.201]) - by imf23.hostedemail.com (Postfix) with ESMTP id F2017140064 - for ; Wed, 6 Jul 2022 22:01:06 +0000 (UTC) -Received: by mail-yw1-f201.google.com with SMTP id - 00721157ae682-31c9a49a1a8so63946947b3.9 - for ; Wed, 06 Jul 2022 15:01:06 -0700 (PDT) -DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20210112; - h=date:in-reply-to:message-id:mime-version:references:subject:from:to - :cc:content-transfer-encoding; - bh=t3JqYbFJT9lP6E96sRUzmCzQEu0iJg+mfU6dciROW6I=; - b=K/nKIb14JmIaSQ25G+voEr3Xu6sFBToolWxLX2DrPdbxAa6BpfoEW4/5621Rzsff4D - 1k3G9tp+5ESbNVZCZfqietdtMt6OTAchdy14TXI4WTiTZLglVlIfr80zpxGfIGcphLBv - c2R6icWOjZ0upEVkivTfwH9rKBl233YFlYCWfHzoiU07eBFA2yPOzHZx49n6UFl3tbHt - eSai05q6oFPAPMqEwWKLLg5e2ewTiqoowbahH4nTTyw69dIDZhmip41HFaA0/Sczzyq3 - 
JDic9dSJ+BDTRQ6TaWU0nw7eqP8mi+/sxNdfATpIluPgr0W9A0QZ1JCn1D9q09woZwV/ - PFjA== -X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=1e100.net; s=20210112; - h=x-gm-message-state:date:in-reply-to:message-id:mime-version - :references:subject:from:to:cc:content-transfer-encoding; - bh=t3JqYbFJT9lP6E96sRUzmCzQEu0iJg+mfU6dciROW6I=; - b=HM7LGBCrS1eJ/vRo3XCktj01RjkBHhQmWt02aEfYLwa8PL1HdwG+c4Me4gn54xxguO - czAtvRKRQXHGFYRw3EumNTE4ZGOfg2XqVtN9EjwqQhTlBSwX+fziamWFBeroVGwQW5G1 - dGw5hoaY2I+TFoZJ29KsIagqOSfrJETzQGULi/sVBWKaWeb/S8HFZY/EKyoYDxZqIBmP - sF9WEmAcW0+fvVxqWYl3uVJzRtjHRL6YsrIahPgXVedZvFAkXUNU4kkV7vNYV3lz5mBQ - xaBVG/fe1KTKIxNs72Rv8R1FEPhZcIGFotO3DUMW0MRlYpm9F1IVf32khtQ+h2Ym9As/ - PzIA== -X-Gm-Message-State: AJIora954b3OSYa+S8ljATClx2rklKm1t2+1N36MlNK4kzbf9PLLetyh - 9bLCk9rYASJ4G36LmS+oOZUzhHu3MzI= -X-Google-Smtp-Source: - AGRyM1uCmWNAcyv7l4c+bwlvsNWjdcmS50NXK/ousi79Gs9bHWyAObimB3RXzG41nJY/wFbH1TL7Js/68Zk= -X-Received: from yuzhao.bld.corp.google.com - ([2620:15c:183:200:b89c:e10a:466e:cf7d]) - (user=yuzhao job=sendgmr) by 2002:a25:e74e:0:b0:66e:32d4:1f0 with SMTP id - e75-20020a25e74e000000b0066e32d401f0mr24265460ybh.421.1657144866511; Wed, 06 - Jul 2022 15:01:06 -0700 (PDT) -Date: Wed, 6 Jul 2022 16:00:19 -0600 -In-Reply-To: <20220706220022.968789-1-yuzhao@google.com> -Message-Id: <20220706220022.968789-11-yuzhao@google.com> -Mime-Version: 1.0 -References: <20220706220022.968789-1-yuzhao@google.com> -X-Mailer: git-send-email 2.37.0.rc0.161.g10f37bed90-goog -Subject: [PATCH v13 10/14] mm: multi-gen LRU: kill switch -From: Yu Zhao -To: Andrew Morton -Cc: Andi Kleen , - Aneesh Kumar , - Catalin Marinas , - Dave Hansen , Hillf Danton , - Jens Axboe , Johannes Weiner , - Jonathan Corbet , - Linus Torvalds , - Matthew Wilcox , Mel Gorman , - Michael Larabel , - Michal Hocko , Mike Rapoport , - Peter Zijlstra , Tejun Heo , - Vlastimil Babka , Will Deacon , - linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org, - linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org, - page-reclaim@google.com, Yu Zhao , - Brian Geffon , - Jan Alexander Steffens , - Oleksandr Natalenko , - Steven Barrett , - Suleiman Souhlal , Daniel Byrne , - Donald Carr , - " =?utf-8?q?Holger_Hoffst=C3=A4tte?= " , - Konstantin Kharlamov , - Shuang Zhai , Sofia Trinh , - Vaibhav Jain -ARC-Seal: i=1; s=arc-20220608; d=hostedemail.com; t=1657144867; a=rsa-sha256; - cv=none; - b=srv0M1GwC9igO3Ssc6UQavsL8SeMR4TafyrVnDRr+qa3IdXnaeYuD7mVXVj2DTKftiNgsf - Z0438E6xGFdYlVffElthsW5+/j/7H+6w/+Rrj2/CtQGpnJriXc77Tp9lWDSGzJMNo/18Ur - 5XMrBTHRNBBRZNKQECXPfqx8mY2KlTw= -ARC-Authentication-Results: i=1; - imf23.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b="K/nKIb14"; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf23.hostedemail.com: domain of - 3IgbGYgYKCGMZVaIBPHPPHMF.DPNMJOVY-NNLWBDL.PSH@flex--yuzhao.bounces.google.com - designates 209.85.128.201 as permitted sender) - smtp.mailfrom=3IgbGYgYKCGMZVaIBPHPPHMF.DPNMJOVY-NNLWBDL.PSH@flex--yuzhao.bounces.google.com -ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; - d=hostedemail.com; - s=arc-20220608; t=1657144867; - h=from:from:sender:reply-to:subject:subject:date:date: - message-id:message-id:to:to:cc:cc:mime-version:mime-version: - content-type:content-type: - content-transfer-encoding:content-transfer-encoding: - in-reply-to:in-reply-to:references:references:dkim-signature; - bh=t3JqYbFJT9lP6E96sRUzmCzQEu0iJg+mfU6dciROW6I=; - b=v9VdQ/ak+0604gCltqLudvPrAy3WcrJhWxCXksIxicPZWyjAnzABJHeJwcXRUr74ilTy45 - 
1o9D+n98WPgODBBDXuQgOxdZ/m1FekCnCpnWnR72lB+33NlF8zDMR0jbie23mZMDqsMO/w - cYTpCFhyTn0ribSQdUv7TlFoFBXyRTw= -Authentication-Results: imf23.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b="K/nKIb14"; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf23.hostedemail.com: domain of - 3IgbGYgYKCGMZVaIBPHPPHMF.DPNMJOVY-NNLWBDL.PSH@flex--yuzhao.bounces.google.com - designates 209.85.128.201 as permitted sender) - smtp.mailfrom=3IgbGYgYKCGMZVaIBPHPPHMF.DPNMJOVY-NNLWBDL.PSH@flex--yuzhao.bounces.google.com -X-Stat-Signature: u9yuk5ppb8f6meekzwox11y8u8f8zsip -X-Rspamd-Queue-Id: F2017140064 -X-Rspamd-Server: rspam05 -X-Rspam-User: -X-HE-Tag: 1657144866-771308 -X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4 -Sender: owner-linux-mm@kvack.org -Precedence: bulk -X-Loop: owner-majordomo@kvack.org -List-ID: - -Add /sys/kernel/mm/lru_gen/enabled as a kill switch. Components that -can be disabled include: - 0x0001: the multi-gen LRU core - 0x0002: walking page table, when arch_has_hw_pte_young() returns - true - 0x0004: clearing the accessed bit in non-leaf PMD entries, when - CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y - [yYnN]: apply to all the components above -E.g., - echo y >/sys/kernel/mm/lru_gen/enabled - cat /sys/kernel/mm/lru_gen/enabled - 0x0007 - echo 5 >/sys/kernel/mm/lru_gen/enabled - cat /sys/kernel/mm/lru_gen/enabled - 0x0005 - -NB: the page table walks happen on the scale of seconds under heavy -memory pressure, in which case the mmap_lock contention is a lesser -concern, compared with the LRU lock contention and the I/O congestion. -So far the only well-known case of the mmap_lock contention happens on -Android, due to Scudo [1] which allocates several thousand VMAs for -merely a few hundred MBs. The SPF and the Maple Tree also have -provided their own assessments [2][3]. However, if walking page tables -does worsen the mmap_lock contention, the kill switch can be used to -disable it. In this case the multi-gen LRU will suffer a minor -performance degradation, as shown previously. - -Clearing the accessed bit in non-leaf PMD entries can also be -disabled, since this behavior was not tested on x86 varieties other -than Intel and AMD. 
- -[1] https://source.android.com/devices/tech/debug/scudo -[2] https://lore.kernel.org/r/20220128131006.67712-1-michel@lespinasse.org/ -[3] https://lore.kernel.org/r/20220426150616.3937571-1-Liam.Howlett@oracle.com/ - -Signed-off-by: Yu Zhao -Acked-by: Brian Geffon -Acked-by: Jan Alexander Steffens (heftig) -Acked-by: Oleksandr Natalenko -Acked-by: Steven Barrett -Acked-by: Suleiman Souhlal -Tested-by: Daniel Byrne -Tested-by: Donald Carr -Tested-by: Holger Hoffstätte -Tested-by: Konstantin Kharlamov -Tested-by: Shuang Zhai -Tested-by: Sofia Trinh -Tested-by: Vaibhav Jain ---- - include/linux/cgroup.h | 15 ++- - include/linux/mm_inline.h | 15 ++- - include/linux/mmzone.h | 9 ++ - kernel/cgroup/cgroup-internal.h | 1 - - mm/Kconfig | 6 + - mm/vmscan.c | 231 +++++++++++++++++++++++++++++++- - 6 files changed, 268 insertions(+), 9 deletions(-) - -diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h -index 0d1ada8968d7..1bc0cabf993f 100644 ---- a/include/linux/cgroup.h -+++ b/include/linux/cgroup.h -@@ -432,6 +432,18 @@ static inline void cgroup_put(struct cgroup *cgrp) - css_put(&cgrp->self); - } - -+extern struct mutex cgroup_mutex; -+ -+static inline void cgroup_lock(void) -+{ -+ mutex_lock(&cgroup_mutex); -+} -+ -+static inline void cgroup_unlock(void) -+{ -+ mutex_unlock(&cgroup_mutex); -+} -+ - /** - * task_css_set_check - obtain a task's css_set with extra access conditions - * @task: the task to obtain css_set for -@@ -446,7 +458,6 @@ static inline void cgroup_put(struct cgroup *cgrp) - * as locks used during the cgroup_subsys::attach() methods. - */ - #ifdef CONFIG_PROVE_RCU --extern struct mutex cgroup_mutex; - extern spinlock_t css_set_lock; - #define task_css_set_check(task, __c) \ - rcu_dereference_check((task)->cgroups, \ -@@ -708,6 +719,8 @@ struct cgroup; - static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; } - static inline void css_get(struct cgroup_subsys_state *css) {} - static inline void css_put(struct cgroup_subsys_state *css) {} -+static inline void cgroup_lock(void) {} -+static inline void cgroup_unlock(void) {} - static inline int cgroup_attach_task_all(struct task_struct *from, - struct task_struct *t) { return 0; } - static inline int cgroupstats_build(struct cgroupstats *stats, -diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h -index f2b2296a42f9..4949eda9a9a2 100644 ---- a/include/linux/mm_inline.h -+++ b/include/linux/mm_inline.h -@@ -106,10 +106,21 @@ static __always_inline enum lru_list folio_lru_list(struct folio *folio) - - #ifdef CONFIG_LRU_GEN - -+#ifdef CONFIG_LRU_GEN_ENABLED - static inline bool lru_gen_enabled(void) - { -- return true; -+ DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]); -+ -+ return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]); - } -+#else -+static inline bool lru_gen_enabled(void) -+{ -+ DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]); -+ -+ return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]); -+} -+#endif - - static inline bool lru_gen_in_fault(void) - { -@@ -222,7 +233,7 @@ static inline bool lru_gen_add_folio(struct lruvec *lruvec, struct folio *folio, - - VM_WARN_ON_ONCE_FOLIO(gen != -1, folio); - -- if (folio_test_unevictable(folio)) -+ if (folio_test_unevictable(folio) || !lrugen->enabled) - return false; - /* - * There are three common cases for this page: -diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index 0cf0856b484a..840b7ca8b91f 100644 ---- a/include/linux/mmzone.h -+++ b/include/linux/mmzone.h -@@ -384,6 +384,13 @@ enum { - LRU_GEN_FILE, - 
}; - -+enum { -+ LRU_GEN_CORE, -+ LRU_GEN_MM_WALK, -+ LRU_GEN_NONLEAF_YOUNG, -+ NR_LRU_GEN_CAPS -+}; -+ - #define MIN_LRU_BATCH BITS_PER_LONG - #define MAX_LRU_BATCH (MIN_LRU_BATCH * 128) - -@@ -425,6 +432,8 @@ struct lru_gen_struct { - /* can be modified without holding the LRU lock */ - atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; - atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; -+ /* whether the multi-gen LRU is enabled */ -+ bool enabled; - }; - - enum { -diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h -index 5da09c74228d..c966e55cab29 100644 ---- a/kernel/cgroup/cgroup-internal.h -+++ b/kernel/cgroup/cgroup-internal.h -@@ -164,7 +164,6 @@ struct cgroup_mgctx { - #define DEFINE_CGROUP_MGCTX(name) \ - struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) - --extern struct mutex cgroup_mutex; - extern spinlock_t css_set_lock; - extern struct cgroup_subsys *cgroup_subsys[]; - extern struct list_head cgroup_roots; -diff --git a/mm/Kconfig b/mm/Kconfig -index a93478acf341..0c2ef0af0036 100644 ---- a/mm/Kconfig -+++ b/mm/Kconfig -@@ -1139,6 +1139,12 @@ config LRU_GEN - help - A high performance LRU implementation to overcommit memory. - -+config LRU_GEN_ENABLED -+ bool "Enable by default" -+ depends on LRU_GEN -+ help -+ This option enables the multi-gen LRU by default. -+ - config LRU_GEN_STATS - bool "Full stats for debugging" - depends on LRU_GEN -diff --git a/mm/vmscan.c b/mm/vmscan.c -index f469a2740835..4c8b475429ed 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -52,6 +52,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -3013,6 +3014,14 @@ static bool can_age_anon_pages(struct pglist_data *pgdat, - - #ifdef CONFIG_LRU_GEN - -+#ifdef CONFIG_LRU_GEN_ENABLED -+DEFINE_STATIC_KEY_ARRAY_TRUE(lru_gen_caps, NR_LRU_GEN_CAPS); -+#define get_cap(cap) static_branch_likely(&lru_gen_caps[cap]) -+#else -+DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS); -+#define get_cap(cap) static_branch_unlikely(&lru_gen_caps[cap]) -+#endif -+ - /****************************************************************************** - * shorthand helpers - ******************************************************************************/ -@@ -3890,7 +3899,8 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area - goto next; - - if (!pmd_trans_huge(pmd[i])) { -- if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG)) -+ if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && -+ get_cap(LRU_GEN_NONLEAF_YOUNG)) - pmdp_test_and_clear_young(vma, addr, pmd + i); - goto next; - } -@@ -3988,10 +3998,12 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, - walk->mm_stats[MM_NONLEAF_TOTAL]++; - - #ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG -- if (!pmd_young(val)) -- continue; -+ if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { -+ if (!pmd_young(val)) -+ continue; - -- walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos); -+ walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos); -+ } - #endif - if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) - continue; -@@ -4249,7 +4261,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, - * handful of PTEs. Spreading the work out over a period of time usually - * is less efficient, but it avoids bursty page faults. 
- */ -- if (!arch_has_hw_pte_young()) { -+ if (!(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) { - success = iterate_mm_list_nowalk(lruvec, max_seq); - goto done; - } -@@ -4975,6 +4987,211 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc - blk_finish_plug(&plug); - } - -+/****************************************************************************** -+ * state change -+ ******************************************************************************/ -+ -+static bool __maybe_unused state_is_valid(struct lruvec *lruvec) -+{ -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ -+ if (lrugen->enabled) { -+ enum lru_list lru; -+ -+ for_each_evictable_lru(lru) { -+ if (!list_empty(&lruvec->lists[lru])) -+ return false; -+ } -+ } else { -+ int gen, type, zone; -+ -+ for_each_gen_type_zone(gen, type, zone) { -+ if (!list_empty(&lrugen->lists[gen][type][zone])) -+ return false; -+ -+ /* unlikely but not a bug when reset_batch_size() is pending */ -+ VM_WARN_ON_ONCE(lrugen->nr_pages[gen][type][zone]); -+ } -+ } -+ -+ return true; -+} -+ -+static bool fill_evictable(struct lruvec *lruvec) -+{ -+ enum lru_list lru; -+ int remaining = MAX_LRU_BATCH; -+ -+ for_each_evictable_lru(lru) { -+ int type = is_file_lru(lru); -+ bool active = is_active_lru(lru); -+ struct list_head *head = &lruvec->lists[lru]; -+ -+ while (!list_empty(head)) { -+ bool success; -+ struct folio *folio = lru_to_folio(head); -+ -+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio); -+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio) != active, folio); -+ VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio); -+ VM_WARN_ON_ONCE_FOLIO(folio_lru_gen(folio) != -1, folio); -+ -+ lruvec_del_folio(lruvec, folio); -+ success = lru_gen_add_folio(lruvec, folio, false); -+ VM_WARN_ON_ONCE(!success); -+ -+ if (!--remaining) -+ return false; -+ } -+ } -+ -+ return true; -+} -+ -+static bool drain_evictable(struct lruvec *lruvec) -+{ -+ int gen, type, zone; -+ int remaining = MAX_LRU_BATCH; -+ -+ for_each_gen_type_zone(gen, type, zone) { -+ struct list_head *head = &lruvec->lrugen.lists[gen][type][zone]; -+ -+ while (!list_empty(head)) { -+ bool success; -+ struct folio *folio = lru_to_folio(head); -+ -+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio); -+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio); -+ VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio); -+ VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio); -+ -+ success = lru_gen_del_folio(lruvec, folio, false); -+ VM_WARN_ON_ONCE(!success); -+ lruvec_add_folio(lruvec, folio); -+ -+ if (!--remaining) -+ return false; -+ } -+ } -+ -+ return true; -+} -+ -+static void lru_gen_change_state(bool enabled) -+{ -+ static DEFINE_MUTEX(state_mutex); -+ -+ struct mem_cgroup *memcg; -+ -+ cgroup_lock(); -+ cpus_read_lock(); -+ get_online_mems(); -+ mutex_lock(&state_mutex); -+ -+ if (enabled == lru_gen_enabled()) -+ goto unlock; -+ -+ if (enabled) -+ static_branch_enable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]); -+ else -+ static_branch_disable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]); -+ -+ memcg = mem_cgroup_iter(NULL, NULL, NULL); -+ do { -+ int nid; -+ -+ for_each_node(nid) { -+ struct lruvec *lruvec = get_lruvec(memcg, nid); -+ -+ if (!lruvec) -+ continue; -+ -+ spin_lock_irq(&lruvec->lru_lock); -+ -+ VM_WARN_ON_ONCE(!seq_is_valid(lruvec)); -+ VM_WARN_ON_ONCE(!state_is_valid(lruvec)); -+ -+ lruvec->lrugen.enabled = enabled; -+ -+ while (!(enabled ? 
fill_evictable(lruvec) : drain_evictable(lruvec))) { -+ spin_unlock_irq(&lruvec->lru_lock); -+ cond_resched(); -+ spin_lock_irq(&lruvec->lru_lock); -+ } -+ -+ spin_unlock_irq(&lruvec->lru_lock); -+ } -+ -+ cond_resched(); -+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); -+unlock: -+ mutex_unlock(&state_mutex); -+ put_online_mems(); -+ cpus_read_unlock(); -+ cgroup_unlock(); -+} -+ -+/****************************************************************************** -+ * sysfs interface -+ ******************************************************************************/ -+ -+static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -+{ -+ unsigned int caps = 0; -+ -+ if (get_cap(LRU_GEN_CORE)) -+ caps |= BIT(LRU_GEN_CORE); -+ -+ if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK)) -+ caps |= BIT(LRU_GEN_MM_WALK); -+ -+ if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG)) -+ caps |= BIT(LRU_GEN_NONLEAF_YOUNG); -+ -+ return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps); -+} -+ -+static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr, -+ const char *buf, size_t len) -+{ -+ int i; -+ unsigned int caps; -+ -+ if (tolower(*buf) == 'n') -+ caps = 0; -+ else if (tolower(*buf) == 'y') -+ caps = -1; -+ else if (kstrtouint(buf, 0, &caps)) -+ return -EINVAL; -+ -+ for (i = 0; i < NR_LRU_GEN_CAPS; i++) { -+ bool enabled = caps & BIT(i); -+ -+ if (i == LRU_GEN_CORE) -+ lru_gen_change_state(enabled); -+ else if (enabled) -+ static_branch_enable(&lru_gen_caps[i]); -+ else -+ static_branch_disable(&lru_gen_caps[i]); -+ } -+ -+ return len; -+} -+ -+static struct kobj_attribute lru_gen_enabled_attr = __ATTR( -+ enabled, 0644, show_enabled, store_enabled -+); -+ -+static struct attribute *lru_gen_attrs[] = { -+ &lru_gen_enabled_attr.attr, -+ NULL -+}; -+ -+static struct attribute_group lru_gen_attr_group = { -+ .name = "lru_gen", -+ .attrs = lru_gen_attrs, -+}; -+ - /****************************************************************************** - * initialization - ******************************************************************************/ -@@ -4985,6 +5202,7 @@ void lru_gen_init_lruvec(struct lruvec *lruvec) - struct lru_gen_struct *lrugen = &lruvec->lrugen; - - lrugen->max_seq = MIN_NR_GENS + 1; -+ lrugen->enabled = lru_gen_enabled(); - - for_each_gen_type_zone(gen, type, zone) - INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]); -@@ -5024,6 +5242,9 @@ static int __init init_lru_gen(void) - BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS); - BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS); - -+ if (sysfs_create_group(mm_kobj, &lru_gen_attr_group)) -+ pr_err("lru_gen: failed to create sysfs group\n"); -+ - return 0; - }; - late_initcall(init_lru_gen); - -From patchwork Wed Jul 6 22:00:20 2022 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Patchwork-Submitter: Yu Zhao -X-Patchwork-Id: 12908708 -Return-Path: -X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on - aws-us-west-2-korg-lkml-1.web.codeaurora.org -Received: from kanga.kvack.org (kanga.kvack.org [205.233.56.17]) - by smtp.lore.kernel.org (Postfix) with ESMTP id E259FCCA47C - for ; Wed, 6 Jul 2022 22:01:17 +0000 (UTC) -Received: by kanga.kvack.org (Postfix) - id 1A2F98E0007; Wed, 6 Jul 2022 18:01:09 -0400 (EDT) -Received: by kanga.kvack.org (Postfix, from userid 40) - id 153298E0001; Wed, 6 Jul 2022 18:01:09 -0400 (EDT) -X-Delivered-To: int-list-linux-mm@kvack.org -Received: by kanga.kvack.org (Postfix, 
from userid 63042) - id EE7558E0007; Wed, 6 Jul 2022 18:01:08 -0400 (EDT) -X-Delivered-To: linux-mm@kvack.org -Received: from relay.hostedemail.com (smtprelay0016.hostedemail.com - [216.40.44.16]) - by kanga.kvack.org (Postfix) with ESMTP id DAE728E0001 - for ; Wed, 6 Jul 2022 18:01:08 -0400 (EDT) -Received: from smtpin28.hostedemail.com (a10.router.float.18 [10.200.18.1]) - by unirelay12.hostedemail.com (Postfix) with ESMTP id BCB43120606 - for ; Wed, 6 Jul 2022 22:01:08 +0000 (UTC) -X-FDA: 79658046216.28.7964C66 -Received: from mail-yb1-f201.google.com (mail-yb1-f201.google.com - [209.85.219.201]) - by imf22.hostedemail.com (Postfix) with ESMTP id 5376DC0059 - for ; Wed, 6 Jul 2022 22:01:08 +0000 (UTC) -Received: by mail-yb1-f201.google.com with SMTP id - k18-20020a25fe12000000b0066e21b72767so9506499ybe.5 - for ; Wed, 06 Jul 2022 15:01:08 -0700 (PDT) -DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20210112; - h=date:in-reply-to:message-id:mime-version:references:subject:from:to - :cc:content-transfer-encoding; - bh=m3EW4cfAlntTqnxn3SvhsZvF1ytN+sfDtB6iRdzihvY=; - b=NNZxOJisLedvEph13coGoCeVo89XYF3cKhoLr0Qj+8EQSroRh25w+qZuSGaKvrNfmO - djUv79dYHeRCliQ2lBYEsuuPJN6lgSZ6cKW987LKYkUaRIiHw552kndr1VR1raRgUvCU - 568te5aggKYg95okJZ0cLsdFaiOBB18/hCGgU+4bQM73SosPCL/NpSqGWL8mW9AiVFs+ - hT7ErHYOnMn+bCDzuk8GAu9J4/5Gq8c/6z9M6D6X+HmVK0MeVpaKpZ0jPz/vsi747v3J - zvNibUS9XJKNBhR7/Fg26FpINdlMkWHvvcikRiTD5O+czcMeNF2XfnGAvAgAPgyPnYK8 - b6mw== -X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=1e100.net; s=20210112; - h=x-gm-message-state:date:in-reply-to:message-id:mime-version - :references:subject:from:to:cc:content-transfer-encoding; - bh=m3EW4cfAlntTqnxn3SvhsZvF1ytN+sfDtB6iRdzihvY=; - b=zXNzXRZtXF7HqnKj+YqGY7LRRVq6lHVBV5jsPt3MSSmDDGJn6CoeLbEp4cPtjTwO6B - PdPka308tTjCbbT5NueJUGYkQFn+dt6QZPZS/jb4O/Jp4FJYfjG0o4VrhF0wFgETWdJW - SlKTi07ik53nd5tJXcgBVdPvMoYv2WqllknYQA/iDN2/SNhuFxoKXFeHv/5ulZkc5nBp - SmXgJE0BppobJXNXNvFGVF0nGLh8MGF2CDBRi/+lvRIg3ypzFxQ/hVUXa8U2PNCdICqT - s8aNXeAFHJX2x34DN+/C6pMD4gEu9krSDK9BNN79fEhVT7obeGbxtKJ4DZnLSgUa0Xm+ - emKQ== -X-Gm-Message-State: AJIora+LcODNzO0M8diNM7f7lT7CGqqvE1q+GRK1vGVAVA10uhTltSsu - Casv9y2GuL6ljruRdbC60eu1gck5MIA= -X-Google-Smtp-Source: - AGRyM1uVFLai60fRsrxUz+UveX+2HvTnchQxr73gyI+bA9ud92MMOTkT47lvZz9+aNC2VPhD8jfbEwKxJDM= -X-Received: from yuzhao.bld.corp.google.com - ([2620:15c:183:200:b89c:e10a:466e:cf7d]) - (user=yuzhao job=sendgmr) by 2002:a25:1c56:0:b0:66e:2d23:d65d with SMTP id - c83-20020a251c56000000b0066e2d23d65dmr26931039ybc.253.1657144867700; Wed, 06 - Jul 2022 15:01:07 -0700 (PDT) -Date: Wed, 6 Jul 2022 16:00:20 -0600 -In-Reply-To: <20220706220022.968789-1-yuzhao@google.com> -Message-Id: <20220706220022.968789-12-yuzhao@google.com> -Mime-Version: 1.0 -References: <20220706220022.968789-1-yuzhao@google.com> -X-Mailer: git-send-email 2.37.0.rc0.161.g10f37bed90-goog -Subject: [PATCH v13 11/14] mm: multi-gen LRU: thrashing prevention -From: Yu Zhao -To: Andrew Morton -Cc: Andi Kleen , - Aneesh Kumar , - Catalin Marinas , - Dave Hansen , Hillf Danton , - Jens Axboe , Johannes Weiner , - Jonathan Corbet , - Linus Torvalds , - Matthew Wilcox , Mel Gorman , - Michael Larabel , - Michal Hocko , Mike Rapoport , - Peter Zijlstra , Tejun Heo , - Vlastimil Babka , Will Deacon , - linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org, - linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org, - page-reclaim@google.com, Yu Zhao , - Brian Geffon , - Jan Alexander Steffens , - Oleksandr Natalenko , - Steven Barrett , 
- Suleiman Souhlal , Daniel Byrne , - Donald Carr , - " =?utf-8?q?Holger_Hoffst=C3=A4tte?= " , - Konstantin Kharlamov , - Shuang Zhai , Sofia Trinh , - Vaibhav Jain -ARC-Seal: i=1; s=arc-20220608; d=hostedemail.com; t=1657144868; a=rsa-sha256; - cv=none; - b=8QjwJzQPm7r/G+Ug8d4Bn/JrZtirxW14NE/TPM5Yuz8TtgqfXHSgDZ0NZs+0NMnmPdFebK - BewOSgj/R+9PisPRBLUEepAkTTAjyW6prOGRhTAKigLh6I3aJOU2/+iXQO+eUvhPXHNnMf - KRnEXerAaMeP1dBwH0VFivF74hvg2OQ= -ARC-Authentication-Results: i=1; - imf22.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=NNZxOJis; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf22.hostedemail.com: domain of - 3IwbGYgYKCGQaWbJCQIQQING.EQONKPWZ-OOMXCEM.QTI@flex--yuzhao.bounces.google.com - designates 209.85.219.201 as permitted sender) - smtp.mailfrom=3IwbGYgYKCGQaWbJCQIQQING.EQONKPWZ-OOMXCEM.QTI@flex--yuzhao.bounces.google.com -ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; - d=hostedemail.com; - s=arc-20220608; t=1657144868; - h=from:from:sender:reply-to:subject:subject:date:date: - message-id:message-id:to:to:cc:cc:mime-version:mime-version: - content-type:content-type: - content-transfer-encoding:content-transfer-encoding: - in-reply-to:in-reply-to:references:references:dkim-signature; - bh=m3EW4cfAlntTqnxn3SvhsZvF1ytN+sfDtB6iRdzihvY=; - b=DTQGqCEN2saKpCn2Rlj0DwxYUYns5aLH6ctyLw23CxaYk5FVEKFifd/4msPagn2x3OyYoJ - IHUvwyXUjQkcTa1cZQoQjZtTkZ5tAB3HGWKknBtj00SV590QYCz1tvu/9DdrTQBAJJQVkL - NDOvAf+Q5C0pIHmFotDqxriphq5nQvg= -X-Stat-Signature: ob4t1mrtn1zzw1fgdc6fyo685ij649e9 -X-Rspam-User: -X-Rspamd-Server: rspam12 -X-Rspamd-Queue-Id: 5376DC0059 -Authentication-Results: imf22.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=NNZxOJis; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf22.hostedemail.com: domain of - 3IwbGYgYKCGQaWbJCQIQQING.EQONKPWZ-OOMXCEM.QTI@flex--yuzhao.bounces.google.com - designates 209.85.219.201 as permitted sender) - smtp.mailfrom=3IwbGYgYKCGQaWbJCQIQQING.EQONKPWZ-OOMXCEM.QTI@flex--yuzhao.bounces.google.com -X-HE-Tag: 1657144868-301835 -X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4 -Sender: owner-linux-mm@kvack.org -Precedence: bulk -X-Loop: owner-majordomo@kvack.org -List-ID: - -Add /sys/kernel/mm/lru_gen/min_ttl_ms for thrashing prevention, as -requested by many desktop users [1]. - -When set to value N, it prevents the working set of N milliseconds -from getting evicted. The OOM killer is triggered if this working set -cannot be kept in memory. Based on the average human detectable lag -(~100ms), N=1000 usually eliminates intolerable lags due to thrashing. -Larger values like N=3000 make lags less noticeable at the risk of -premature OOM kills. - -Compared with the size-based approach [2], this time-based approach -has the following advantages: -1. It is easier to configure because it is agnostic to applications - and memory sizes. -2. It is more reliable because it is directly wired to the OOM killer. 
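A minimal userspace sketch, not taken from the patch itself, of how the knob described above can be driven: it writes a millisecond value to /sys/kernel/mm/lru_gen/min_ttl_ms and assumes a kernel built with CONFIG_LRU_GEN=y where that file exists; the 1000 ms value is only an example:

    /* Sketch (not from the patch): set the thrashing-prevention window
     * to 1000 ms via /sys/kernel/mm/lru_gen/min_ttl_ms. Requires root;
     * the 1000 ms value is an arbitrary example. */
    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            const char *path = "/sys/kernel/mm/lru_gen/min_ttl_ms";
            FILE *f = fopen(path, "w");

            if (!f) {
                    perror(path);
                    return EXIT_FAILURE;
            }
            /* store_min_ttl() parses an unsigned int in milliseconds. */
            fprintf(f, "%u\n", 1000u);
            return fclose(f) ? EXIT_FAILURE : EXIT_SUCCESS;
    }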
- -[1] https://lore.kernel.org/r/Ydza%2FzXKY9ATRoh6@google.com/ -[2] https://lore.kernel.org/r/20101028191523.GA14972@google.com/ - -Signed-off-by: Yu Zhao -Acked-by: Brian Geffon -Acked-by: Jan Alexander Steffens (heftig) -Acked-by: Oleksandr Natalenko -Acked-by: Steven Barrett -Acked-by: Suleiman Souhlal -Tested-by: Daniel Byrne -Tested-by: Donald Carr -Tested-by: Holger Hoffstätte -Tested-by: Konstantin Kharlamov -Tested-by: Shuang Zhai -Tested-by: Sofia Trinh -Tested-by: Vaibhav Jain ---- - include/linux/mmzone.h | 2 ++ - mm/vmscan.c | 71 +++++++++++++++++++++++++++++++++++++++--- - 2 files changed, 69 insertions(+), 4 deletions(-) - -diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h -index 840b7ca8b91f..472bd5335517 100644 ---- a/include/linux/mmzone.h -+++ b/include/linux/mmzone.h -@@ -419,6 +419,8 @@ struct lru_gen_struct { - unsigned long max_seq; - /* the eviction increments the oldest generation numbers */ - unsigned long min_seq[ANON_AND_FILE]; -+ /* the birth time of each generation in jiffies */ -+ unsigned long timestamps[MAX_NR_GENS]; - /* the multi-gen LRU lists, lazily sorted on eviction */ - struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; - /* the multi-gen LRU sizes, eventually consistent */ -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 4c8b475429ed..1f2892a0dc41 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -4233,6 +4233,7 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap) - for (type = 0; type < ANON_AND_FILE; type++) - reset_ctrl_pos(lruvec, type, false); - -+ WRITE_ONCE(lrugen->timestamps[next], jiffies); - /* make sure preceding modifications appear */ - smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1); - -@@ -4359,7 +4360,7 @@ static unsigned long get_nr_evictable(struct lruvec *lruvec, unsigned long max_s - return total; - } - --static void age_lruvec(struct lruvec *lruvec, struct scan_control *sc) -+static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl) - { - bool need_aging; - unsigned long nr_to_scan; -@@ -4373,21 +4374,40 @@ static void age_lruvec(struct lruvec *lruvec, struct scan_control *sc) - mem_cgroup_calculate_protection(NULL, memcg); - - if (mem_cgroup_below_min(memcg)) -- return; -+ return false; - - nr_to_scan = get_nr_evictable(lruvec, max_seq, min_seq, swappiness, &need_aging); - if (!nr_to_scan) -- return; -+ return false; - - nr_to_scan >>= mem_cgroup_online(memcg) ? 
sc->priority : 0; - -+ if (min_ttl) { -+ int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); -+ unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); -+ -+ if (time_is_after_jiffies(birth + min_ttl)) -+ return false; -+ -+ /* the size is likely too small to be helpful */ -+ if (!nr_to_scan && sc->priority != DEF_PRIORITY) -+ return false; -+ } -+ - if (nr_to_scan && need_aging) - try_to_inc_max_seq(lruvec, max_seq, sc, swappiness); -+ -+ return true; - } - -+/* to protect the working set of the last N jiffies */ -+static unsigned long lru_gen_min_ttl __read_mostly; -+ - static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) - { - struct mem_cgroup *memcg; -+ bool success = false; -+ unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl); - - VM_WARN_ON_ONCE(!current_is_kswapd()); - -@@ -4413,12 +4433,28 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) - do { - struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); - -- age_lruvec(lruvec, sc); -+ if (age_lruvec(lruvec, sc, min_ttl)) -+ success = true; - - cond_resched(); - } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); - - clear_mm_walk(); -+ -+ /* -+ * The main goal is to OOM kill if every generation from all memcgs is -+ * younger than min_ttl. However, another theoretical possibility is all -+ * memcgs are either below min or empty. -+ */ -+ if (!success && !sc->order && mutex_trylock(&oom_lock)) { -+ struct oom_control oc = { -+ .gfp_mask = sc->gfp_mask, -+ }; -+ -+ out_of_memory(&oc); -+ -+ mutex_unlock(&oom_lock); -+ } - } - - /* -@@ -5135,6 +5171,28 @@ static void lru_gen_change_state(bool enabled) - * sysfs interface - ******************************************************************************/ - -+static ssize_t show_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, char *buf) -+{ -+ return sprintf(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl))); -+} -+ -+static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, -+ const char *buf, size_t len) -+{ -+ unsigned int msecs; -+ -+ if (kstrtouint(buf, 0, &msecs)) -+ return -EINVAL; -+ -+ WRITE_ONCE(lru_gen_min_ttl, msecs_to_jiffies(msecs)); -+ -+ return len; -+} -+ -+static struct kobj_attribute lru_gen_min_ttl_attr = __ATTR( -+ min_ttl_ms, 0644, show_min_ttl, store_min_ttl -+); -+ - static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, char *buf) - { - unsigned int caps = 0; -@@ -5183,6 +5241,7 @@ static struct kobj_attribute lru_gen_enabled_attr = __ATTR( - ); - - static struct attribute *lru_gen_attrs[] = { -+ &lru_gen_min_ttl_attr.attr, - &lru_gen_enabled_attr.attr, - NULL - }; -@@ -5198,12 +5257,16 @@ static struct attribute_group lru_gen_attr_group = { - - void lru_gen_init_lruvec(struct lruvec *lruvec) - { -+ int i; - int gen, type, zone; - struct lru_gen_struct *lrugen = &lruvec->lrugen; - - lrugen->max_seq = MIN_NR_GENS + 1; - lrugen->enabled = lru_gen_enabled(); - -+ for (i = 0; i <= MIN_NR_GENS + 1; i++) -+ lrugen->timestamps[i] = jiffies; -+ - for_each_gen_type_zone(gen, type, zone) - INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]); - - -From patchwork Wed Jul 6 22:00:21 2022 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Patchwork-Submitter: Yu Zhao -X-Patchwork-Id: 12908710 -Return-Path: -X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on - aws-us-west-2-korg-lkml-1.web.codeaurora.org -Received: from kanga.kvack.org (kanga.kvack.org [205.233.56.17]) - by 
smtp.lore.kernel.org (Postfix) with ESMTP id 69F71CCA47C - for ; Wed, 6 Jul 2022 22:01:25 +0000 (UTC) -Received: by kanga.kvack.org (Postfix) - id 57B768E0009; Wed, 6 Jul 2022 18:01:12 -0400 (EDT) -Received: by kanga.kvack.org (Postfix, from userid 40) - id 4D6338E0001; Wed, 6 Jul 2022 18:01:12 -0400 (EDT) -X-Delivered-To: int-list-linux-mm@kvack.org -Received: by kanga.kvack.org (Postfix, from userid 63042) - id 3007F8E0009; Wed, 6 Jul 2022 18:01:12 -0400 (EDT) -X-Delivered-To: linux-mm@kvack.org -Received: from relay.hostedemail.com (smtprelay0012.hostedemail.com - [216.40.44.12]) - by kanga.kvack.org (Postfix) with ESMTP id 1256B8E0001 - for ; Wed, 6 Jul 2022 18:01:12 -0400 (EDT) -Received: from smtpin31.hostedemail.com (a10.router.float.18 [10.200.18.1]) - by unirelay09.hostedemail.com (Postfix) with ESMTP id AB0F535EC6 - for ; Wed, 6 Jul 2022 22:01:11 +0000 (UTC) -X-FDA: 79658046342.31.A60FB64 -Received: from mail-yb1-f201.google.com (mail-yb1-f201.google.com - [209.85.219.201]) - by imf26.hostedemail.com (Postfix) with ESMTP id 4A6EA140028 - for ; Wed, 6 Jul 2022 22:01:10 +0000 (UTC) -Received: by mail-yb1-f201.google.com with SMTP id - p7-20020a25d807000000b0066e36989a90so7892676ybg.8 - for ; Wed, 06 Jul 2022 15:01:10 -0700 (PDT) -DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20210112; - h=date:in-reply-to:message-id:mime-version:references:subject:from:to - :cc:content-transfer-encoding; - bh=nff1jLrA4AEpo88lpO2ZCXRvuzs0CKl/TI+ofmEg1y8=; - b=CPQvXMErOqHr1LM+OMqtT0F59XyB+HiQxBX+EbwoUSnPn/FOpbR4dV1NCCwYakR+KD - gThfZIfqp3Y1SzCO2443reP2Soe3KDHNgAEXCZ5YNoeE7AXlAuA2fgD7YeAXZovjmVIh - 7mERrjTMT6/EWjW531e5FNoxfhaMBEMBEgwjAOQ3Km57LeRgBcWr2IgRe48XaW69M16C - KWj2PGLEmurhGwwHU4NVVPpbjL3o7cE3vD/yehuUCz476hIOcC2Nqpn4krz36H5vP68u - MNeJkhynrE7FhYi7+GgffibtX96Vf3x/16YGAxyUCnSyvvk6OhNUeqKo/LQmoS3LAyl4 - LFpw== -X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=1e100.net; s=20210112; - h=x-gm-message-state:date:in-reply-to:message-id:mime-version - :references:subject:from:to:cc:content-transfer-encoding; - bh=nff1jLrA4AEpo88lpO2ZCXRvuzs0CKl/TI+ofmEg1y8=; - b=VL/nujGONvdvil9k1He7hpkq5LhpScFPlvGxIB7gfBV4qX15+ZTbNG009jHkmfEswJ - LM40W6DV3mGXjx6Gy2MTjobH0jL4c9qrU1ia5WRKzWkXlxaCkDE82vwuaz7rycBaAiPt - JhRi9ADSMoA9G43MZZei2oSwmUoW9WcH4Umy1YImLdHAjkYdJQ+Ss3Q4uYfGGw3866qm - nfc0pXT5KiNC2DMr+Cla/Llx1WlFNi7QIf3AmdpJ9gZTxCC28ikjniRVZN6b7bTrvjnO - iEyt9jKYEk9vW/yTUEzM8L41D+e+Z60AT6T0qi0KACO4Tp3xz77ui1i2Q85btfHs4Uah - 1qwQ== -X-Gm-Message-State: AJIora+YmPGCc9b5W8VNnqsviDKSYwcLGbNwLNCyRBey3F3rMvUMSj7O - PlfF3OKJjs3zxsBvgcOgTGWclCLXuc4= -X-Google-Smtp-Source: - AGRyM1shBkUATwCAbsz8cAeEoY3s7WAj+Jhs0L0rMlWdOOLCX8yRP4QO9OI90Aiszy92GtEPUW7W76UGd7w= -X-Received: from yuzhao.bld.corp.google.com - ([2620:15c:183:200:b89c:e10a:466e:cf7d]) - (user=yuzhao job=sendgmr) by 2002:a0d:c486:0:b0:31c:3b63:91fe with SMTP id - g128-20020a0dc486000000b0031c3b6391femr43427605ywd.7.1657144869573; Wed, 06 - Jul 2022 15:01:09 -0700 (PDT) -Date: Wed, 6 Jul 2022 16:00:21 -0600 -In-Reply-To: <20220706220022.968789-1-yuzhao@google.com> -Message-Id: <20220706220022.968789-13-yuzhao@google.com> -Mime-Version: 1.0 -References: <20220706220022.968789-1-yuzhao@google.com> -X-Mailer: git-send-email 2.37.0.rc0.161.g10f37bed90-goog -Subject: [PATCH v13 12/14] mm: multi-gen LRU: debugfs interface -From: Yu Zhao -To: Andrew Morton -Cc: Andi Kleen , - Aneesh Kumar , - Catalin Marinas , - Dave Hansen , Hillf Danton , - Jens Axboe , Johannes Weiner , - Jonathan Corbet , - Linus Torvalds , - Matthew Wilcox , 
Mel Gorman , - Michael Larabel , - Michal Hocko , Mike Rapoport , - Peter Zijlstra , Tejun Heo , - Vlastimil Babka , Will Deacon , - linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org, - linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org, - page-reclaim@google.com, Yu Zhao , - Qi Zheng , Brian Geffon , - Jan Alexander Steffens , - Oleksandr Natalenko , - Steven Barrett , - Suleiman Souhlal , Daniel Byrne , - Donald Carr , - " =?utf-8?q?Holger_Hoffst=C3=A4tte?= " , - Konstantin Kharlamov , - Shuang Zhai , Sofia Trinh , - Vaibhav Jain -ARC-Seal: i=1; s=arc-20220608; d=hostedemail.com; t=1657144870; a=rsa-sha256; - cv=none; - b=o2/nINgmuZxzIEJU+aSCudJDHKhL7ULIt8sF3JC62cV/HuuBLU0B/xVeMeA2f0cfJN2wtO - kh2UubWWhgsh8V4Cx5XQQfC0fnIjU7kesFrHJslbwx0sV7BFvFu/mCMwBdA2zb0NjFYQ+H - 9ov/Z61nItyp9dvXEOPZKlu3qRCy8D4= -ARC-Authentication-Results: i=1; - imf26.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=CPQvXMEr; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf26.hostedemail.com: domain of - 3JQbGYgYKCGYcYdLESKSSKPI.GSQPMRYb-QQOZEGO.SVK@flex--yuzhao.bounces.google.com - designates 209.85.219.201 as permitted sender) - smtp.mailfrom=3JQbGYgYKCGYcYdLESKSSKPI.GSQPMRYb-QQOZEGO.SVK@flex--yuzhao.bounces.google.com -ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; - d=hostedemail.com; - s=arc-20220608; t=1657144870; - h=from:from:sender:reply-to:subject:subject:date:date: - message-id:message-id:to:to:cc:cc:mime-version:mime-version: - content-type:content-type: - content-transfer-encoding:content-transfer-encoding: - in-reply-to:in-reply-to:references:references:dkim-signature; - bh=nff1jLrA4AEpo88lpO2ZCXRvuzs0CKl/TI+ofmEg1y8=; - b=RI+I7W6K9d5xsUHY54+KeCzGoeOqxuVYKkoikwvrtSbya6NSfQOh7+EFtaBhpVNiDwQMte - 1gOSPtlHmqa//TuxixmT7E3h+4+bbMuck8gjgOl+LEQXqAO7KWKyE6sirgzmwX5HwXk8e5 - zWZIZi2rLOPaapJlUtXn2+31FvtGh1c= -Authentication-Results: imf26.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=CPQvXMEr; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf26.hostedemail.com: domain of - 3JQbGYgYKCGYcYdLESKSSKPI.GSQPMRYb-QQOZEGO.SVK@flex--yuzhao.bounces.google.com - designates 209.85.219.201 as permitted sender) - smtp.mailfrom=3JQbGYgYKCGYcYdLESKSSKPI.GSQPMRYb-QQOZEGO.SVK@flex--yuzhao.bounces.google.com -X-Stat-Signature: oqpxscpz6ano7mm34xg1zaoyrcimtdxo -X-Rspamd-Queue-Id: 4A6EA140028 -X-Rspamd-Server: rspam05 -X-Rspam-User: -X-HE-Tag: 1657144870-268992 -X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4 -Sender: owner-linux-mm@kvack.org -Precedence: bulk -X-Loop: owner-majordomo@kvack.org -List-ID: - -Add /sys/kernel/debug/lru_gen for working set estimation and proactive -reclaim. These techniques are commonly used to optimize job scheduling -(bin packing) in data centers [1][2]. - -Compared with the page table-based approach and the PFN-based -approach, this lruvec-based approach has the following advantages: -1. It offers better choices because it is aware of memcgs, NUMA nodes, - shared mappings and unmapped page cache. -2. It is more scalable because it is O(nr_hot_pages), whereas the - PFN-based approach is O(nr_total_pages). - -Add /sys/kernel/debug/lru_gen_full for debugging. 
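A minimal userspace sketch, not taken from the patch itself, that issues one aging command in the documented format "+ memcg_id node_id max_gen_nr [can_swap [force_scan]]"; the memcg id, node id and generation number below are placeholders that would normally be read back from the same file first:

    /* Sketch (not from the patch): write one aging command to the
     * debugfs file added by this patch. The ids and the generation
     * number are placeholders; real values come from first reading
     * /sys/kernel/debug/lru_gen. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            /* "+ memcg_id node_id max_gen_nr [can_swap [force_scan]]" */
            const char cmd[] = "+ 0 0 4 1 1\n";
            int fd = open("/sys/kernel/debug/lru_gen", O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (write(fd, cmd, strlen(cmd)) < 0)
                    perror("write");
            close(fd);
            return 0;
    }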
- -[1] https://dl.acm.org/doi/10.1145/3297858.3304053 -[2] https://dl.acm.org/doi/10.1145/3503222.3507731 - -Signed-off-by: Yu Zhao -Reviewed-by: Qi Zheng -Acked-by: Brian Geffon -Acked-by: Jan Alexander Steffens (heftig) -Acked-by: Oleksandr Natalenko -Acked-by: Steven Barrett -Acked-by: Suleiman Souhlal -Tested-by: Daniel Byrne -Tested-by: Donald Carr -Tested-by: Holger Hoffstätte -Tested-by: Konstantin Kharlamov -Tested-by: Shuang Zhai -Tested-by: Sofia Trinh -Tested-by: Vaibhav Jain ---- - include/linux/nodemask.h | 1 + - mm/vmscan.c | 411 ++++++++++++++++++++++++++++++++++++++- - 2 files changed, 402 insertions(+), 10 deletions(-) - -diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h -index 0f233b76c9ce..292ec0ce0d63 100644 ---- a/include/linux/nodemask.h -+++ b/include/linux/nodemask.h -@@ -485,6 +485,7 @@ static inline int num_node_state(enum node_states state) - #define first_online_node 0 - #define first_memory_node 0 - #define next_online_node(nid) (MAX_NUMNODES) -+#define next_memory_node(nid) (MAX_NUMNODES) - #define nr_node_ids 1U - #define nr_online_nodes 1U - -diff --git a/mm/vmscan.c b/mm/vmscan.c -index 1f2892a0dc41..fbcd298adca7 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -53,6 +53,7 @@ - #include - #include - #include -+#include - - #include - #include -@@ -4137,12 +4138,40 @@ static void clear_mm_walk(void) - kfree(walk); - } - --static void inc_min_seq(struct lruvec *lruvec, int type) -+static bool inc_min_seq(struct lruvec *lruvec, int type, bool can_swap) - { -+ int zone; -+ int remaining = MAX_LRU_BATCH; - struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]); - -+ if (type == LRU_GEN_ANON && !can_swap) -+ goto done; -+ -+ /* prevent cold/hot inversion if force_scan is true */ -+ for (zone = 0; zone < MAX_NR_ZONES; zone++) { -+ struct list_head *head = &lrugen->lists[old_gen][type][zone]; -+ -+ while (!list_empty(head)) { -+ struct folio *folio = lru_to_folio(head); -+ -+ VM_WARN_ON_ONCE_FOLIO(folio_test_unevictable(folio), folio); -+ VM_WARN_ON_ONCE_FOLIO(folio_test_active(folio), folio); -+ VM_WARN_ON_ONCE_FOLIO(folio_is_file_lru(folio) != type, folio); -+ VM_WARN_ON_ONCE_FOLIO(folio_zonenum(folio) != zone, folio); -+ -+ new_gen = folio_inc_gen(lruvec, folio, false); -+ list_move_tail(&folio->lru, &lrugen->lists[new_gen][type][zone]); -+ -+ if (!--remaining) -+ return false; -+ } -+ } -+done: - reset_ctrl_pos(lruvec, type, true); - WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1); -+ -+ return true; - } - - static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) -@@ -4188,7 +4217,7 @@ static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) - return success; - } - --static void inc_max_seq(struct lruvec *lruvec, bool can_swap) -+static void inc_max_seq(struct lruvec *lruvec, bool can_swap, bool force_scan) - { - int prev, next; - int type, zone; -@@ -4202,9 +4231,13 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap) - if (get_nr_gens(lruvec, type) != MAX_NR_GENS) - continue; - -- VM_WARN_ON_ONCE(type == LRU_GEN_FILE || can_swap); -+ VM_WARN_ON_ONCE(!force_scan && (type == LRU_GEN_FILE || can_swap)); - -- inc_min_seq(lruvec, type); -+ while (!inc_min_seq(lruvec, type, can_swap)) { -+ spin_unlock_irq(&lruvec->lru_lock); -+ cond_resched(); -+ spin_lock_irq(&lruvec->lru_lock); -+ } - } - - /* -@@ -4241,7 +4274,7 @@ static void inc_max_seq(struct lruvec *lruvec, bool can_swap) - } - - static bool try_to_inc_max_seq(struct lruvec *lruvec, 
unsigned long max_seq, -- struct scan_control *sc, bool can_swap) -+ struct scan_control *sc, bool can_swap, bool force_scan) - { - bool success; - struct lru_gen_mm_walk *walk; -@@ -4262,7 +4295,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, - * handful of PTEs. Spreading the work out over a period of time usually - * is less efficient, but it avoids bursty page faults. - */ -- if (!(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) { -+ if (!force_scan && !(arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK))) { - success = iterate_mm_list_nowalk(lruvec, max_seq); - goto done; - } -@@ -4276,7 +4309,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, - walk->lruvec = lruvec; - walk->max_seq = max_seq; - walk->can_swap = can_swap; -- walk->force_scan = false; -+ walk->force_scan = force_scan; - - do { - success = iterate_mm_list(lruvec, walk, &mm); -@@ -4296,7 +4329,7 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, - - VM_WARN_ON_ONCE(max_seq != READ_ONCE(lrugen->max_seq)); - -- inc_max_seq(lruvec, can_swap); -+ inc_max_seq(lruvec, can_swap, force_scan); - /* either this sees any waiters or they will see updated max_seq */ - if (wq_has_sleeper(&lruvec->mm_state.wait)) - wake_up_all(&lruvec->mm_state.wait); -@@ -4395,7 +4428,7 @@ static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned - } - - if (nr_to_scan && need_aging) -- try_to_inc_max_seq(lruvec, max_seq, sc, swappiness); -+ try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false); - - return true; - } -@@ -4962,7 +4995,7 @@ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control * - if (current_is_kswapd()) - return 0; - -- if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap)) -+ if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false)) - return nr_to_scan; - done: - return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? 
nr_to_scan : 0; -@@ -5251,6 +5284,361 @@ static struct attribute_group lru_gen_attr_group = { - .attrs = lru_gen_attrs, - }; - -+/****************************************************************************** -+ * debugfs interface -+ ******************************************************************************/ -+ -+static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos) -+{ -+ struct mem_cgroup *memcg; -+ loff_t nr_to_skip = *pos; -+ -+ m->private = kvmalloc(PATH_MAX, GFP_KERNEL); -+ if (!m->private) -+ return ERR_PTR(-ENOMEM); -+ -+ memcg = mem_cgroup_iter(NULL, NULL, NULL); -+ do { -+ int nid; -+ -+ for_each_node_state(nid, N_MEMORY) { -+ if (!nr_to_skip--) -+ return get_lruvec(memcg, nid); -+ } -+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); -+ -+ return NULL; -+} -+ -+static void lru_gen_seq_stop(struct seq_file *m, void *v) -+{ -+ if (!IS_ERR_OR_NULL(v)) -+ mem_cgroup_iter_break(NULL, lruvec_memcg(v)); -+ -+ kvfree(m->private); -+ m->private = NULL; -+} -+ -+static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos) -+{ -+ int nid = lruvec_pgdat(v)->node_id; -+ struct mem_cgroup *memcg = lruvec_memcg(v); -+ -+ ++*pos; -+ -+ nid = next_memory_node(nid); -+ if (nid == MAX_NUMNODES) { -+ memcg = mem_cgroup_iter(NULL, memcg, NULL); -+ if (!memcg) -+ return NULL; -+ -+ nid = first_memory_node; -+ } -+ -+ return get_lruvec(memcg, nid); -+} -+ -+static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, -+ unsigned long max_seq, unsigned long *min_seq, -+ unsigned long seq) -+{ -+ int i; -+ int type, tier; -+ int hist = lru_hist_from_seq(seq); -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ -+ for (tier = 0; tier < MAX_NR_TIERS; tier++) { -+ seq_printf(m, " %10d", tier); -+ for (type = 0; type < ANON_AND_FILE; type++) { -+ const char *s = " "; -+ unsigned long n[3] = {}; -+ -+ if (seq == max_seq) { -+ s = "RT "; -+ n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]); -+ n[1] = READ_ONCE(lrugen->avg_total[type][tier]); -+ } else if (seq == min_seq[type] || NR_HIST_GENS > 1) { -+ s = "rep"; -+ n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]); -+ n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]); -+ if (tier) -+ n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]); -+ } -+ -+ for (i = 0; i < 3; i++) -+ seq_printf(m, " %10lu%c", n[i], s[i]); -+ } -+ seq_putc(m, '\n'); -+ } -+ -+ seq_puts(m, " "); -+ for (i = 0; i < NR_MM_STATS; i++) { -+ const char *s = " "; -+ unsigned long n = 0; -+ -+ if (seq == max_seq && NR_HIST_GENS == 1) { -+ s = "LOYNFA"; -+ n = READ_ONCE(lruvec->mm_state.stats[hist][i]); -+ } else if (seq != max_seq && NR_HIST_GENS > 1) { -+ s = "loynfa"; -+ n = READ_ONCE(lruvec->mm_state.stats[hist][i]); -+ } -+ -+ seq_printf(m, " %10lu%c", n, s[i]); -+ } -+ seq_putc(m, '\n'); -+} -+ -+static int lru_gen_seq_show(struct seq_file *m, void *v) -+{ -+ unsigned long seq; -+ bool full = !debugfs_real_fops(m->file)->write; -+ struct lruvec *lruvec = v; -+ struct lru_gen_struct *lrugen = &lruvec->lrugen; -+ int nid = lruvec_pgdat(lruvec)->node_id; -+ struct mem_cgroup *memcg = lruvec_memcg(lruvec); -+ DEFINE_MAX_SEQ(lruvec); -+ DEFINE_MIN_SEQ(lruvec); -+ -+ if (nid == first_memory_node) { -+ const char *path = memcg ? 
m->private : ""; -+ -+#ifdef CONFIG_MEMCG -+ if (memcg) -+ cgroup_path(memcg->css.cgroup, m->private, PATH_MAX); -+#endif -+ seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), path); -+ } -+ -+ seq_printf(m, " node %5d\n", nid); -+ -+ if (!full) -+ seq = min_seq[LRU_GEN_ANON]; -+ else if (max_seq >= MAX_NR_GENS) -+ seq = max_seq - MAX_NR_GENS + 1; -+ else -+ seq = 0; -+ -+ for (; seq <= max_seq; seq++) { -+ int type, zone; -+ int gen = lru_gen_from_seq(seq); -+ unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); -+ -+ seq_printf(m, " %10lu %10u", seq, jiffies_to_msecs(jiffies - birth)); -+ -+ for (type = 0; type < ANON_AND_FILE; type++) { -+ unsigned long size = 0; -+ char mark = full && seq < min_seq[type] ? 'x' : ' '; -+ -+ for (zone = 0; zone < MAX_NR_ZONES; zone++) -+ size += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L); -+ -+ seq_printf(m, " %10lu%c", size, mark); -+ } -+ -+ seq_putc(m, '\n'); -+ -+ if (full) -+ lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq); -+ } -+ -+ return 0; -+} -+ -+static const struct seq_operations lru_gen_seq_ops = { -+ .start = lru_gen_seq_start, -+ .stop = lru_gen_seq_stop, -+ .next = lru_gen_seq_next, -+ .show = lru_gen_seq_show, -+}; -+ -+static int run_aging(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc, -+ bool can_swap, bool force_scan) -+{ -+ DEFINE_MAX_SEQ(lruvec); -+ DEFINE_MIN_SEQ(lruvec); -+ -+ if (seq < max_seq) -+ return 0; -+ -+ if (seq > max_seq) -+ return -EINVAL; -+ -+ if (!force_scan && min_seq[!can_swap] + MAX_NR_GENS - 1 <= max_seq) -+ return -ERANGE; -+ -+ try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, force_scan); -+ -+ return 0; -+} -+ -+static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc, -+ int swappiness, unsigned long nr_to_reclaim) -+{ -+ DEFINE_MAX_SEQ(lruvec); -+ -+ if (seq + MIN_NR_GENS > max_seq) -+ return -EINVAL; -+ -+ sc->nr_reclaimed = 0; -+ -+ while (!signal_pending(current)) { -+ DEFINE_MIN_SEQ(lruvec); -+ -+ if (seq < min_seq[!swappiness]) -+ return 0; -+ -+ if (sc->nr_reclaimed >= nr_to_reclaim) -+ return 0; -+ -+ if (!evict_folios(lruvec, sc, swappiness, NULL)) -+ return 0; -+ -+ cond_resched(); -+ } -+ -+ return -EINTR; -+} -+ -+static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq, -+ struct scan_control *sc, int swappiness, unsigned long opt) -+{ -+ struct lruvec *lruvec; -+ int err = -EINVAL; -+ struct mem_cgroup *memcg = NULL; -+ -+ if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY)) -+ return -EINVAL; -+ -+ if (!mem_cgroup_disabled()) { -+ rcu_read_lock(); -+ memcg = mem_cgroup_from_id(memcg_id); -+#ifdef CONFIG_MEMCG -+ if (memcg && !css_tryget(&memcg->css)) -+ memcg = NULL; -+#endif -+ rcu_read_unlock(); -+ -+ if (!memcg) -+ return -EINVAL; -+ } -+ -+ if (memcg_id != mem_cgroup_id(memcg)) -+ goto done; -+ -+ lruvec = get_lruvec(memcg, nid); -+ -+ if (swappiness < 0) -+ swappiness = get_swappiness(lruvec, sc); -+ else if (swappiness > 200) -+ goto done; -+ -+ switch (cmd) { -+ case '+': -+ err = run_aging(lruvec, seq, sc, swappiness, opt); -+ break; -+ case '-': -+ err = run_eviction(lruvec, seq, sc, swappiness, opt); -+ break; -+ } -+done: -+ mem_cgroup_put(memcg); -+ -+ return err; -+} -+ -+static ssize_t lru_gen_seq_write(struct file *file, const char __user *src, -+ size_t len, loff_t *pos) -+{ -+ void *buf; -+ char *cur, *next; -+ unsigned int flags; -+ struct blk_plug plug; -+ int err = -EINVAL; -+ struct scan_control sc = { -+ .may_writepage = true, -+ .may_unmap = true, -+ 
.may_swap = true, -+ .reclaim_idx = MAX_NR_ZONES - 1, -+ .gfp_mask = GFP_KERNEL, -+ }; -+ -+ buf = kvmalloc(len + 1, GFP_KERNEL); -+ if (!buf) -+ return -ENOMEM; -+ -+ if (copy_from_user(buf, src, len)) { -+ kvfree(buf); -+ return -EFAULT; -+ } -+ -+ set_task_reclaim_state(current, &sc.reclaim_state); -+ flags = memalloc_noreclaim_save(); -+ blk_start_plug(&plug); -+ if (!set_mm_walk(NULL)) { -+ err = -ENOMEM; -+ goto done; -+ } -+ -+ next = buf; -+ next[len] = '\0'; -+ -+ while ((cur = strsep(&next, ",;\n"))) { -+ int n; -+ int end; -+ char cmd; -+ unsigned int memcg_id; -+ unsigned int nid; -+ unsigned long seq; -+ unsigned int swappiness = -1; -+ unsigned long opt = -1; -+ -+ cur = skip_spaces(cur); -+ if (!*cur) -+ continue; -+ -+ n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid, -+ &seq, &end, &swappiness, &end, &opt, &end); -+ if (n < 4 || cur[end]) { -+ err = -EINVAL; -+ break; -+ } -+ -+ err = run_cmd(cmd, memcg_id, nid, seq, &sc, swappiness, opt); -+ if (err) -+ break; -+ } -+done: -+ clear_mm_walk(); -+ blk_finish_plug(&plug); -+ memalloc_noreclaim_restore(flags); -+ set_task_reclaim_state(current, NULL); -+ -+ kvfree(buf); -+ -+ return err ? : len; -+} -+ -+static int lru_gen_seq_open(struct inode *inode, struct file *file) -+{ -+ return seq_open(file, &lru_gen_seq_ops); -+} -+ -+static const struct file_operations lru_gen_rw_fops = { -+ .open = lru_gen_seq_open, -+ .read = seq_read, -+ .write = lru_gen_seq_write, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ -+static const struct file_operations lru_gen_ro_fops = { -+ .open = lru_gen_seq_open, -+ .read = seq_read, -+ .llseek = seq_lseek, -+ .release = seq_release, -+}; -+ - /****************************************************************************** - * initialization - ******************************************************************************/ -@@ -5308,6 +5696,9 @@ static int __init init_lru_gen(void) - if (sysfs_create_group(mm_kobj, &lru_gen_attr_group)) - pr_err("lru_gen: failed to create sysfs group\n"); - -+ debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops); -+ debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops); -+ - return 0; - }; - late_initcall(init_lru_gen); - -From patchwork Wed Jul 6 22:00:22 2022 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Patchwork-Submitter: Yu Zhao -X-Patchwork-Id: 12908711 -Return-Path: -X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on - aws-us-west-2-korg-lkml-1.web.codeaurora.org -Received: from kanga.kvack.org (kanga.kvack.org [205.233.56.17]) - by smtp.lore.kernel.org (Postfix) with ESMTP id 6414FC43334 - for ; Wed, 6 Jul 2022 22:01:29 +0000 (UTC) -Received: by kanga.kvack.org (Postfix) - id 39E278E000A; Wed, 6 Jul 2022 18:01:13 -0400 (EDT) -Received: by kanga.kvack.org (Postfix, from userid 40) - id 34DB98E0001; Wed, 6 Jul 2022 18:01:13 -0400 (EDT) -X-Delivered-To: int-list-linux-mm@kvack.org -Received: by kanga.kvack.org (Postfix, from userid 63042) - id 1A1728E000A; Wed, 6 Jul 2022 18:01:13 -0400 (EDT) -X-Delivered-To: linux-mm@kvack.org -Received: from relay.hostedemail.com (smtprelay0017.hostedemail.com - [216.40.44.17]) - by kanga.kvack.org (Postfix) with ESMTP id 047FA8E0001 - for ; Wed, 6 Jul 2022 18:01:13 -0400 (EDT) -Received: from smtpin07.hostedemail.com (a10.router.float.18 [10.200.18.1]) - by unirelay06.hostedemail.com (Postfix) with ESMTP id CA9C334906 - for ; Wed, 6 Jul 2022 22:01:12 +0000 (UTC) -X-FDA: 79658046384.07.4AE281A 
-Received: from mail-yb1-f201.google.com (mail-yb1-f201.google.com - [209.85.219.201]) - by imf19.hostedemail.com (Postfix) with ESMTP id E1D811A004A - for ; Wed, 6 Jul 2022 22:01:11 +0000 (UTC) -Received: by mail-yb1-f201.google.com with SMTP id - j11-20020a05690212cb00b006454988d225so12639320ybu.10 - for ; Wed, 06 Jul 2022 15:01:11 -0700 (PDT) -DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20210112; - h=date:in-reply-to:message-id:mime-version:references:subject:from:to - :cc:content-transfer-encoding; - bh=gpspQZzCiCtLDF1mE2Rbzp3OUWg7vlq70C4xLE3ya+E=; - b=KmRh3W6zCTnYhuu2uLwH/71AGZzl5TVUrtsNnUP5zXTmGsYrVbcqdtCu+MA/r0Ndp0 - Swx6K5/Y1yzZuona+ojX9pyfPH0vSgmsnPUuGuK8IgKoxke8pbVIOMVO1oHB4MFfbJr9 - MZQ2DHsaZhnv+oABy231/ZNYVnut1uI8HXMoZE64GkKDaX0oTm6VD5IWp6Pjb9e4CCS2 - 4l6LRlV0GkUZbtfNu7oRMgYKOcOBXuCtbtOCopiW839uMoofW0liroJ2wElyPDiAsF2j - ZEKcyiLmzwxANf1QRl8D0H0t207nTseUwQuoJ0fGq2geu1GyW7/GzRuxYm66v/+UUfVJ - Ti/g== -X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=1e100.net; s=20210112; - h=x-gm-message-state:date:in-reply-to:message-id:mime-version - :references:subject:from:to:cc:content-transfer-encoding; - bh=gpspQZzCiCtLDF1mE2Rbzp3OUWg7vlq70C4xLE3ya+E=; - b=Ct8NkvISAcd2F1onRi9j8wNBQ3yVS4sMkwQThZBmSai4nt0pCzUW6MSInM6la2RQ+6 - Iyk/Q6V/4/M1AEzJ1CIyUOjtskptWB7g9JCLcYDV67l3e3cym3CfKO6faANsjcNo61aE - cGyF+8I3UwoMP2XkhiX8e+sh+JyAVS+7v6ah2jAK3rMcN9Qy3pRUpTzse16anYIvPXmH - D/n6XDiuVtka4xdvtVrXH1Ovj7jTQyu5zNSeDpYUHIIuY5HyyWlwP2GqOXO5+3ztetSe - lqHq/pwTeg5OaKzyo1/S4u5j63+cCDsRbst48LWqqY7iSJl7Jqjh9IcuciM5gwWyKVQq - exXg== -X-Gm-Message-State: AJIora8Djp7T6fvZwj7nFJ1nTHsOTMleXrE/THizuhZy3oXIgXemxG6T - WRLpIC0iL2d+my0UEmLvbYJe1kwX4xc= -X-Google-Smtp-Source: - AGRyM1tcS0bhkovBqaAMGcBTFG0LXet+IyIY3UhCyBJaxouYWPrdgSATtWZUnD1044Cxo6jW3UsFPLIHGBk= -X-Received: from yuzhao.bld.corp.google.com - ([2620:15c:183:200:b89c:e10a:466e:cf7d]) - (user=yuzhao job=sendgmr) by 2002:a25:73d1:0:b0:66e:aee4:feb3 with SMTP id - o200-20020a2573d1000000b0066eaee4feb3mr1925521ybc.452.1657144871215; Wed, 06 - Jul 2022 15:01:11 -0700 (PDT) -Date: Wed, 6 Jul 2022 16:00:22 -0600 -In-Reply-To: <20220706220022.968789-1-yuzhao@google.com> -Message-Id: <20220706220022.968789-14-yuzhao@google.com> -Mime-Version: 1.0 -References: <20220706220022.968789-1-yuzhao@google.com> -X-Mailer: git-send-email 2.37.0.rc0.161.g10f37bed90-goog -Subject: [PATCH v13 13/14] mm: multi-gen LRU: admin guide -From: Yu Zhao -To: Andrew Morton -Cc: Andi Kleen , - Aneesh Kumar , - Catalin Marinas , - Dave Hansen , Hillf Danton , - Jens Axboe , Johannes Weiner , - Jonathan Corbet , - Linus Torvalds , - Matthew Wilcox , Mel Gorman , - Michael Larabel , - Michal Hocko , Mike Rapoport , - Peter Zijlstra , Tejun Heo , - Vlastimil Babka , Will Deacon , - linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org, - linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org, - page-reclaim@google.com, Yu Zhao , - Brian Geffon , - Jan Alexander Steffens , - Oleksandr Natalenko , - Steven Barrett , - Suleiman Souhlal , Daniel Byrne , - Donald Carr , - " =?utf-8?q?Holger_Hoffst=C3=A4tte?= " , - Konstantin Kharlamov , - Shuang Zhai , Sofia Trinh , - Vaibhav Jain -ARC-Seal: i=1; s=arc-20220608; d=hostedemail.com; t=1657144872; a=rsa-sha256; - cv=none; - b=JsWQytQvs1ZknqPoqD3qo1TJldLBGiKSTga/ejO8CyQYViqdXml7nvJD7fQyRxXf/sYCeO - o91ZwxjqHFV+Qk45x3ZWpVnbVz5s7Ub1LlWxdnj2ACxVxDi2i4I70KlZDYV1V7+0DLXiwM - Cf5UnSo0xArYOHGQTNNAa/beRpM+U2U= -ARC-Authentication-Results: i=1; - imf19.hostedemail.com; - dkim=pass 
header.d=google.com header.s=20210112 header.b=KmRh3W6z; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf19.hostedemail.com: domain of - 3JwbGYgYKCGgeafNGUMUUMRK.IUSROTad-SSQbGIQ.UXM@flex--yuzhao.bounces.google.com - designates 209.85.219.201 as permitted sender) - smtp.mailfrom=3JwbGYgYKCGgeafNGUMUUMRK.IUSROTad-SSQbGIQ.UXM@flex--yuzhao.bounces.google.com -ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; - d=hostedemail.com; - s=arc-20220608; t=1657144872; - h=from:from:sender:reply-to:subject:subject:date:date: - message-id:message-id:to:to:cc:cc:mime-version:mime-version: - content-type:content-type: - content-transfer-encoding:content-transfer-encoding: - in-reply-to:in-reply-to:references:references:dkim-signature; - bh=gpspQZzCiCtLDF1mE2Rbzp3OUWg7vlq70C4xLE3ya+E=; - b=7SORwSc3XuCDIhY4Nnt3155Fml8B5PM7q+cxyDoyzRH9f30a8JT7kTOzO43GZtqu1vi0gx - ZvOQWsmLsXdrJ4He9F7TCEfWwHvTKJw2xq5RY+ztHvYdkw0u4ntOGqKfhRRpSNYMieV4f5 - IIb7Tz/BoxO7bW/vk+Bjj4szoFKZSXU= -X-Rspamd-Server: rspam04 -X-Rspam-User: -Authentication-Results: imf19.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=KmRh3W6z; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf19.hostedemail.com: domain of - 3JwbGYgYKCGgeafNGUMUUMRK.IUSROTad-SSQbGIQ.UXM@flex--yuzhao.bounces.google.com - designates 209.85.219.201 as permitted sender) - smtp.mailfrom=3JwbGYgYKCGgeafNGUMUUMRK.IUSROTad-SSQbGIQ.UXM@flex--yuzhao.bounces.google.com -X-Stat-Signature: a3k84bgjbfr9z8g5wse9kf3mp6fodfft -X-Rspamd-Queue-Id: E1D811A004A -X-HE-Tag: 1657144871-897432 -X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4 -Sender: owner-linux-mm@kvack.org -Precedence: bulk -X-Loop: owner-majordomo@kvack.org -List-ID: - -Add an admin guide. - -Signed-off-by: Yu Zhao -Acked-by: Brian Geffon -Acked-by: Jan Alexander Steffens (heftig) -Acked-by: Oleksandr Natalenko -Acked-by: Steven Barrett -Acked-by: Suleiman Souhlal -Tested-by: Daniel Byrne -Tested-by: Donald Carr -Tested-by: Holger Hoffstätte -Tested-by: Konstantin Kharlamov -Tested-by: Shuang Zhai -Tested-by: Sofia Trinh -Tested-by: Vaibhav Jain -Reviewed-by: Bagas Sanjaya ---- - Documentation/admin-guide/mm/index.rst | 1 + - Documentation/admin-guide/mm/multigen_lru.rst | 156 ++++++++++++++++++ - mm/Kconfig | 3 +- - mm/vmscan.c | 4 + - 4 files changed, 163 insertions(+), 1 deletion(-) - create mode 100644 Documentation/admin-guide/mm/multigen_lru.rst - -diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst -index c21b5823f126..2cf5bae62036 100644 ---- a/Documentation/admin-guide/mm/index.rst -+++ b/Documentation/admin-guide/mm/index.rst -@@ -32,6 +32,7 @@ the Linux memory management. - idle_page_tracking - ksm - memory-hotplug -+ multigen_lru - nommu-mmap - numa_memory_policy - numaperf -diff --git a/Documentation/admin-guide/mm/multigen_lru.rst b/Documentation/admin-guide/mm/multigen_lru.rst -new file mode 100644 -index 000000000000..6355f2b5019d ---- /dev/null -+++ b/Documentation/admin-guide/mm/multigen_lru.rst -@@ -0,0 +1,156 @@ -+.. SPDX-License-Identifier: GPL-2.0 -+ -+============= -+Multi-Gen LRU -+============= -+The multi-gen LRU is an alternative LRU implementation that optimizes -+page reclaim and improves performance under memory pressure. Page -+reclaim decides the kernel's caching policy and ability to overcommit -+memory. It directly impacts the kswapd CPU usage and RAM efficiency. -+ -+Quick start -+=========== -+Build the kernel with the following configurations. 
-+ -+* ``CONFIG_LRU_GEN=y`` -+* ``CONFIG_LRU_GEN_ENABLED=y`` -+ -+All set! -+ -+Runtime options -+=============== -+``/sys/kernel/mm/lru_gen/`` contains stable ABIs described in the -+following subsections. -+ -+Kill switch -+----------- -+``enabled`` accepts different values to enable or disable the -+following components. Its default value depends on -+``CONFIG_LRU_GEN_ENABLED``. All the components should be enabled -+unless some of them have unforeseen side effects. Writing to -+``enabled`` has no effect when a component is not supported by the -+hardware, and valid values will be accepted even when the main switch -+is off. -+ -+====== =============================================================== -+Values Components -+====== =============================================================== -+0x0001 The main switch for the multi-gen LRU. -+0x0002 Clearing the accessed bit in leaf page table entries in large -+ batches, when MMU sets it (e.g., on x86). This behavior can -+ theoretically worsen lock contention (mmap_lock). If it is -+ disabled, the multi-gen LRU will suffer a minor performance -+ degradation for workloads that contiguously map hot pages, -+ whose accessed bits can be otherwise cleared by fewer larger -+ batches. -+0x0004 Clearing the accessed bit in non-leaf page table entries as -+ well, when MMU sets it (e.g., on x86). This behavior was not -+ verified on x86 varieties other than Intel and AMD. If it is -+ disabled, the multi-gen LRU will suffer a negligible -+ performance degradation. -+[yYnN] Apply to all the components above. -+====== =============================================================== -+ -+E.g., -+:: -+ -+ echo y >/sys/kernel/mm/lru_gen/enabled -+ cat /sys/kernel/mm/lru_gen/enabled -+ 0x0007 -+ echo 5 >/sys/kernel/mm/lru_gen/enabled -+ cat /sys/kernel/mm/lru_gen/enabled -+ 0x0005 -+ -+Thrashing prevention -+-------------------- -+Personal computers are more sensitive to thrashing because it can -+cause janks (lags when rendering UI) and negatively impact user -+experience. The multi-gen LRU offers thrashing prevention to the -+majority of laptop and desktop users who do not have ``oomd``. -+ -+Users can write ``N`` to ``min_ttl_ms`` to prevent the working set of -+``N`` milliseconds from getting evicted. The OOM killer is triggered -+if this working set cannot be kept in memory. In other words, this -+option works as an adjustable pressure relief valve, and when open, it -+terminates applications that are hopefully not being used. -+ -+Based on the average human detectable lag (~100ms), ``N=1000`` usually -+eliminates intolerable janks due to thrashing. Larger values like -+``N=3000`` make janks less noticeable at the risk of premature OOM -+kills. -+ -+The default value ``0`` means disabled. -+ -+Experimental features -+===================== -+``/sys/kernel/debug/lru_gen`` accepts commands described in the -+following subsections. Multiple command lines are supported, so does -+concatenation with delimiters ``,`` and ``;``. -+ -+``/sys/kernel/debug/lru_gen_full`` provides additional stats for -+debugging. ``CONFIG_LRU_GEN_STATS=y`` keeps historical stats from -+evicted generations in this file. -+ -+Working set estimation -+---------------------- -+Working set estimation measures how much memory an application needs -+in a given time interval, and it is usually done with little impact on -+the performance of the application. E.g., data centers want to -+optimize job scheduling (bin packing) to improve memory utilizations. 
-+When a new job comes in, the job scheduler needs to find out whether -+each server it manages can allocate a certain amount of memory for -+this new job before it can pick a candidate. To do so, the job -+scheduler needs to estimate the working sets of the existing jobs. -+ -+When it is read, ``lru_gen`` returns a histogram of numbers of pages -+accessed over different time intervals for each memcg and node. -+``MAX_NR_GENS`` decides the number of bins for each histogram. The -+histograms are noncumulative. -+:: -+ -+ memcg memcg_id memcg_path -+ node node_id -+ min_gen_nr age_in_ms nr_anon_pages nr_file_pages -+ ... -+ max_gen_nr age_in_ms nr_anon_pages nr_file_pages -+ -+Each bin contains an estimated number of pages that have been accessed -+within ``age_in_ms``. E.g., ``min_gen_nr`` contains the coldest pages -+and ``max_gen_nr`` contains the hottest pages, since ``age_in_ms`` of -+the former is the largest and that of the latter is the smallest. -+ -+Users can write ``+ memcg_id node_id max_gen_nr -+[can_swap [force_scan]]`` to ``lru_gen`` to create a new generation -+``max_gen_nr+1``. ``can_swap`` defaults to the swap setting and, if it -+is set to ``1``, it forces the scan of anon pages when swap is off, -+and vice versa. ``force_scan`` defaults to ``1`` and, if it is set to -+``0``, it employs heuristics to reduce the overhead, which is likely -+to reduce the coverage as well. -+ -+A typical use case is that a job scheduler writes to ``lru_gen`` at a -+certain time interval to create new generations, and it ranks the -+servers it manages based on the sizes of their cold pages defined by -+this time interval. -+ -+Proactive reclaim -+----------------- -+Proactive reclaim induces page reclaim when there is no memory -+pressure. It usually targets cold pages only. E.g., when a new job -+comes in, the job scheduler wants to proactively reclaim cold pages on -+the server it selected to improve the chance of successfully landing -+this new job. -+ -+Users can write ``- memcg_id node_id min_gen_nr [swappiness -+[nr_to_reclaim]]`` to ``lru_gen`` to evict generations less than or -+equal to ``min_gen_nr``. Note that ``min_gen_nr`` should be less than -+``max_gen_nr-1`` as ``max_gen_nr`` and ``max_gen_nr-1`` are not fully -+aged and therefore cannot be evicted. ``swappiness`` overrides the -+default value in ``/proc/sys/vm/swappiness``. ``nr_to_reclaim`` limits -+the number of pages to evict. -+ -+A typical use case is that a job scheduler writes to ``lru_gen`` -+before it tries to land a new job on a server. If it fails to -+materialize enough cold pages because of the overestimation, it -+retries on the next server according to the ranking result obtained -+from the working set estimation step. This less forceful approach -+limits the impacts on the existing jobs. -diff --git a/mm/Kconfig b/mm/Kconfig -index 0c2ef0af0036..a0f7b6e66410 100644 ---- a/mm/Kconfig -+++ b/mm/Kconfig -@@ -1137,7 +1137,8 @@ config LRU_GEN - # make sure folio->flags has enough spare bits - depends on 64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP - help -- A high performance LRU implementation to overcommit memory. -+ A high performance LRU implementation to overcommit memory. See -+ Documentation/admin-guide/mm/multigen_lru.rst for details. 
- - config LRU_GEN_ENABLED - bool "Enable by default" -diff --git a/mm/vmscan.c b/mm/vmscan.c -index fbcd298adca7..7096ff7836db 100644 ---- a/mm/vmscan.c -+++ b/mm/vmscan.c -@@ -5209,6 +5209,7 @@ static ssize_t show_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, c - return sprintf(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl))); - } - -+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */ - static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t len) - { -@@ -5242,6 +5243,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c - return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps); - } - -+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */ - static ssize_t store_enabled(struct kobject *kobj, struct kobj_attribute *attr, - const char *buf, size_t len) - { -@@ -5389,6 +5391,7 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, - seq_putc(m, '\n'); - } - -+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */ - static int lru_gen_seq_show(struct seq_file *m, void *v) - { - unsigned long seq; -@@ -5547,6 +5550,7 @@ static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq, - return err; - } - -+/* see Documentation/admin-guide/mm/multigen_lru.rst for details */ - static ssize_t lru_gen_seq_write(struct file *file, const char __user *src, - size_t len, loff_t *pos) - { - -From patchwork Wed Jul 6 22:00:23 2022 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Patchwork-Submitter: Yu Zhao -X-Patchwork-Id: 12908712 -Return-Path: -X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on - aws-us-west-2-korg-lkml-1.web.codeaurora.org -Received: from kanga.kvack.org (kanga.kvack.org [205.233.56.17]) - by smtp.lore.kernel.org (Postfix) with ESMTP id 6E4E6C43334 - for ; Wed, 6 Jul 2022 22:01:33 +0000 (UTC) -Received: by kanga.kvack.org (Postfix) - id 59C1C8E000B; Wed, 6 Jul 2022 18:01:14 -0400 (EDT) -Received: by kanga.kvack.org (Postfix, from userid 40) - id 5235B8E0001; Wed, 6 Jul 2022 18:01:14 -0400 (EDT) -X-Delivered-To: int-list-linux-mm@kvack.org -Received: by kanga.kvack.org (Postfix, from userid 63042) - id 350398E000B; Wed, 6 Jul 2022 18:01:14 -0400 (EDT) -X-Delivered-To: linux-mm@kvack.org -Received: from relay.hostedemail.com (smtprelay0012.hostedemail.com - [216.40.44.12]) - by kanga.kvack.org (Postfix) with ESMTP id 225F58E0001 - for ; Wed, 6 Jul 2022 18:01:14 -0400 (EDT) -Received: from smtpin31.hostedemail.com (a10.router.float.18 [10.200.18.1]) - by unirelay08.hostedemail.com (Postfix) with ESMTP id EE41D2169C - for ; Wed, 6 Jul 2022 22:01:13 +0000 (UTC) -X-FDA: 79658046426.31.47294E4 -Received: from mail-yb1-f201.google.com (mail-yb1-f201.google.com - [209.85.219.201]) - by imf01.hostedemail.com (Postfix) with ESMTP id 5513940016 - for ; Wed, 6 Jul 2022 22:01:13 +0000 (UTC) -Received: by mail-yb1-f201.google.com with SMTP id - a8-20020a25a188000000b0066839c45fe8so12515135ybi.17 - for ; Wed, 06 Jul 2022 15:01:13 -0700 (PDT) -DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=google.com; s=20210112; - h=date:in-reply-to:message-id:mime-version:references:subject:from:to - :cc:content-transfer-encoding; - bh=Y7M5+uMCyjK2Tw3gtvlFnf3s0uMKtiqOOKU+iupOzGc=; - b=RaJYVCw6kQFWZr57Fj6Z+M7CjIu+Fy2mkXaD9icGpAKOAxyz1uufDA95qkMfXqksCy - CttyIsR4+X5trkDvd0W5HTI3/XFLKoLEsiRSAv23qebNkIOkH8cPlNd2JsU/+DVzJUpM - 
TGOZ6teMB/sFPIH8IZKMODnpg+VxKIyScGqlsqOiDoxcPPCMP8e0zolM240kI1HmhYsj - WxZdSDL+OZnX2V8pTDz516/mmCsEM23W0x65TiLdKDGOIFAAkNP/EIcvQWWj8SBUz/dL - a0IGdBEhZobBNts8S/4QPXOFk1zc9TBNhY+OPo4y5YJG3duUWWVQ+373DmVdZPluRI23 - DgVQ== -X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; - d=1e100.net; s=20210112; - h=x-gm-message-state:date:in-reply-to:message-id:mime-version - :references:subject:from:to:cc:content-transfer-encoding; - bh=Y7M5+uMCyjK2Tw3gtvlFnf3s0uMKtiqOOKU+iupOzGc=; - b=t8KFMI+odj2H0PYsSR514rWxJS26xzV7AKq6CAfD49T5kfUjPz8wfdAfySr/kBsGFX - Ijo9N8v2aDQNSOqwxiy6N0WnrzD6bgFtRWeblglDP9rnKXQmG38PpjQrFbGbWRu0JwOP - V4GhBBsVBqsEbP6lV54mE8LL+AX6orjSmsdYgGuR7py2ze/69AI+KXkU4wuGGk7f02J6 - NOorMZZljVWHawNiYzwJ/nSCIEDP1RdLxj/QR1X2gsT6fGY0XqrePFMti1n8UBr5vGzF - qDM3r6uoPM0Dl89KQfjhANf8jyajCPr0wd7Ldc2REEmnDU12jZhd2cV3sTQEKMbtFvsH - RiDA== -X-Gm-Message-State: AJIora9JyuHh+WKBn43isO3BKSkb8MvQEqp82Y/5Bs0mEkxpSgPDJzSM - HtWXZ+iDc2EVNjhmgizIp3qSZYJgkRM= -X-Google-Smtp-Source: - AGRyM1tPyG6w7lg37p0dKVbMplDSUgwZboH2lG42opEnpdXZgbjOhtWD7cZCMHKO+sLemtrKnNTphNyTinE= -X-Received: from yuzhao.bld.corp.google.com - ([2620:15c:183:200:b89c:e10a:466e:cf7d]) - (user=yuzhao job=sendgmr) by 2002:a25:b806:0:b0:663:d35d:8b8a with SMTP id - v6-20020a25b806000000b00663d35d8b8amr45647399ybj.69.1657144872662; Wed, 06 - Jul 2022 15:01:12 -0700 (PDT) -Date: Wed, 6 Jul 2022 16:00:23 -0600 -In-Reply-To: <20220706220022.968789-1-yuzhao@google.com> -Message-Id: <20220706220022.968789-15-yuzhao@google.com> -Mime-Version: 1.0 -References: <20220706220022.968789-1-yuzhao@google.com> -X-Mailer: git-send-email 2.37.0.rc0.161.g10f37bed90-goog -Subject: [PATCH v13 14/14] mm: multi-gen LRU: design doc -From: Yu Zhao -To: Andrew Morton -Cc: Andi Kleen , - Aneesh Kumar , - Catalin Marinas , - Dave Hansen , Hillf Danton , - Jens Axboe , Johannes Weiner , - Jonathan Corbet , - Linus Torvalds , - Matthew Wilcox , Mel Gorman , - Michael Larabel , - Michal Hocko , Mike Rapoport , - Peter Zijlstra , Tejun Heo , - Vlastimil Babka , Will Deacon , - linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org, - linux-kernel@vger.kernel.org, linux-mm@kvack.org, x86@kernel.org, - page-reclaim@google.com, Yu Zhao , - Brian Geffon , - Jan Alexander Steffens , - Oleksandr Natalenko , - Steven Barrett , - Suleiman Souhlal , Daniel Byrne , - Donald Carr , - " =?utf-8?q?Holger_Hoffst=C3=A4tte?= " , - Konstantin Kharlamov , - Shuang Zhai , Sofia Trinh , - Vaibhav Jain -ARC-Seal: i=1; s=arc-20220608; d=hostedemail.com; t=1657144873; a=rsa-sha256; - cv=none; - b=UrrRpXp7KWnXHmjT/QxuJ33LiGsO02xp/Gl5IKp2przZQE/MN2oPkN0qvS6FM/HpuayBLm - zd3wW1kYV7c+CYfLpUIs4G8pg9A6gNyLzycabKZPgoBu+fqMU04tsshxN75CQVnnpFeUVh - ZD4xhdIcppi7j9nVM9IcKC/45QGbnp4= -ARC-Authentication-Results: i=1; - imf01.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=RaJYVCw6; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf01.hostedemail.com: domain of - 3KAbGYgYKCGkfbgOHVNVVNSL.JVTSPUbe-TTRcHJR.VYN@flex--yuzhao.bounces.google.com - designates 209.85.219.201 as permitted sender) - smtp.mailfrom=3KAbGYgYKCGkfbgOHVNVVNSL.JVTSPUbe-TTRcHJR.VYN@flex--yuzhao.bounces.google.com -ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; - d=hostedemail.com; - s=arc-20220608; t=1657144873; - h=from:from:sender:reply-to:subject:subject:date:date: - message-id:message-id:to:to:cc:cc:mime-version:mime-version: - content-type:content-type: - content-transfer-encoding:content-transfer-encoding: - 
in-reply-to:in-reply-to:references:references:dkim-signature; - bh=Y7M5+uMCyjK2Tw3gtvlFnf3s0uMKtiqOOKU+iupOzGc=; - b=CC8ORwOmRVo1ysrsxcLM/w/OQsNgtHVDsWXjTolVPaVGtsBAmORZs9mo/t9qQJXlTbpE6W - MK4e1j+KxvgzJ4hEk7FEh4udfXbo/i2Zs4SIAS1fMWoE8oSUqdpISvSeaeM8m9OTpSMv9b - y/YSdGTLFiLWNyHM+yI8Q6QaQPpR8FA= -X-Rspamd-Server: rspam04 -X-Rspam-User: -Authentication-Results: imf01.hostedemail.com; - dkim=pass header.d=google.com header.s=20210112 header.b=RaJYVCw6; - dmarc=pass (policy=reject) header.from=google.com; - spf=pass (imf01.hostedemail.com: domain of - 3KAbGYgYKCGkfbgOHVNVVNSL.JVTSPUbe-TTRcHJR.VYN@flex--yuzhao.bounces.google.com - designates 209.85.219.201 as permitted sender) - smtp.mailfrom=3KAbGYgYKCGkfbgOHVNVVNSL.JVTSPUbe-TTRcHJR.VYN@flex--yuzhao.bounces.google.com -X-Stat-Signature: gkifem6ym4fgtjcteqxerconsisp8cqt -X-Rspamd-Queue-Id: 5513940016 -X-HE-Tag: 1657144873-85540 -X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4 -Sender: owner-linux-mm@kvack.org -Precedence: bulk -X-Loop: owner-majordomo@kvack.org -List-ID: - -Add a design doc. - -Signed-off-by: Yu Zhao -Acked-by: Brian Geffon -Acked-by: Jan Alexander Steffens (heftig) -Acked-by: Oleksandr Natalenko -Acked-by: Steven Barrett -Acked-by: Suleiman Souhlal -Tested-by: Daniel Byrne -Tested-by: Donald Carr -Tested-by: Holger Hoffstätte -Tested-by: Konstantin Kharlamov -Tested-by: Shuang Zhai -Tested-by: Sofia Trinh -Tested-by: Vaibhav Jain -Reviewed-by: Bagas Sanjaya ---- - Documentation/vm/index.rst | 1 + - Documentation/vm/multigen_lru.rst | 159 ++++++++++++++++++++++++++++++ - 2 files changed, 160 insertions(+) - create mode 100644 Documentation/vm/multigen_lru.rst - -diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst -index 575ccd40e30c..4aa12b8be278 100644 ---- a/Documentation/vm/index.rst -+++ b/Documentation/vm/index.rst -@@ -51,6 +51,7 @@ above structured documentation, or deleted if it has served its purpose. - ksm - memory-model - mmu_notifier -+ multigen_lru - numa - overcommit-accounting - page_migration -diff --git a/Documentation/vm/multigen_lru.rst b/Documentation/vm/multigen_lru.rst -new file mode 100644 -index 000000000000..d7062c6a8946 ---- /dev/null -+++ b/Documentation/vm/multigen_lru.rst -@@ -0,0 +1,159 @@ -+.. SPDX-License-Identifier: GPL-2.0 -+ -+============= -+Multi-Gen LRU -+============= -+The multi-gen LRU is an alternative LRU implementation that optimizes -+page reclaim and improves performance under memory pressure. Page -+reclaim decides the kernel's caching policy and ability to overcommit -+memory. It directly impacts the kswapd CPU usage and RAM efficiency. -+ -+Design overview -+=============== -+Objectives -+---------- -+The design objectives are: -+ -+* Good representation of access recency -+* Try to profit from spatial locality -+* Fast paths to make obvious choices -+* Simple self-correcting heuristics -+ -+The representation of access recency is at the core of all LRU -+implementations. In the multi-gen LRU, each generation represents a -+group of pages with similar access recency. Generations establish a -+(time-based) common frame of reference and therefore help make better -+choices, e.g., between different memcgs on a computer or different -+computers in a data center (for job scheduling). -+ -+Exploiting spatial locality improves efficiency when gathering the -+accessed bit. A rmap walk targets a single page and does not try to -+profit from discovering a young PTE. 
A page table walk can sweep all -+the young PTEs in an address space, but the address space can be too -+sparse to make a profit. The key is to optimize both methods and use -+them in combination. -+ -+Fast paths reduce code complexity and runtime overhead. Unmapped pages -+do not require TLB flushes; clean pages do not require writeback. -+These facts are only helpful when other conditions, e.g., access -+recency, are similar. With generations as a common frame of reference, -+additional factors stand out. But obvious choices might not be good -+choices; thus self-correction is necessary. -+ -+The benefits of simple self-correcting heuristics are self-evident. -+Again, with generations as a common frame of reference, this becomes -+attainable. Specifically, pages in the same generation can be -+categorized based on additional factors, and a feedback loop can -+statistically compare the refault percentages across those categories -+and infer which of them are better choices. -+ -+Assumptions -+----------- -+The protection of hot pages and the selection of cold pages are based -+on page access channels and patterns. There are two access channels: -+ -+* Accesses through page tables -+* Accesses through file descriptors -+ -+The protection of the former channel is by design stronger because: -+ -+1. The uncertainty in determining the access patterns of the former -+ channel is higher due to the approximation of the accessed bit. -+2. The cost of evicting the former channel is higher due to the TLB -+ flushes required and the likelihood of encountering the dirty bit. -+3. The penalty of underprotecting the former channel is higher because -+ applications usually do not prepare themselves for major page -+ faults like they do for blocked I/O. E.g., GUI applications -+ commonly use dedicated I/O threads to avoid blocking rendering -+ threads. -+ -+There are also two access patterns: -+ -+* Accesses exhibiting temporal locality -+* Accesses not exhibiting temporal locality -+ -+For the reasons listed above, the former channel is assumed to follow -+the former pattern unless ``VM_SEQ_READ`` or ``VM_RAND_READ`` is -+present, and the latter channel is assumed to follow the latter -+pattern unless outlying refaults have been observed. -+ -+Workflow overview -+================= -+Evictable pages are divided into multiple generations for each -+``lruvec``. The youngest generation number is stored in -+``lrugen->max_seq`` for both anon and file types as they are aged on -+an equal footing. The oldest generation numbers are stored in -+``lrugen->min_seq[]`` separately for anon and file types as clean file -+pages can be evicted regardless of swap constraints. These three -+variables are monotonically increasing. -+ -+Generation numbers are truncated into ``order_base_2(MAX_NR_GENS+1)`` -+bits in order to fit into the gen counter in ``folio->flags``. Each -+truncated generation number is an index to ``lrugen->lists[]``. The -+sliding window technique is used to track at least ``MIN_NR_GENS`` and -+at most ``MAX_NR_GENS`` generations. The gen counter stores a value -+within ``[1, MAX_NR_GENS]`` while a page is on one of -+``lrugen->lists[]``; otherwise it stores zero. -+ -+Each generation is divided into multiple tiers. A page accessed ``N`` -+times through file descriptors is in tier ``order_base_2(N)``. Unlike -+generations, tiers do not have dedicated ``lrugen->lists[]``. 
In -+contrast to moving across generations, which requires the LRU lock, -+moving across tiers only involves atomic operations on -+``folio->flags`` and therefore has a negligible cost. A feedback loop -+modeled after the PID controller monitors refaults over all the tiers -+from anon and file types and decides which tiers from which types to -+evict or protect. -+ -+There are two conceptually independent procedures: the aging and the -+eviction. They form a closed-loop system, i.e., the page reclaim. -+ -+Aging -+----- -+The aging produces young generations. Given an ``lruvec``, it -+increments ``max_seq`` when ``max_seq-min_seq+1`` approaches -+``MIN_NR_GENS``. The aging promotes hot pages to the youngest -+generation when it finds them accessed through page tables; the -+demotion of cold pages happens consequently when it increments -+``max_seq``. The aging uses page table walks and rmap walks to find -+young PTEs. For the former, it iterates ``lruvec_memcg()->mm_list`` -+and calls ``walk_page_range()`` with each ``mm_struct`` on this list -+to scan PTEs, and after each iteration, it increments ``max_seq``. For -+the latter, when the eviction walks the rmap and finds a young PTE, -+the aging scans the adjacent PTEs. For both, on finding a young PTE, -+the aging clears the accessed bit and updates the gen counter of the -+page mapped by this PTE to ``(max_seq%MAX_NR_GENS)+1``. -+ -+Eviction -+-------- -+The eviction consumes old generations. Given an ``lruvec``, it -+increments ``min_seq`` when ``lrugen->lists[]`` indexed by -+``min_seq%MAX_NR_GENS`` becomes empty. To select a type and a tier to -+evict from, it first compares ``min_seq[]`` to select the older type. -+If both types are equally old, it selects the one whose first tier has -+a lower refault percentage. The first tier contains single-use -+unmapped clean pages, which are the best bet. The eviction sorts a -+page according to its gen counter if the aging has found this page -+accessed through page tables and updated its gen counter. It also -+moves a page to the next generation, i.e., ``min_seq+1``, if this page -+was accessed multiple times through file descriptors and the feedback -+loop has detected outlying refaults from the tier this page is in. To -+this end, the feedback loop uses the first tier as the baseline, for -+the reason stated earlier. -+ -+Summary -+------- -+The multi-gen LRU can be disassembled into the following parts: -+ -+* Generations -+* Rmap walks -+* Page table walks -+* Bloom filters -+* PID controller -+ -+The aging and the eviction form a producer-consumer model; -+specifically, the latter drives the former by the sliding window over -+generations. Within the aging, rmap walks drive page table walks by -+inserting hot densely populated page tables to the Bloom filters. -+Within the eviction, the PID controller uses refaults as the feedback -+to select types to evict and tiers to protect. 
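As a quick illustration of the bookkeeping the design doc above describes, here is a
minimal userspace sketch. It is NOT the kernel's mm/ implementation: the constants,
the toy_lruvec struct and the helper names are made up for illustration. It only
models how a generation sequence number is truncated into a list index, how an access
count maps to a tier via order_base_2(), and how the aging and the eviction move the
[min_seq, max_seq] window.

/*
 * Toy model of the multi-gen LRU bookkeeping described above.
 * NOT kernel code: constants, struct and helper names are illustrative.
 */
#include <stdio.h>

#define MIN_NR_GENS  2U
#define MAX_NR_GENS  4U
#define MAX_NR_TIERS 4U

struct toy_lruvec {
	unsigned long max_seq;	/* youngest generation number */
	unsigned long min_seq;	/* oldest generation number (one type only) */
};

/* Truncated generation number, i.e. the index into lrugen->lists[]. */
static unsigned long gen_index(unsigned long seq)
{
	return seq % MAX_NR_GENS;
}

/* order_base_2(n): smallest order with 2^order >= n; 0 for n <= 1. */
static unsigned int order_base_2_toy(unsigned int n)
{
	unsigned int order = 0;

	while ((1U << order) < n)
		order++;
	return order;
}

/* A page accessed n times through file descriptors sits in tier order_base_2(n). */
static unsigned int tier_of(unsigned int accesses)
{
	unsigned int tier = order_base_2_toy(accesses);

	return tier < MAX_NR_TIERS ? tier : MAX_NR_TIERS - 1;
}

/* Aging: produce a younger generation when the window approaches MIN_NR_GENS. */
static void age(struct toy_lruvec *v)
{
	if (v->max_seq - v->min_seq + 1 <= MIN_NR_GENS)
		v->max_seq++;
}

/* Eviction: retire the oldest generation once its list has been drained
 * (and more than MIN_NR_GENS generations remain). */
static void evict(struct toy_lruvec *v, int oldest_list_empty)
{
	if (oldest_list_empty && v->max_seq - v->min_seq + 1 > MIN_NR_GENS)
		v->min_seq++;
}

int main(void)
{
	struct toy_lruvec v = { .max_seq = 5, .min_seq = 3 };

	printf("list index of max_seq=%lu: %lu\n", v.max_seq, gen_index(v.max_seq));
	printf("tier after 1 access: %u, after 4 accesses: %u\n", tier_of(1), tier_of(4));

	evict(&v, 1);	/* the eviction consumes the oldest generation */
	age(&v);	/* the aging produces a new youngest generation */
	printf("generation window: [%lu, %lu]\n", v.min_seq, v.max_seq);
	return 0;
}

The real implementation additionally packs the gen counter into folio->flags and keeps
per-type, per-zone lists plus the refault feedback loop, all of which this sketch leaves out.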
diff --git a/sys-kernel/pinephone-pro-sources/files/config-ppp b/sys-kernel/pinephone-sources/files/config-ppp similarity index 100% rename from sys-kernel/pinephone-pro-sources/files/config-ppp rename to sys-kernel/pinephone-sources/files/config-ppp diff --git a/sys-kernel/pinephone-pro-sources/files/config-ppp-old b/sys-kernel/pinephone-sources/files/config-ppp-old similarity index 100% rename from sys-kernel/pinephone-pro-sources/files/config-ppp-old rename to sys-kernel/pinephone-sources/files/config-ppp-old diff --git a/sys-kernel/pinephone-pro-sources/files/dracut-ppp.conf b/sys-kernel/pinephone-sources/files/dracut-ppp.conf similarity index 100% rename from sys-kernel/pinephone-pro-sources/files/dracut-ppp.conf rename to sys-kernel/pinephone-sources/files/dracut-ppp.conf diff --git a/sys-kernel/pinephone-sources/files/pp-keyboard.patch b/sys-kernel/pinephone-sources/files/pp-keyboard.patch deleted file mode 100644 index a8e818e..0000000 --- a/sys-kernel/pinephone-sources/files/pp-keyboard.patch +++ /dev/null @@ -1,176 +0,0 @@ -From d1d849cae12db71aa81ceedaedc1b17a34790367 Mon Sep 17 00:00:00 2001 -From: Samuel Holland -Date: Sat, 19 Jun 2021 18:36:05 -0500 -Subject: [PATCH] Input: kb151 - Add a driver for the KB151 keyboard - -This keyboard is found in the official Pine64 PinePhone keyboard case. -It is connected over I2C and runs a libre firmware. - -Signed-off-by: Samuel Holland ---- - .../dts/allwinner/sun50i-a64-pinephone.dtsi | 64 +++++ - drivers/input/keyboard/Kconfig | 10 + - drivers/input/keyboard/Makefile | 1 + - drivers/input/keyboard/kb151.c | 246 ++++++++++++++++++ - 4 files changed, 321 insertions(+) - create mode 100644 drivers/input/keyboard/kb151.c - -diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi -index 4ede9fe66020c..0bdc6eceec609 100644 ---- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi -+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi -@@ -551,6 +551,70 @@ - /* Connected to pogo pins (external spring based pinheader for user addons) */ - &i2c2 { - status = "okay"; -+ -+ keyboard@15 { -+ compatible = "pine64,kb151"; -+ reg = <0x15>; -+ interrupt-parent = <&r_pio>; -+ interrupts = <0 12 IRQ_TYPE_EDGE_FALLING>; /* PL12 */ -+ keypad,num-rows = <6>; -+ keypad,num-columns = <12>; -+ linux,keymap = ; -+ wakeup-source; -+ }; - }; - - &i2s2 { -diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig -index 40a070a2e7f5b..0259e9133f469 100644 ---- a/drivers/input/keyboard/Kconfig -+++ b/drivers/input/keyboard/Kconfig -@@ -353,6 +353,16 @@ config KEYBOARD_HP7XX - To compile this driver as a module, choose M here: the - module will be called jornada720_kbd. - -+config KEYBOARD_KB151 -+ tristate "Pine64 KB151 Keyboard" -+ depends on I2C -+ select CRC8 -+ select INPUT_MATRIXKMAP -+ help -+ Say Y here to enable support for the KB151 keyboard used in the -+ Pine64 PinePhone keyboard case. 
This driver supports the FLOSS -+ firmware available at https://megous.com/git/pinephone-keyboard/ -+ - config KEYBOARD_LM8323 - tristate "LM8323 keypad chip" - depends on I2C -From 2423aac2d6f5db55da99e11fd799ee66fe6f54c6 Mon Sep 17 00:00:00 2001 -From: Samuel Holland -Date: Mon, 9 Aug 2021 19:30:18 -0500 -Subject: [PATCH] Input: kb151 - Add support for the FN layer - -Signed-off-by: Samuel Holland ---- - .../dts/allwinner/sun50i-a64-pinephone.dtsi | 34 +++++++++++++++++-- - drivers/input/keyboard/kb151.c | 33 ++++++++++-------- - 2 files changed, 51 insertions(+), 16 deletions(-) - -diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi -index 0bdc6eceec609..68f5730cf164c 100644 ---- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi -+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi -@@ -557,7 +557,7 @@ - reg = <0x15>; - interrupt-parent = <&r_pio>; - interrupts = <0 12 IRQ_TYPE_EDGE_FALLING>; /* PL12 */ -- keypad,num-rows = <6>; -+ keypad,num-rows = <12>; - keypad,num-columns = <12>; - linux,keymap = ; -+ MATRIX_KEY(5, 5, KEY_RIGHTALT) -+ -+ /* FN layer */ -+ MATRIX_KEY(6, 1, KEY_BACKSLASH) -+ MATRIX_KEY(6, 2, KEY_BACKSLASH) -+ MATRIX_KEY(6, 3, KEY_DOLLAR) -+ MATRIX_KEY(6, 4, KEY_EURO) -+ MATRIX_KEY(6, 5, KEY_GRAVE) -+ MATRIX_KEY(6, 6, KEY_GRAVE) -+ MATRIX_KEY(6, 7, KEY_MINUS) -+ MATRIX_KEY(6, 8, KEY_EQUAL) -+ MATRIX_KEY(6, 9, KEY_MINUS) -+ MATRIX_KEY(6, 10, KEY_EQUAL) -+ MATRIX_KEY(6, 11, KEY_DELETE) -+ -+ MATRIX_KEY(8, 0, KEY_SYSRQ) -+ MATRIX_KEY(8, 10, KEY_INSERT) -+ -+ MATRIX_KEY(9, 0, KEY_LEFTSHIFT) -+ MATRIX_KEY(9, 8, KEY_HOME) -+ MATRIX_KEY(9, 9, KEY_UP) -+ MATRIX_KEY(9, 10, KEY_END) -+ -+ MATRIX_KEY(10, 1, KEY_LEFTCTRL) -+ MATRIX_KEY(10, 6, KEY_LEFT) -+ MATRIX_KEY(10, 8, KEY_RIGHT) -+ MATRIX_KEY(10, 9, KEY_DOWN) -+ -+ MATRIX_KEY(11, 2, KEY_FN) -+ MATRIX_KEY(11, 3, KEY_LEFTALT) -+ MATRIX_KEY(11, 5, KEY_RIGHTALT)>; - wakeup-source; - }; - }; diff --git a/sys-kernel/pinephone-sources/pinephone-sources-5.19.0.ebuild b/sys-kernel/pinephone-sources/pinephone-sources-5.19.12.ebuild similarity index 71% rename from sys-kernel/pinephone-sources/pinephone-sources-5.19.0.ebuild rename to sys-kernel/pinephone-sources/pinephone-sources-5.19.12.ebuild index 650a9b2..010ff7a 100644 --- a/sys-kernel/pinephone-sources/pinephone-sources-5.19.0.ebuild +++ b/sys-kernel/pinephone-sources/pinephone-sources-5.19.12.ebuild @@ -17,7 +17,7 @@ DEPEND="${RDEPEND} DESCRIPTION="Full sources for the Linux kernel, with megi's patch for pinephone and gentoo patchset" -MEGI_TAG="orange-pi-5.19-20220802-0940" +MEGI_TAG="orange-pi-5.19-20220909-1622" SRC_URI="https://github.com/megous/linux/archive/${MEGI_TAG}.tar.gz" PATCHES=( @@ -35,18 +35,20 @@ PATCHES=( ${FILESDIR}/5021_BMQ-and-PDS-gentoo-defaults.patch #PinePhone Patches - ${FILESDIR}/0101-arm64-dts-pinephone-drop-modem-power-node.patch ${FILESDIR}/0102-arm64-dts-pinephone-pro-remove-modem-node.patch + ${FILESDIR}/0103-arm64-dts-rk3399-pinephone-pro-add-modem-RI-pin.patch ${FILESDIR}/0103-ccu-sun50i-a64-reparent-clocks-to-lower-speed-oscillator.patch + ${FILESDIR}/0104-PPP-Add-reset-resume-to-usb_wwan.patch ${FILESDIR}/0104-quirk-kernel-org-bug-210681-firmware_rome_error.patch + ${FILESDIR}/0104-Revert-usb-quirks-Add-USB_QUIRK_RESET-for-Quectel-EG25G.patch + ${FILESDIR}/0104-rk818_charger-use-type-battery-again.patch ${FILESDIR}/0105-leds-gpio-make-max_brightness-configurable.patch - ${FILESDIR}/0106-panic-led.patch + ${FILESDIR}/0106-panic-led.patch + 
${FILESDIR}/0106-sound-rockchip-i2s-Dont-disable-mclk-on-suspend.patch
+	${FILESDIR}/0201-revert-fbcon-remove-now-unusued-softback_lines-cursor-argument.patch
+	${FILESDIR}/0202-revert-fbcon-remove-no-op-fbcon_set_origin.patch
+	${FILESDIR}/0203-revert-fbcon-remove-soft-scrollback-code.patch
 
-	# keyboard
-	${FILESDIR}/pp-keyboard.patch
-
-	# LRU
-	${FILESDIR}/Multi-Gen-LRU-Framework.patch
 )
 
 S="${WORKDIR}/linux-${MEGI_TAG}"
@@ -61,18 +63,17 @@ src_prepare() {
 }
 
 pkg_postinst() {
-	kernel-2_pkg_postinst
 	kernel-2_pkg_postinst
 	einfo "To build and install the kernel use the following commands:"
 	einfo "# make Image modules"
 	einfo "# make DTC_FLAGS="-@" dtbs"
 	einfo "# cp arch/arm64/boot/Image /boot"
-	einfo "# make INSTALL_MOD_PATH=/usr modules_install"
+	einfo "# make INSTALL_MOD_PATH=/ modules_install"
 	einfo "# make INSTALL_DTBS_PATH=/boot/dtbs dtbs_install"
 	einfo "You will need to create an initramfs afterwards."
 	einfo "If you use dracut you can run:"
-	einfo "# dracut -m \"rootfs-block base\" --host-only --kver 5.19.0-gentoo-arm64"
-	einfo "Change 5.19.0-gentoo-arm64 to your kernel version installed in /lib/modules"
+	einfo "# dracut -m \"rootfs-block base\" --host-only --kver 5.19.12-pinephone-gentoo-arm64"
+	einfo "Change 5.19.12-pinephone-gentoo-arm64 to your kernel version installed in /lib/modules"
 }
 
 pkg_postrm() {