Added all pinephone kernels

This commit is contained in:
Gerben Jan Dijkman 2021-12-03 10:03:17 +01:00
parent 98888f9134
commit b996c0d554
122 changed files with 645402 additions and 1443 deletions

View File

@ -1,3 +0,0 @@
DIST anx7688-fw.bin 220344 BLAKE2B ff4f3aa6a01c77c687edc4a8dcef8f32754838354497c5b92ec54564b95ed02e737b411d24d14b79c49b186271536e22b1ef6a65f7eda21ed2b55d4500a3bf6a SHA512 d17155a48ce0916d788f763e5a46e1a3a069970f3ec7d3404bc56a7da89d423682050ba7bf4993b222ace609202bdbddf42a0b4506c4193b03bc324c8cfe1be4
DIST ov5640_af.bin 30768 BLAKE2B cfac7640de622e59cdae83d63ce853805ced6801c007ac124cd23e368fb3a0faae5dec6509b2b3cc6e5fc7fe607a547f1fb68f4c4e33b4f5f8014edd66d25970 SHA512 e5d6493c4b9714ec1b96d07efdfcf14b32aecdc1778993d97e71d7d9458c709211d0673788302a336b99a8de7287efe4df5ccd3bf617608efdf3d326d3b16153
DIST rtl8723cs_xx-fw.bin 169150 BLAKE2B 9a99b21dca6b49acd90dd54cb7146c501f1a4dfc8246587383d5115d1b9400a271888a4cc3129573abce491a9980b1a0b30cc7f4b7ccccdf6866a399238f4aee SHA512 f77fef9729d9f2971458fda84d44cb7bc2c500eda0155f5f00cca7bc2bc85a17671255abdd000b505f82130fcfa99f3be963539f488b72cc7dd06eca46285878

View File

@ -1,8 +0,0 @@
<?xml version='1.0' encoding='UTF-8'?>
<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd">
<pkgmetadata>
<maintainer type="person">
<email>gjdijkman@gjdwebserver.nl</email>
<name>Gerben Jan Dijkman</name>
</maintainer>
</pkgmetadata>

View File

@ -1,27 +0,0 @@
# Copyright 1999-2021 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
EAPI=7
DESCRIPTION="Firmwares files for PinePhone"
HOMEPAGE="https://xff.cz/git/linux-firmware"
SRC_URI="https://xff.cz/git/linux-firmware/tree/ov5640_af.bin?id=4ec2645b007ba4c3f2962e38b50c06f274abbf7c -> ov5640_af.bin
https://xff.cz/git/linux-firmware/tree/anx7688-fw.bin?id=4ec2645b007ba4c3f2962e38b50c06f274abbf7c -> anx7688-fw.bin
https://xff.cz/git/linux-firmware/tree/rtl_bt/rtl8723cs_xx_fw.bin?id=4ec2645b007ba4c3f2962e38b50c06f274abbf7c -> rtl8723cs_xx-fw.bin
"
LICENSE="linux-fw-redistributable no-source-code"
SLOT="0"
KEYWORDS="~amd64 ~arm64"
S="${WORKDIR}"
# Install the PinePhone firmware blobs fetched via SRC_URI below /lib/firmware.
#
# insinto/doins/newins create the destination directory inside ${D} on their
# own and die on failure in EAPI 7, so no manual mkdir is needed.
src_install() {
	insinto /lib/firmware
	doins "${DISTDIR}"/anx7688-fw.bin
	doins "${DISTDIR}"/ov5640_af.bin

	# The distfile is stored as rtl8723cs_xx-fw.bin, but the upstream file
	# in SRC_URI is rtl_bt/rtl8723cs_xx_fw.bin; install it under the
	# upstream name.
	# NOTE(review): presumably the name the Bluetooth driver requests -
	# confirm against the target kernel.
	insinto /lib/firmware/rtl_bt
	newins "${DISTDIR}"/rtl8723cs_xx-fw.bin rtl8723cs_xx_fw.bin
}

View File

@ -1,5 +1,17 @@
DIST all-5.13.5.patch 15071574 BLAKE2B f0b44888b216a60bb12a920a170ffb8ee705e357b82b0cacd58551e2d0e257c0f4419c34976263dc062335bb37f4b3a7418f3d9674e601fd8adda88bacad97d6 SHA512 046f42a5c8fe6477cdda82f47a07093ea51cf26b231b1c58230885954b7ecab9faa9eb72ac3c0cb1603dd6ca2b5b0d76421de6d2c3c05a0bee3ca6e080bfa084
DIST all-5.15.0.patch 15521358 BLAKE2B 3979612d6c0b949cd1cc3a915a590d91ade30913b3daf9bbf3794a58c2139abf46e407ddee6641308d0265ebbeb675b051697a3cc18ec7b028cd7da9e06748b6 SHA512 3bcb9774c126fb704c028775856c5ba92760c313ade8301e99e46fed10a0e3848b5982d4ba4d989ed7f4a29a2e2d9c679105ea108719e87f18307e6f636a3a3c
DIST all-5.11.18.patch 15061000 BLAKE2B aea03c5307da5417db752879318f64f9fedd9a4113d96c466c45f6a85fd357b1dcf57403dd16b849654a0ddabbf5305e45d842d5b8953034a084c69f822d97cd SHA512 ce2bf9bb95d5e0392839a4a46d3cd9e148d110f2c498afc2c285b7f61868c0a7da4e652476004362e3bb80eb943968df00112aa3d27ae0c3fb7d9977d30babc1
DIST all-5.12.17.patch 15073524 BLAKE2B bd085b619a3bc1190cbfdc6bfd74cd6606f67f55106f89e0522619aaf69b672d68a7e93d843d7a4680529532e654d71becf4a7378d453fcc72978ee24d415b57 SHA512 e4bae86407767b685b3ec47f383d2db44281fe6a58914e4a117449c2ff96fe3c6341a7b59d1a92790c1565ecc3c14dacbbdc96fbc1b718ec7402c27c2dc37d0d
DIST all-5.13.13.patch 15071574 BLAKE2B 539e30aac6e465c7e402fdd2d705fe187411f2a92ca84e8dfe1407d0913a5b137a26134cb3364fb4c0df55aaf233ba7ec39eb2b258b8af7926c37530a4dbab07 SHA512 63b7a2845eb8e5387d5085403fa8051240d7760652790133f73d491fed061ee213d5ceb6f3a2e77d93d2caebd205545c3fd96bc85d8c1b5fcb19b8ae622e2b66
DIST all-5.14.17.patch 15283595 BLAKE2B d4e8635727bde7c61a13261be96fc2b0d4bf6acd48f6756d79e6caf193a0f05e6a5fc298933ca15906640cb371c79e35103436ae16590d9336f8ebd81b9b7c8e SHA512 ab87a6ec0fca1357cef3c84b7e0a180303eec968661143928c522aff3771e21c474aa568fef1d276c69ce0232e349c9b6732173517e8aab43c43db04cb0e96df
DIST all-5.15.1.patch 15521358 BLAKE2B 3979612d6c0b949cd1cc3a915a590d91ade30913b3daf9bbf3794a58c2139abf46e407ddee6641308d0265ebbeb675b051697a3cc18ec7b028cd7da9e06748b6 SHA512 3bcb9774c126fb704c028775856c5ba92760c313ade8301e99e46fed10a0e3848b5982d4ba4d989ed7f4a29a2e2d9c679105ea108719e87f18307e6f636a3a3c
DIST all-5.15.2.patch 15521358 BLAKE2B 3979612d6c0b949cd1cc3a915a590d91ade30913b3daf9bbf3794a58c2139abf46e407ddee6641308d0265ebbeb675b051697a3cc18ec7b028cd7da9e06748b6 SHA512 3bcb9774c126fb704c028775856c5ba92760c313ade8301e99e46fed10a0e3848b5982d4ba4d989ed7f4a29a2e2d9c679105ea108719e87f18307e6f636a3a3c
DIST linux-5.11.tar.xz 117619104 BLAKE2B 81300c27bd5476387a83123aaeb4163c73eb61e9245806c23660cb5e6a4fa88ffc9def027031335fa0270fc4080506cd415990014364e3a98b9d2e8c58a29524 SHA512 a567ec133018bb5ec00c60281479b466c26e02137a93a9c690e83997947df02b6fd94e76e8df748f6d70ceb58a19bacc3b1467de10b7a1fad2763db32b3f1330
DIST linux-5.12.tar.xz 118112412 BLAKE2B 842d921b9a73d2aaade763dbd2ec67bdfe0275baa6d628b775f5c87574ad7dc86f0419afcd48c10c1235f4bffa16084243f2cf4556e6afcd391e975fe8ba530b SHA512 be03b6fee1d1ea8087b09874d27c0a602c0b04fd90ad38b975bd2c8455a07e83c29b56814aaf1389e82305fae0e4c2d1701075a7f0a7295dd28149f967ec5b3d
DIST linux-5.13.tar.xz 119297284 BLAKE2B 9c4c12e2394dec064adff51f7ccdf389192eb27ba7906db5eda543afe3d04afca6b9ea0848a057571bf2534eeb98e1e3a67734deff82c0d3731be205ad995668 SHA512 a8edf97e9d38a49f1be2bde1e29ad96274bb2c6f7e8a2bebaa1161dd4df9cabcbaec4ff644c45bee94f86ae47725087d6deed0cd954209cec717621d137db85e
DIST linux-5.14.tar.xz 120669872 BLAKE2B 0047f5aaa3940dff97f4055ef544faafbbb5282128e6afe21d2f47d8dc8c395806a17016febfa050117d16f59e74b882cb8b9c5011d68f119c230d0a4d120524 SHA512 8e4f3ec3d36f774280f75dc7b004a43e09417af58f12e9c9f8348976659d4cfda7ad905f306f43fed66a27922e5c45db22e46bbfa7a0b9f365012380de3b6f64
DIST linux-5.15.tar.xz 121913744 BLAKE2B 3921274b23f7938abdf3ed9334534b4581e13d7484303d3a5280eddb038999aaa8b836666a487472d9c4a219af0f06b9fecccaf348fb5510ab8762f4ef4b7e83 SHA512 d25ad40b5bcd6a4c6042fd0fd84e196e7a58024734c3e9a484fd0d5d54a0c1d87db8a3c784eff55e43b6f021709dc685eb0efa18d2aec327e4f88a79f405705a
DIST patch-5.13.5.xz 473120 BLAKE2B a0dd9f3f972a16de87f0d2d8daa7f5d35b27314d22597a28f471cdbe6cedfa7d4bf69e41504d6a9b9d4c1f085146604394747771185dd0a09276cfd92820b4a8 SHA512 1e4eb575775ccbc2e88b34b902a75562e49d6dfb4699dadd5b41fff9db8c2bc994d946d1e60f6320f48ef233aa721d3725582d4ec57458f2293da9a85806c7b1
DIST patch-5.11.18.xz 651148 BLAKE2B d09300b9e6da7fe55ebdd5f28179671158e6f50ed96924a74294f3c2fbdc1a385b9ef18d4894be0daaa3bd55738082d5101b35823e9ccf486d46a7be8c39abb6 SHA512 a987b45276d7c78df4ed2414b1b13db9f87b0dc958fd30087b9580eb6f6bf0e8b2fdcdfe1569014cc785582e01b5680c907628c39b92190fd4f2fcbb44ee5cac
DIST patch-5.12.17.xz 889940 BLAKE2B 78c10887c9c28261f5a99c3ed19c9390d8af92ee49dd4dd13092493e71c446fa58b9025eecb3ea8f605f1c3644c5ef85a25c056a4fbe143eb91f49eb9b2b866b SHA512 1c42b4efa3d26fb4bdc96c422a32646b2d9018cb84d231ccae4c64e1cdd342ed43828c6da3eb40cbe80550734254b8e824cec98cb1635d47baef75bae37bfd31
DIST patch-5.13.13.xz 688948 BLAKE2B a4cb958a472a53fddbd7e82117cb929acc614c5d50c08a4f2de7a209bfb2da9a5978e551694afa363e3f521f467b9d5f1cbbc77832df86bbcd624e8490ff7bea SHA512 5d311d74c30da6fcd8057a372fb467bf646888ab9c6d36144c1f0a68c2910b1e1315dc44ad1a7b7a8c82d03e109483e72d454f7a217e64a27b5df9124204be77
DIST patch-5.14.17.xz 606140 BLAKE2B dec4a45cbd9232860f91797559a736a33f1f448ebeba6edd7282e01f9e38c412e740375d25acd44bced992b04316d2ec67eea4bf3711e762b1d06fde32db6b7c SHA512 841978b7ced5c314593999f31200c89da14dbd4f61c7f2c2edc4a5ac1b296d5ef932710a7f5c767d14fdcae6f1566b7eb92dfbdf486c924b648ab898d877abae
DIST patch-5.15.1.xz 5736 BLAKE2B 0f45917dd16d683c1b4a316c7205f24e4d037477a0fe54a153a3d05d5ff390585d8e35e8588359a0a3745b08daa1752701e33b5986dc3445c9598270e5635db2 SHA512 8f7faca2e0e5c755b052b65b2aa46fee0317c915ec2475e97371d5da5e3adfa943397108fa40ebbac581254933f4ffdc6875306120c8c208561556fdd8bfb4ff
DIST patch-5.15.2.xz 17244 BLAKE2B 769ef83b6613d865b420d048c25ac1df4c2f88f7ae580b373f874d312720bad877e561756943c9833535a94e5621922bba24cb1b804a1540f2e67cfa23f1a1aa SHA512 5f0123bdc7c9875e7b3f02a89496a8a1e0808d77dc58fb725e250d93d69510a1ef6462cfb38cb38e78e20ca34fd7446f58327cad5e67fc68ec36d15777048edf

View File

@ -0,0 +1,40 @@
From 7045054c96224ead00aae09246f475dfe6202def Mon Sep 17 00:00:00 2001
From: Danct12 <danct12@disroot.org>
Date: Tue, 19 Jan 2021 10:09:01 +0700
Subject: [PATCH] arm64: dts: allwinner: pinephone: stop LEDs on suspend
Signed-off-by: Danct12 <danct12@disroot.org>
---
arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi | 3 ---
1 file changed, 3 deletions(-)
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
index 02d82980c..00ed866ae 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
@@ -218,14 +218,12 @@
function = LED_FUNCTION_INDICATOR;
color = <LED_COLOR_ID_BLUE>;
gpios = <&pio 3 20 GPIO_ACTIVE_HIGH>; /* PD20 */
- retain-state-suspended;
};
green {
function = LED_FUNCTION_INDICATOR;
color = <LED_COLOR_ID_GREEN>;
gpios = <&pio 3 18 GPIO_ACTIVE_HIGH>; /* PD18 */
- retain-state-suspended;
};
red {
@@ -233,7 +231,6 @@
function = LED_FUNCTION_INDICATOR;
color = <LED_COLOR_ID_RED>;
gpios = <&pio 3 19 GPIO_ACTIVE_HIGH>; /* PD19 */
- retain-state-suspended;
};
};
--
2.30.0

View File

@ -0,0 +1,746 @@
diff --git a/MAINTAINERS b/MAINTAINERS
index a74227ad082e..b5633b56391e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2705,6 +2705,14 @@ S: Supported
F: drivers/net/bonding/
F: include/uapi/linux/if_bonding.h
+BOOTSPLASH
+M: Max Staudt <mstaudt@suse.de>
+L: linux-fbdev@vger.kernel.org
+S: Maintained
+F: drivers/video/fbdev/core/bootsplash*.*
+F: drivers/video/fbdev/core/dummycon.c
+F: include/linux/bootsplash.h
+
BPF (Safe dynamic programs and tools)
M: Alexei Starovoitov <ast@kernel.org>
M: Daniel Borkmann <daniel@iogearbox.net>
diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig
index 7f1f1fbcef9e..f3ff976266fe 100644
--- a/drivers/video/console/Kconfig
+++ b/drivers/video/console/Kconfig
@@ -151,6 +151,30 @@ config FRAMEBUFFER_CONSOLE_ROTATION
such that other users of the framebuffer will remain normally
oriented.
+config BOOTSPLASH
+ bool "Bootup splash screen"
+ depends on FRAMEBUFFER_CONSOLE
+ help
+ This option enables the Linux bootsplash screen.
+
+ The bootsplash is a full-screen logo or animation indicating a
+ booting system. It replaces the classic scrolling text with a
+ graphical alternative, similar to other systems.
+
+ Since this is technically implemented as a hook on top of fbcon,
+ it can only work if the FRAMEBUFFER_CONSOLE is enabled and a
+ framebuffer driver is active. Thus, to get a text-free boot,
+ the system needs to boot with vesafb, efifb, or similar.
+
+ Once built into the kernel, the bootsplash needs to be enabled
+ with bootsplash.enabled=1 and a splash file needs to be supplied.
+
+ Further documentation can be found in:
+ Documentation/fb/bootsplash.txt
+
+ If unsure, say N.
+ This is typically used by distributors and system integrators.
+
config STI_CONSOLE
bool "STI text console"
depends on PARISC
diff --git a/drivers/video/fbdev/core/Makefile b/drivers/video/fbdev/core/Makefile
index 73493bbd7a15..66895321928e 100644
--- a/drivers/video/fbdev/core/Makefile
+++ b/drivers/video/fbdev/core/Makefile
@@ -29,3 +29,6 @@ obj-$(CONFIG_FB_SYS_IMAGEBLIT) += sysimgblt.o
obj-$(CONFIG_FB_SYS_FOPS) += fb_sys_fops.o
obj-$(CONFIG_FB_SVGALIB) += svgalib.o
obj-$(CONFIG_FB_DDC) += fb_ddc.o
+
+obj-$(CONFIG_BOOTSPLASH) += bootsplash.o bootsplash_render.o \
+ dummyblit.o
diff --git a/drivers/video/fbdev/core/bootsplash.c b/drivers/video/fbdev/core/bootsplash.c
new file mode 100644
index 000000000000..e449755af268
--- /dev/null
+++ b/drivers/video/fbdev/core/bootsplash.c
@@ -0,0 +1,294 @@
+/*
+ * Kernel based bootsplash.
+ *
+ * (Main file: Glue code, workers, timer, PM, kernel and userland API)
+ *
+ * Authors:
+ * Max Staudt <mstaudt@suse.de>
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#define pr_fmt(fmt) "bootsplash: " fmt
+
+
+#include <linux/atomic.h>
+#include <linux/bootsplash.h>
+#include <linux/console.h>
+#include <linux/device.h> /* dev_warn() */
+#include <linux/fb.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/selection.h> /* console_blanked */
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+#include <linux/vt_kern.h>
+#include <linux/workqueue.h>
+
+#include "bootsplash_internal.h"
+
+
+/*
+ * We only have one splash screen, so let's keep a single
+ * instance of the internal state.
+ */
+static struct splash_priv splash_state;
+
+
+/*
+ * Work item: redraw the foreground virtual console under the console lock.
+ * Does nothing while the console is blanked.
+ */
+static void splash_callback_redraw_vc(struct work_struct *ignored)
+{
+	struct vc_data *fg_vc;
+
+	if (console_blanked)
+		return;
+
+	console_lock();
+	fg_vc = vc_cons[fg_console].d;
+	if (fg_vc)
+		update_screen(fg_vc);
+	console_unlock();
+}
+
+
+/*
+ * Decide whether the splash renderer can draw on this framebuffer.
+ *
+ * Returns false, after a dev_warn() naming the reason, when
+ *  - the FBINFO_BE_MATH flag disagrees with fb_be_math(),
+ *  - the framebuffer uses tile blitting,
+ *  - it is not packed-pixel truecolor/directcolor, or
+ *  - its depth is not 16, 24 or 32 bits per pixel.
+ * Returns true otherwise.
+ */
+static bool is_fb_compatible(const struct fb_info *info)
+{
+	bool flag_be = info->flags & FBINFO_BE_MATH;
+	bool math_be = fb_be_math((struct fb_info *)info);
+	bool packed = info->fix.type == FB_TYPE_PACKED_PIXELS;
+	bool rgb_visual = info->fix.visual == FB_VISUAL_TRUECOLOR ||
+			  info->fix.visual == FB_VISUAL_DIRECTCOLOR;
+
+	if (flag_be != math_be) {
+		dev_warn(info->device,
+			 "Can't draw on foreign endianness framebuffer.\n");
+		return false;
+	}
+
+	if (info->flags & FBINFO_MISC_TILEBLITTING) {
+		dev_warn(info->device,
+			 "Can't draw splash on tiling framebuffer.\n");
+		return false;
+	}
+
+	if (!packed || !rgb_visual) {
+		dev_warn(info->device,
+			 "Can't draw splash on non-packed or non-truecolor framebuffer.\n");
+		dev_warn(info->device,
+			 " type: %u visual: %u\n",
+			 info->fix.type, info->fix.visual);
+		return false;
+	}
+
+	switch (info->var.bits_per_pixel) {
+	case 16:
+	case 24:
+	case 32:
+		break;
+	default:
+		dev_warn(info->device,
+			 "We only support drawing on framebuffers with 16, 24, or 32 bpp, not %d.\n",
+			 info->var.bits_per_pixel);
+		return false;
+	}
+
+	return true;
+}
+
+
+/*
+ * Paint the splash onto a framebuffer, provided the framebuffer passes
+ * is_fb_compatible(). fbcon_switch() calls this when an instance is
+ * activated or refreshed.
+ */
+void bootsplash_render_full(struct fb_info *info)
+{
+	if (is_fb_compatible(info))
+		bootsplash_do_render_background(info);
+}
+
+
+/*
+ * External status enquiry and on/off switch
+ */
+/*
+ * True when the splash is enabled and neither an oops is in progress nor
+ * the console is blanked.
+ */
+bool bootsplash_would_render_now(void)
+{
+	if (oops_in_progress || console_blanked)
+		return false;
+
+	return bootsplash_is_enabled();
+}
+
+/* Report bit 0 of splash_state.enabled, i.e. whether the splash is shown. */
+bool bootsplash_is_enabled(void)
+{
+	/* Read barrier first, so we look at the newest state */
+	smp_rmb();
+
+	return test_bit(0, &splash_state.enabled);
+}
+
+/*
+ * Hide the splash. If it was shown, the foreground console is redrawn:
+ * immediately while an oops is in progress, otherwise from the work queue.
+ */
+void bootsplash_disable(void)
+{
+	struct vc_data *fg_vc;
+
+	/* Already hidden: nothing to redraw */
+	if (!test_and_clear_bit(0, &splash_state.enabled))
+		return;
+
+	if (!oops_in_progress) {
+		/* No urgency, redraw at next opportunity */
+		schedule_work(&splash_state.work_redraw_vc);
+		return;
+	}
+
+	/* Redraw screen now so we can see a panic */
+	fg_vc = vc_cons[fg_console].d;
+	if (fg_vc)
+		update_screen(fg_vc);
+}
+
+/*
+ * Show the splash. Refused while an oops is in progress. A redraw is
+ * scheduled only when the state actually changes from hidden to shown.
+ */
+void bootsplash_enable(void)
+{
+	if (oops_in_progress)
+		return;
+
+	/* Bit was already set: splash is shown, no redraw needed */
+	if (test_and_set_bit(0, &splash_state.enabled))
+		return;
+
+	schedule_work(&splash_state.work_redraw_vc);
+}
+
+
+/*
+ * Userland API via platform device in sysfs
+ */
+/* sysfs read of "enabled": prints 1 or 0 followed by a newline. */
+static ssize_t splash_show_enabled(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	int shown = bootsplash_is_enabled();
+
+	return sprintf(buf, "%d\n", shown);
+}
+
+/*
+ * sysfs write of "enabled": parses a boolean with kstrtobool() and turns
+ * the splash on or off accordingly.
+ *
+ * Returns count on success, -EFAULT for an empty or missing buffer, or the
+ * error reported by kstrtobool().
+ */
+static ssize_t splash_store_enabled(struct device *device,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	bool want_on;
+	int ret;
+
+	if (!count || !buf)
+		return -EFAULT;
+
+	ret = kstrtobool(buf, &want_on);
+	if (ret)
+		return ret;
+
+	if (!want_on)
+		bootsplash_disable();
+	else
+		bootsplash_enable();
+
+	return count;
+}
+
+static DEVICE_ATTR(enabled, 0644, splash_show_enabled, splash_store_enabled);
+
+
+static struct attribute *splash_dev_attrs[] = {
+ &dev_attr_enabled.attr,
+ NULL
+};
+
+ATTRIBUTE_GROUPS(splash_dev);
+
+
+
+
+/*
+ * Power management fixup via platform device
+ *
+ * When the system is woken from sleep or restored after hibernating, we
+ * cannot expect the screen contents to still be present in video RAM.
+ * Thus, we have to redraw the splash if we're currently active.
+ */
+/* PM resume/thaw/restore hook: queue a redraw if the splash would render. */
+static int splash_resume(struct device *device)
+{
+	if (!bootsplash_would_render_now())
+		return 0;
+
+	schedule_work(&splash_state.work_redraw_vc);
+
+	return 0;
+}
+
+static int splash_suspend(struct device *device)
+{
+ cancel_work_sync(&splash_state.work_redraw_vc);
+
+ return 0;
+}
+
+
+static const struct dev_pm_ops splash_pm_ops = {
+ .thaw = splash_resume,
+ .restore = splash_resume,
+ .resume = splash_resume,
+ .suspend = splash_suspend,
+ .freeze = splash_suspend,
+};
+
+static struct platform_driver splash_driver = {
+ .driver = {
+ .name = "bootsplash",
+ .pm = &splash_pm_ops,
+ },
+};
+
+
+/*
+ * Main init
+ *
+ * Registers the "bootsplash" platform driver and platform device, which
+ * carry the sysfs "enabled" attribute and the PM callbacks. Called from
+ * do_fbcon_takeover(); returns immediately if an earlier call succeeded.
+ *
+ * On failure, everything registered so far is rolled back, an error is
+ * logged and splash_state.splash_device is left NULL, so a later call
+ * tries again.
+ */
+void bootsplash_init(void)
+{
+	int ret;
+
+	/* Initialized already? */
+	if (splash_state.splash_device)
+		return;
+
+	/*
+	 * Set up the redraw work item before anything that may queue or
+	 * cancel it becomes reachable: the sysfs store handler and the PM
+	 * callbacks go live as soon as the platform device is added.
+	 */
+	INIT_WORK(&splash_state.work_redraw_vc, splash_callback_redraw_vc);
+
+	/* Register platform device to export user API */
+	ret = platform_driver_register(&splash_driver);
+	if (ret) {
+		pr_err("platform_driver_register() failed: %d\n", ret);
+		goto err;
+	}
+
+	splash_state.splash_device = platform_device_alloc("bootsplash", 0);
+	if (!splash_state.splash_device) {
+		pr_err("platform_device_alloc() failed\n");
+		goto err_driver;
+	}
+
+	splash_state.splash_device->dev.groups = splash_dev_groups;
+
+	ret = platform_device_add(splash_state.splash_device);
+	if (ret) {
+		pr_err("platform_device_add() failed: %d\n", ret);
+		goto err_device;
+	}
+
+	return;
+
+err_device:
+	/* Drops the reference taken by platform_device_alloc() */
+	platform_device_put(splash_state.splash_device);
+	splash_state.splash_device = NULL;
+err_driver:
+	platform_driver_unregister(&splash_driver);
+err:
+	pr_err("Failed to initialize.\n");
+}
diff --git a/drivers/video/fbdev/core/bootsplash_internal.h b/drivers/video/fbdev/core/bootsplash_internal.h
new file mode 100644
index 000000000000..b11da5cb90bf
--- /dev/null
+++ b/drivers/video/fbdev/core/bootsplash_internal.h
@@ -0,0 +1,55 @@
+/*
+ * Kernel based bootsplash.
+ *
+ * (Internal data structures used at runtime)
+ *
+ * Authors:
+ * Max Staudt <mstaudt@suse.de>
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#ifndef __BOOTSPLASH_INTERNAL_H
+#define __BOOTSPLASH_INTERNAL_H
+
+
+#include <linux/types.h>
+#include <linux/fb.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+
+
+/*
+ * Runtime types
+ */
+struct splash_priv {
+ /*
+ * Enabled/disabled state, to be used with atomic bit operations.
+ * Bit 0: 0 = Splash hidden
+ * 1 = Splash shown
+ *
+ * Note: fbcon.c uses this twice, by calling
+ * bootsplash_would_render_now() in set_blitting_type() and
+ * in fbcon_switch().
+ * This is racy, but eventually consistent: Turning the
+ * splash on/off will cause a redraw, which calls
+ * fbcon_switch(), which calls set_blitting_type().
+ * So the last on/off toggle will make things consistent.
+ */
+ unsigned long enabled;
+
+ /* Our gateway to userland via sysfs */
+ struct platform_device *splash_device;
+
+ struct work_struct work_redraw_vc;
+};
+
+
+
+/*
+ * Rendering functions
+ */
+void bootsplash_do_render_background(struct fb_info *info);
+
+#endif
diff --git a/drivers/video/fbdev/core/bootsplash_render.c b/drivers/video/fbdev/core/bootsplash_render.c
new file mode 100644
index 000000000000..4d7e0117f653
--- /dev/null
+++ b/drivers/video/fbdev/core/bootsplash_render.c
@@ -0,0 +1,93 @@
+/*
+ * Kernel based bootsplash.
+ *
+ * (Rendering functions)
+ *
+ * Authors:
+ * Max Staudt <mstaudt@suse.de>
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#define pr_fmt(fmt) "bootsplash: " fmt
+
+
+#include <linux/bootsplash.h>
+#include <linux/fb.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/types.h>
+
+#include "bootsplash_internal.h"
+
+
+
+
+/*
+ * Rendering: Internal drawing routines
+ */
+
+
+/*
+ * Scale one 8-bit colour component to a channel that is 'length' bits wide
+ * and move it to bit position 'offset' within the pixel.
+ *
+ * All arithmetic is done in u32: a u8 operand would be promoted to signed
+ * int, so shifting a set top bit up to bit 31 would overflow, and
+ * '8 - length' would wrap for channels wider than 8 bits.
+ */
+static inline u32 pack_channel(u8 value, u32 length, u32 offset)
+{
+	u32 chan = value;
+
+	if (length < 8)
+		chan >>= 8 - length;
+	else
+		chan <<= length - 8;
+
+	return chan << offset;
+}
+
+/*
+ * Pack pixel into target format and do Big/Little Endian handling.
+ *
+ * @dst_var: describes the target format (per-channel length/offset and
+ *           bits_per_pixel)
+ * @red, @green, @blue: 8-bit colour components
+ *
+ * Returns the packed pixel, arranged so that its first bits_per_pixel / 8
+ * bytes in memory are the pixel and can be memcpy()'d out directly.
+ */
+static inline u32 pack_pixel(const struct fb_var_screeninfo *dst_var,
+			     u8 red, u8 green, u8 blue)
+{
+	u32 dstpix;
+
+	/* Quantize each component and pack it into its channel */
+	dstpix = pack_channel(red, dst_var->red.length, dst_var->red.offset)
+	       | pack_channel(green, dst_var->green.length,
+			      dst_var->green.offset)
+	       | pack_channel(blue, dst_var->blue.length,
+			      dst_var->blue.offset);
+
+	/*
+	 * Move packed pixel to the beginning of the memory cell,
+	 * so we can memcpy() it out easily
+	 */
+#ifdef __BIG_ENDIAN
+	switch (dst_var->bits_per_pixel) {
+	case 16:
+		dstpix <<= 16;
+		break;
+	case 24:
+		dstpix <<= 8;
+		break;
+	case 32:
+		break;
+	}
+#else
+	/* Not needed on Little Endian: the pixel already starts at byte 0 */
+#endif
+
+	return dstpix;
+}
+
+
+/*
+ * Fill the whole virtual framebuffer (xres_virtual x yres_virtual) with the
+ * background colour, which is black (all components 0).
+ *
+ * Pixels are written directly through info->screen_buffer, one
+ * bits_per_pixel / 8 byte cell at a time, advancing by fix.line_length per
+ * row. Nothing is drawn if the driver provides no screen_buffer.
+ */
+void bootsplash_do_render_background(struct fb_info *info)
+{
+	unsigned int x, y;
+	u32 dstpix;
+	u32 dst_octpp = info->var.bits_per_pixel / 8;
+
+	/* No framebuffer memory to write to: bail out rather than oops */
+	if (!info->screen_buffer)
+		return;
+
+	dstpix = pack_pixel(&info->var, 0, 0, 0);
+
+	for (y = 0; y < info->var.yres_virtual; y++) {
+		u8 *dstline = info->screen_buffer + (y * info->fix.line_length);
+
+		for (x = 0; x < info->var.xres_virtual; x++) {
+			/* Copy only the bytes that make up one pixel */
+			memcpy(dstline, &dstpix, dst_octpp);
+
+			dstline += dst_octpp;
+		}
+	}
+}
diff --git a/drivers/video/fbdev/core/dummyblit.c b/drivers/video/fbdev/core/dummyblit.c
new file mode 100644
index 000000000000..8c22ff92ce24
--- /dev/null
+++ b/drivers/video/fbdev/core/dummyblit.c
@@ -0,0 +1,89 @@
+/*
+ * linux/drivers/video/fbdev/core/dummyblit.c -- Dummy Blitting Operation
+ *
+ * Authors:
+ * Max Staudt <mstaudt@suse.de>
+ *
+ * These functions are used in place of bitblit/tileblit to suppress
+ * fbcon's text output while a splash is shown.
+ *
+ * Only suppressing actual rendering keeps the text buffer in the VC layer
+ * intact and makes it easy to switch back from the bootsplash to a full
+ * text console with a simple redraw (with the original functions in place).
+ *
+ * Based on linux/drivers/video/fbdev/core/bitblit.c
+ * and linux/drivers/video/fbdev/core/tileblit.c
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#include <linux/module.h>
+#include <linux/fb.h>
+#include <linux/vt_kern.h>
+#include <linux/console.h>
+#include <asm/types.h>
+#include "fbcon.h"
+
+/* No-op bmove: nothing is moved while the splash is shown. */
+static void dummy_bmove(struct vc_data *vc, struct fb_info *info, int sy,
+			int sx, int dy, int dx, int height, int width)
+{
+}
+
+/* No-op clear: nothing is cleared while the splash is shown. */
+static void dummy_clear(struct vc_data *vc, struct fb_info *info, int sy,
+			int sx, int height, int width)
+{
+}
+
+/* No-op putcs: characters are not drawn while the splash is shown. */
+static void dummy_putcs(struct vc_data *vc, struct fb_info *info,
+			const unsigned short *s, int count, int yy, int xx,
+			int fg, int bg)
+{
+}
+
+/* No-op clear_margins: margins are left untouched while the splash is shown. */
+static void dummy_clear_margins(struct vc_data *vc, struct fb_info *info,
+				int color, int bottom_only)
+{
+}
+
+/* No-op cursor: no cursor is drawn while the splash is shown. */
+static void dummy_cursor(struct vc_data *vc, struct fb_info *info, int mode,
+			 int softback_lines, int fg, int bg)
+{
+}
+
+/*
+ * Pan the display to fbcon's saved variable screen info, then copy the
+ * resulting xoffset, yoffset and vmode back into that saved copy.
+ *
+ * Copied from bitblit.c and tileblit.c.
+ * As of Linux 4.12, nobody seems to care about our return value.
+ *
+ * Returns the result of fb_pan_display().
+ */
+static int dummy_update_start(struct fb_info *info)
+{
+	struct fbcon_ops *con_ops = info->fbcon_par;
+	struct fb_var_screeninfo *saved = &con_ops->var;
+	int ret = fb_pan_display(info, saved);
+
+	saved->xoffset = info->var.xoffset;
+	saved->yoffset = info->var.yoffset;
+	saved->vmode = info->var.vmode;
+
+	return ret;
+}
+
+/*
+ * Point every fbcon drawing operation at its dummy counterpart so that
+ * fbcon's text output is suppressed. update_start still pans the display;
+ * rotate_font is cleared.
+ */
+void fbcon_set_dummyops(struct fbcon_ops *ops)
+{
+	/* Drawing hooks: all no-ops */
+	ops->putcs = dummy_putcs;
+	ops->cursor = dummy_cursor;
+	ops->clear = dummy_clear;
+	ops->clear_margins = dummy_clear_margins;
+	ops->bmove = dummy_bmove;
+
+	/* Non-drawing hooks */
+	ops->update_start = dummy_update_start;
+	ops->rotate_font = NULL;
+}
+EXPORT_SYMBOL_GPL(fbcon_set_dummyops);
+
+MODULE_AUTHOR("Max Staudt <mstaudt@suse.de>");
+MODULE_DESCRIPTION("Dummy Blitting Operation");
+MODULE_LICENSE("GPL");
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 04612f938bab..9a39a6fcfe98 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -80,6 +80,7 @@
#include <asm/irq.h>
#include "fbcon.h"
+#include <linux/bootsplash.h>
#ifdef FBCONDEBUG
# define DPRINTK(fmt, args...) printk(KERN_DEBUG "%s: " fmt, __func__ , ## args)
@@ -542,6 +543,8 @@ static int do_fbcon_takeover(int show_logo)
for (i = first_fb_vc; i <= last_fb_vc; i++)
con2fb_map[i] = info_idx;
+ bootsplash_init();
+
err = do_take_over_console(&fb_con, first_fb_vc, last_fb_vc,
fbcon_is_default);
@@ -661,6 +664,9 @@ static void set_blitting_type(struct vc_data *vc, struct fb_info *info)
else {
fbcon_set_rotation(info);
fbcon_set_bitops(ops);
+
+ if (bootsplash_would_render_now())
+ fbcon_set_dummyops(ops);
}
}
@@ -683,6 +689,19 @@ static void set_blitting_type(struct vc_data *vc, struct fb_info *info)
ops->p = &fb_display[vc->vc_num];
fbcon_set_rotation(info);
fbcon_set_bitops(ops);
+
+ /*
+ * Note:
+ * This is *eventually correct*.
+ * Setting the fbcon operations and drawing the splash happen at
+ * different points in time. If the splash is enabled/disabled
+ * in between, then bootsplash_{en,dis}able will schedule a
+ * redraw, which will again render the splash (or not) and set
+ * the correct fbcon ops.
+ * The last run will then be the right one.
+ */
+ if (bootsplash_would_render_now())
+ fbcon_set_dummyops(ops);
}
static int fbcon_invalid_charcount(struct fb_info *info, unsigned charcount)
@@ -2184,6 +2203,9 @@ static int fbcon_switch(struct vc_data *vc)
info = registered_fb[con2fb_map[vc->vc_num]];
ops = info->fbcon_par;
+ if (bootsplash_would_render_now())
+ bootsplash_render_full(info);
+
if (softback_top) {
if (softback_lines)
fbcon_set_origin(vc);
diff --git a/drivers/video/fbdev/core/fbcon.h b/drivers/video/fbdev/core/fbcon.h
index 18f3ac144237..45f94347fe5e 100644
--- a/drivers/video/fbdev/core/fbcon.h
+++ b/drivers/video/fbdev/core/fbcon.h
@@ -214,6 +214,11 @@ static inline int attr_col_ec(int shift, struct vc_data *vc,
#define SCROLL_REDRAW 0x004
#define SCROLL_PAN_REDRAW 0x005
+#ifdef CONFIG_BOOTSPLASH
+extern void fbcon_set_dummyops(struct fbcon_ops *ops);
+#else /* CONFIG_BOOTSPLASH */
+#define fbcon_set_dummyops(x)
+#endif /* CONFIG_BOOTSPLASH */
#ifdef CONFIG_FB_TILEBLITTING
extern void fbcon_set_tileops(struct vc_data *vc, struct fb_info *info);
#endif
diff --git a/include/linux/bootsplash.h b/include/linux/bootsplash.h
new file mode 100644
index 000000000000..c6dd0b43180d
--- /dev/null
+++ b/include/linux/bootsplash.h
@@ -0,0 +1,43 @@
+/*
+ * Kernel based bootsplash.
+ *
+ * Authors:
+ * Max Staudt <mstaudt@suse.de>
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#ifndef __LINUX_BOOTSPLASH_H
+#define __LINUX_BOOTSPLASH_H
+
+#include <linux/fb.h>
+
+
+#ifdef CONFIG_BOOTSPLASH
+
+extern void bootsplash_render_full(struct fb_info *info);
+
+extern bool bootsplash_would_render_now(void);
+
+extern bool bootsplash_is_enabled(void);
+extern void bootsplash_disable(void);
+extern void bootsplash_enable(void);
+
+extern void bootsplash_init(void);
+
+#else /* CONFIG_BOOTSPLASH */
+
+#define bootsplash_render_full(x)
+
+#define bootsplash_would_render_now() (false)
+
+#define bootsplash_is_enabled() (false)
+#define bootsplash_disable()
+#define bootsplash_enable()
+
+#define bootsplash_init()
+
+#endif /* CONFIG_BOOTSPLASH */
+
+
+#endif

View File

@ -0,0 +1,24 @@
From 22008251d617054271a65f29178e2df74dd3e33c Mon Sep 17 00:00:00 2001
From: Bhushan Shah <bshah@kde.org>
Date: Fri, 9 Apr 2021 16:22:49 +0530
Subject: [PATCH 1/5] drivers/usb: add reset_resume callback
---
drivers/usb/serial/option.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index c6969ca728390..21aeb7dc8f6ee 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -2105,6 +2105,7 @@ static struct usb_serial_driver option_1port_device = {
#ifdef CONFIG_PM
.suspend = usb_wwan_suspend,
.resume = usb_wwan_resume,
+ .reset_resume = usb_wwan_resume,
#endif
};
--
2.31.1

View File

@ -0,0 +1,150 @@
--- b/drivers/video/fbdev/core/bitblit.c
+++ a/drivers/video/fbdev/core/bitblit.c
@@ -234,7 +234,7 @@
}
static void bit_cursor(struct vc_data *vc, struct fb_info *info, int mode,
+ int softback_lines, int fg, int bg)
- int fg, int bg)
{
struct fb_cursor cursor;
struct fbcon_ops *ops = info->fbcon_par;
@@ -247,6 +247,15 @@
cursor.set = 0;
+ if (softback_lines) {
+ if (y + softback_lines >= vc->vc_rows) {
+ mode = CM_ERASE;
+ ops->cursor_flash = 0;
+ return;
+ } else
+ y += softback_lines;
+ }
+
c = scr_readw((u16 *) vc->vc_pos);
attribute = get_attribute(info, c);
src = vc->vc_font.data + ((c & charmask) * (w * vc->vc_font.height));
--- b/drivers/video/fbdev/core/fbcon.c
+++ a/drivers/video/fbdev/core/fbcon.c
@@ -394,7 +394,7 @@
c = scr_readw((u16 *) vc->vc_pos);
mode = (!ops->cursor_flash || ops->cursor_state.enable) ?
CM_ERASE : CM_DRAW;
+ ops->cursor(vc, info, mode, 0, get_color(vc, info, c, 1),
- ops->cursor(vc, info, mode, get_color(vc, info, c, 1),
get_color(vc, info, c, 0));
console_unlock();
}
@@ -1345,7 +1345,7 @@
ops->cursor_flash = (mode == CM_ERASE) ? 0 : 1;
+ ops->cursor(vc, info, mode, 0, get_color(vc, info, c, 1),
- ops->cursor(vc, info, mode, get_color(vc, info, c, 1),
get_color(vc, info, c, 0));
}
--- b/drivers/video/fbdev/core/fbcon.h
+++ a/drivers/video/fbdev/core/fbcon.h
@@ -62,7 +62,7 @@
void (*clear_margins)(struct vc_data *vc, struct fb_info *info,
int color, int bottom_only);
void (*cursor)(struct vc_data *vc, struct fb_info *info, int mode,
+ int softback_lines, int fg, int bg);
- int fg, int bg);
int (*update_start)(struct fb_info *info);
int (*rotate_font)(struct fb_info *info, struct vc_data *vc);
struct fb_var_screeninfo var; /* copy of the current fb_var_screeninfo */
--- b/drivers/video/fbdev/core/fbcon_ccw.c
+++ a/drivers/video/fbdev/core/fbcon_ccw.c
@@ -219,7 +219,7 @@
}
static void ccw_cursor(struct vc_data *vc, struct fb_info *info, int mode,
+ int softback_lines, int fg, int bg)
- int fg, int bg)
{
struct fb_cursor cursor;
struct fbcon_ops *ops = info->fbcon_par;
@@ -236,6 +236,15 @@
cursor.set = 0;
+ if (softback_lines) {
+ if (y + softback_lines >= vc->vc_rows) {
+ mode = CM_ERASE;
+ ops->cursor_flash = 0;
+ return;
+ } else
+ y += softback_lines;
+ }
+
c = scr_readw((u16 *) vc->vc_pos);
attribute = get_attribute(info, c);
src = ops->fontbuffer + ((c & charmask) * (w * vc->vc_font.width));
--- b/drivers/video/fbdev/core/fbcon_cw.c
+++ a/drivers/video/fbdev/core/fbcon_cw.c
@@ -202,7 +202,7 @@
}
static void cw_cursor(struct vc_data *vc, struct fb_info *info, int mode,
+ int softback_lines, int fg, int bg)
- int fg, int bg)
{
struct fb_cursor cursor;
struct fbcon_ops *ops = info->fbcon_par;
@@ -219,6 +219,15 @@
cursor.set = 0;
+ if (softback_lines) {
+ if (y + softback_lines >= vc->vc_rows) {
+ mode = CM_ERASE;
+ ops->cursor_flash = 0;
+ return;
+ } else
+ y += softback_lines;
+ }
+
c = scr_readw((u16 *) vc->vc_pos);
attribute = get_attribute(info, c);
src = ops->fontbuffer + ((c & charmask) * (w * vc->vc_font.width));
--- b/drivers/video/fbdev/core/fbcon_ud.c
+++ a/drivers/video/fbdev/core/fbcon_ud.c
@@ -249,7 +249,7 @@
}
static void ud_cursor(struct vc_data *vc, struct fb_info *info, int mode,
+ int softback_lines, int fg, int bg)
- int fg, int bg)
{
struct fb_cursor cursor;
struct fbcon_ops *ops = info->fbcon_par;
@@ -267,6 +267,15 @@
cursor.set = 0;
+ if (softback_lines) {
+ if (y + softback_lines >= vc->vc_rows) {
+ mode = CM_ERASE;
+ ops->cursor_flash = 0;
+ return;
+ } else
+ y += softback_lines;
+ }
+
c = scr_readw((u16 *) vc->vc_pos);
attribute = get_attribute(info, c);
src = ops->fontbuffer + ((c & charmask) * (w * vc->vc_font.height));
--- b/drivers/video/fbdev/core/tileblit.c
+++ a/drivers/video/fbdev/core/tileblit.c
@@ -80,7 +80,7 @@
}
static void tile_cursor(struct vc_data *vc, struct fb_info *info, int mode,
+ int softback_lines, int fg, int bg)
- int fg, int bg)
{
struct fb_tilecursor cursor;
int use_sw = (vc->vc_cursor_type & 0x10);

View File

@ -0,0 +1,67 @@
From 27061f0b322a585c30db111719f89c23c15a88b4 Mon Sep 17 00:00:00 2001
From: Sathish Narasimman <nsathish41@gmail.com>
Date: Thu, 29 Oct 2020 13:18:21 +0530
Subject: Bluetooth: Fix: LL PRivacy BLE device fails to connect
When adding device to white list the device is added to resolving list
also. It has to be added only when HCI_ENABLE_LL_PRIVACY flag is set.
HCI_ENABLE_LL_PRIVACY flag has to be tested before adding/deleting devices
to resolving list. use_ll_privacy macro is used only to check if controller
supports LL_Privacy.
https://bugzilla.kernel.org/show_bug.cgi?id=209745
Fixes: 0eee35bdfa3b ("Bluetooth: Update resolving list when updating whitelist")
Signed-off-by: Sathish Narasimman <sathish.narasimman@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
net/bluetooth/hci_request.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index e0269192f2e5..a565c91b8599 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -698,7 +698,8 @@ static void del_from_white_list(struct hci_request *req, bdaddr_t *bdaddr,
cp.bdaddr_type);
hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, sizeof(cp), &cp);
- if (use_ll_privacy(req->hdev)) {
+ if (use_ll_privacy(req->hdev) &&
+ hci_dev_test_flag(req->hdev, HCI_ENABLE_LL_PRIVACY)) {
struct smp_irk *irk;
irk = hci_find_irk_by_addr(req->hdev, bdaddr, bdaddr_type);
@@ -732,7 +733,8 @@ static int add_to_white_list(struct hci_request *req,
return -1;
/* White list can not be used with RPAs */
- if (!allow_rpa && !use_ll_privacy(hdev) &&
+ if (!allow_rpa &&
+ !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
hci_find_irk_by_addr(hdev, &params->addr, params->addr_type)) {
return -1;
}
@@ -750,7 +752,8 @@ static int add_to_white_list(struct hci_request *req,
cp.bdaddr_type);
hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp);
- if (use_ll_privacy(hdev)) {
+ if (use_ll_privacy(hdev) &&
+ hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY)) {
struct smp_irk *irk;
irk = hci_find_irk_by_addr(hdev, &params->addr,
@@ -812,7 +815,8 @@ static u8 update_white_list(struct hci_request *req)
}
/* White list can not be used with RPAs */
- if (!allow_rpa && !use_ll_privacy(hdev) &&
+ if (!allow_rpa &&
+ !hci_dev_test_flag(hdev, HCI_ENABLE_LL_PRIVACY) &&
hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) {
return 0x00;
}
--
cgit v1.2.3-1-gf6bb5

View File

@ -0,0 +1,28 @@
From 9d662fb865ae496a7eb51d2bdddefd2427d9a30e Mon Sep 17 00:00:00 2001
From: Bhushan Shah <bshah@kde.org>
Date: Fri, 9 Apr 2021 16:25:25 +0530
Subject: [PATCH 2/5] Revert "usb: quirks: Add USB_QUIRK_RESET for Quectel
EG25G Modem"
Reverts 8cc2a406ecc711f5
---
drivers/usb/core/quirks.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index cb556617aa34f..6ade3daf78584 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -501,9 +501,6 @@ static const struct usb_device_id usb_quirk_list[] = {
/* INTEL VALUE SSD */
{ USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
- /* Quectel EG25G Modem */
- { USB_DEVICE(0x2c7c, 0x0125), .driver_info = USB_QUIRK_RESET },
-
{ } /* terminating entry must be last */
};
--
2.31.1

View File

@ -0,0 +1,669 @@
diff --git a/MAINTAINERS b/MAINTAINERS
index b5633b56391e..5c237445761e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2712,6 +2712,7 @@ S: Maintained
F: drivers/video/fbdev/core/bootsplash*.*
F: drivers/video/fbdev/core/dummycon.c
F: include/linux/bootsplash.h
+F: include/uapi/linux/bootsplash_file.h
BPF (Safe dynamic programs and tools)
M: Alexei Starovoitov <ast@kernel.org>
diff --git a/drivers/video/fbdev/core/Makefile b/drivers/video/fbdev/core/Makefile
index 66895321928e..6a8d1bab8a01 100644
--- a/drivers/video/fbdev/core/Makefile
+++ b/drivers/video/fbdev/core/Makefile
@@ -31,4 +31,4 @@ obj-$(CONFIG_FB_SVGALIB) += svgalib.o
obj-$(CONFIG_FB_DDC) += fb_ddc.o
obj-$(CONFIG_BOOTSPLASH) += bootsplash.o bootsplash_render.o \
- dummyblit.o
+ bootsplash_load.o dummyblit.o
diff --git a/drivers/video/fbdev/core/bootsplash.c b/drivers/video/fbdev/core/bootsplash.c
index e449755af268..843c5400fefc 100644
--- a/drivers/video/fbdev/core/bootsplash.c
+++ b/drivers/video/fbdev/core/bootsplash.c
@@ -32,6 +32,7 @@
#include <linux/workqueue.h>
#include "bootsplash_internal.h"
+#include "uapi/linux/bootsplash_file.h"
/*
@@ -102,10 +103,17 @@ static bool is_fb_compatible(const struct fb_info *info)
*/
void bootsplash_render_full(struct fb_info *info)
{
+ mutex_lock(&splash_state.data_lock);
+
if (!is_fb_compatible(info))
- return;
+ goto out;
+
+ bootsplash_do_render_background(info, splash_state.file);
+
+ bootsplash_do_render_pictures(info, splash_state.file);
- bootsplash_do_render_background(info);
+out:
+ mutex_unlock(&splash_state.data_lock);
}
@@ -116,6 +124,7 @@ bool bootsplash_would_render_now(void)
{
return !oops_in_progress
&& !console_blanked
+ && splash_state.file
&& bootsplash_is_enabled();
}
@@ -252,6 +261,7 @@ static struct platform_driver splash_driver = {
void bootsplash_init(void)
{
int ret;
+ struct splash_file_priv *fp;
/* Initialized already? */
if (splash_state.splash_device)
@@ -280,8 +290,26 @@ void bootsplash_init(void)
}
+ mutex_init(&splash_state.data_lock);
+ set_bit(0, &splash_state.enabled);
+
INIT_WORK(&splash_state.work_redraw_vc, splash_callback_redraw_vc);
+
+ if (!splash_state.bootfile || !strlen(splash_state.bootfile))
+ return;
+
+ fp = bootsplash_load_firmware(&splash_state.splash_device->dev,
+ splash_state.bootfile);
+
+ if (!fp)
+ goto err;
+
+ mutex_lock(&splash_state.data_lock);
+ splash_state.splash_fb = NULL;
+ splash_state.file = fp;
+ mutex_unlock(&splash_state.data_lock);
+
return;
err_device:
@@ -292,3 +320,7 @@ void bootsplash_init(void)
err:
pr_err("Failed to initialize.\n");
}
+
+
+module_param_named(bootfile, splash_state.bootfile, charp, 0444);
+MODULE_PARM_DESC(bootfile, "Bootsplash file to load on boot");
diff --git a/drivers/video/fbdev/core/bootsplash_internal.h b/drivers/video/fbdev/core/bootsplash_internal.h
index b11da5cb90bf..71e2a27ac0b8 100644
--- a/drivers/video/fbdev/core/bootsplash_internal.h
+++ b/drivers/video/fbdev/core/bootsplash_internal.h
@@ -15,15 +15,43 @@
#include <linux/types.h>
#include <linux/fb.h>
+#include <linux/firmware.h>
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
+#include "uapi/linux/bootsplash_file.h"
+
/*
* Runtime types
*/
+struct splash_blob_priv {
+ struct splash_blob_header *blob_header; /* blob header inside the loaded file image */
+ const void *data; /* first payload byte, directly after blob_header */
+};
+
+
+struct splash_pic_priv {
+ const struct splash_pic_header *pic_header; /* picture header inside the loaded file image */
+
+ struct splash_blob_priv *blobs; /* array with pic_header->num_blobs slots */
+ u16 blobs_loaded; /* number of slots in blobs[] filled so far */
+};
+
+
+struct splash_file_priv {
+ const struct firmware *fw; /* firmware object that backs every header/data pointer */
+ const struct splash_file_header *header; /* points at the start of fw->data */
+
+ struct splash_pic_priv *pics; /* array with header->num_pics entries */
+};
+
+
struct splash_priv {
+ /* Bootup and runtime state */
+ char *bootfile;
+
/*
* Enabled/disabled state, to be used with atomic bit operations.
* Bit 0: 0 = Splash hidden
@@ -43,6 +71,13 @@ struct splash_priv {
struct platform_device *splash_device;
struct work_struct work_redraw_vc;
+
+ /* Splash data structures including lock for everything below */
+ struct mutex data_lock;
+
+ struct fb_info *splash_fb;
+
+ struct splash_file_priv *file;
};
@@ -50,6 +85,14 @@ struct splash_priv {
/*
* Rendering functions
*/
-void bootsplash_do_render_background(struct fb_info *info);
+void bootsplash_do_render_background(struct fb_info *info,
+ const struct splash_file_priv *fp);
+void bootsplash_do_render_pictures(struct fb_info *info,
+ const struct splash_file_priv *fp);
+
+
+void bootsplash_free_file(struct splash_file_priv *fp);
+struct splash_file_priv *bootsplash_load_firmware(struct device *device,
+ const char *path);
#endif
diff --git a/drivers/video/fbdev/core/bootsplash_load.c b/drivers/video/fbdev/core/bootsplash_load.c
new file mode 100644
index 000000000000..fd807571ab7d
--- /dev/null
+++ b/drivers/video/fbdev/core/bootsplash_load.c
@@ -0,0 +1,225 @@
+/*
+ * Kernel based bootsplash.
+ *
+ * (Loading and freeing functions)
+ *
+ * Authors:
+ * Max Staudt <mstaudt@suse.de>
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#define pr_fmt(fmt) "bootsplash: " fmt
+
+
+#include <linux/bootsplash.h>
+#include <linux/fb.h>
+#include <linux/firmware.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/printk.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+
+#include "bootsplash_internal.h"
+#include "uapi/linux/bootsplash_file.h"
+
+
+
+
+/*
+ * Release a splash file descriptor: every per-picture blob table, the
+ * picture table itself, the firmware image and finally the descriptor.
+ * A NULL fp is accepted and ignored.
+ */
+void bootsplash_free_file(struct splash_file_priv *fp)
+{
+	struct splash_pic_priv *pic_table;
+	unsigned int idx;
+
+	if (!fp)
+		return;
+
+	pic_table = fp->pics;
+	if (pic_table) {
+		/* vfree() ignores NULL, so unfilled slots need no check */
+		for (idx = 0; idx < fp->header->num_pics; idx++)
+			vfree(pic_table[idx].blobs);
+
+		vfree(pic_table);
+	}
+
+	release_firmware(fp->fw);
+	vfree(fp);
+}
+
+
+
+
+/*
+ * Load a splash screen from a "firmware" file.
+ *
+ * Parsing, and sanity checks.
+ */
+#ifdef __BIG_ENDIAN
+ #define BOOTSPLASH_MAGIC BOOTSPLASH_MAGIC_BE
+#else
+ #define BOOTSPLASH_MAGIC BOOTSPLASH_MAGIC_LE
+#endif
+
+/*
+ * Load and validate a splash file through the firmware loader.
+ *
+ * @device: device handed to request_firmware()
+ * @path:   firmware name of the splash file
+ *
+ * Returns a descriptor whose header and data pointers reference the
+ * firmware image (release it with bootsplash_free_file()), or NULL if
+ * the file could not be loaded or failed one of the sanity checks.
+ */
+struct splash_file_priv *bootsplash_load_firmware(struct device *device,
+						  const char *path)
+{
+	const struct firmware *fw;
+	struct splash_file_priv *fp;
+	unsigned int i;
+	const u8 *walker;
+	const u8 *end;
+
+	if (request_firmware(&fw, path, device))
+		return NULL;
+
+	if (fw->size < sizeof(struct splash_file_header)
+	    || memcmp(fw->data, BOOTSPLASH_MAGIC, sizeof(fp->header->id))) {
+		pr_err("Not a bootsplash file.\n");
+
+		release_firmware(fw);
+		return NULL;
+	}
+
+	fp = vzalloc(sizeof(struct splash_file_priv));
+	if (!fp) {
+		release_firmware(fw);
+		return NULL;
+	}
+
+	/* fw->size is a size_t, which is printed with %zu */
+	pr_info("Loading splash file (%zu bytes)\n", fw->size);
+
+	fp->fw = fw;
+	fp->header = (struct splash_file_header *)fw->data;
+	end = fw->data + fw->size;
+
+	/* Sanity checks */
+	if (fp->header->version != BOOTSPLASH_VERSION) {
+		pr_err("Loaded v%d file, but we only support version %d\n",
+		       fp->header->version,
+		       BOOTSPLASH_VERSION);
+
+		goto err;
+	}
+
+	/* All fixed-size headers must fit; this also covers the loop below */
+	if (fw->size < sizeof(struct splash_file_header)
+		       + fp->header->num_pics
+				* sizeof(struct splash_pic_header)
+		       + fp->header->num_blobs
+				* sizeof(struct splash_blob_header)) {
+		pr_err("File incomplete.\n");
+
+		goto err;
+	}
+
+	/* Read picture headers */
+	if (fp->header->num_pics) {
+		fp->pics = vzalloc(fp->header->num_pics
+				   * sizeof(struct splash_pic_priv));
+		if (!fp->pics)
+			goto err;
+	}
+
+	walker = fw->data + sizeof(struct splash_file_header);
+	for (i = 0; i < fp->header->num_pics; i++) {
+		struct splash_pic_priv *pp = &fp->pics[i];
+		struct splash_pic_header *ph = (void *)walker;
+
+		pr_debug("Picture %u: Size %ux%u\n", i, ph->width, ph->height);
+
+		if (ph->num_blobs < 1) {
+			pr_err("Picture %u: Zero blobs? Aborting load.\n", i);
+			goto err;
+		}
+
+		pp->pic_header = ph;
+		pp->blobs = vzalloc(ph->num_blobs
+				    * sizeof(struct splash_blob_priv));
+		if (!pp->blobs)
+			goto err;
+
+		walker += sizeof(struct splash_pic_header);
+	}
+
+	/* Read blob headers */
+	for (i = 0; i < fp->header->num_blobs; i++) {
+		struct splash_blob_header *bh = (void *)walker;
+		struct splash_pic_priv *pp;
+
+		/*
+		 * Compare remaining sizes instead of advanced pointers: the
+		 * padding of the previous blob may have moved walker past
+		 * the end, and walker + length could wrap around.
+		 */
+		if (walker > end
+		    || sizeof(struct splash_blob_header)
+		       > (size_t)(end - walker))
+			goto err;
+
+		walker += sizeof(struct splash_blob_header);
+
+		if (bh->length > (size_t)(end - walker))
+			goto err;
+
+		/* Blobs for unknown pictures are skipped, not fatal */
+		if (bh->picture_id >= fp->header->num_pics)
+			goto nextblob;
+
+		pp = &fp->pics[bh->picture_id];
+
+		pr_debug("Blob %u, pic %u, blobs_loaded %u, num_blobs %u.\n",
+			 i, bh->picture_id,
+			 pp->blobs_loaded, pp->pic_header->num_blobs);
+
+		/* Picture already has all the blobs it announced */
+		if (pp->blobs_loaded >= pp->pic_header->num_blobs)
+			goto nextblob;
+
+		switch (bh->type) {
+		case 0:
+			/*
+			 * Raw 24-bit packed pixels. Widen before multiplying:
+			 * width and height are 16 bit each, so the product
+			 * does not fit into an int.
+			 */
+			if (bh->length != (u64)pp->pic_header->width
+					  * pp->pic_header->height * 3) {
+				pr_err("Blob %u, type 0: Length doesn't match picture.\n",
+				       i);
+
+				goto err;
+			}
+			break;
+		default:
+			pr_warn("Blob %u, unknown type %u.\n", i, bh->type);
+			goto nextblob;
+		}
+
+		pp->blobs[pp->blobs_loaded].blob_header = bh;
+		pp->blobs[pp->blobs_loaded].data = walker;
+		pp->blobs_loaded++;
+
+nextblob:
+		/* Skip the payload and its padding to the next 16 bytes */
+		walker += bh->length;
+		if (bh->length % 16)
+			walker += 16 - (bh->length % 16);
+	}
+
+	if (walker != end)
+		pr_warn("Trailing data in splash file.\n");
+
+	/* Walk over pictures and ensure all blob slots are filled */
+	for (i = 0; i < fp->header->num_pics; i++) {
+		struct splash_pic_priv *pp = &fp->pics[i];
+
+		if (pp->blobs_loaded != pp->pic_header->num_blobs) {
+			pr_err("Picture %u doesn't have all blob slots filled.\n",
+			       i);
+
+			goto err;
+		}
+	}
+
+	pr_info("Loaded (%zu bytes, %u pics, %u blobs).\n",
+		fw->size,
+		fp->header->num_pics,
+		fp->header->num_blobs);
+
+	return fp;
+
+
+err:
+	bootsplash_free_file(fp);
+	return NULL;
+}
diff --git a/drivers/video/fbdev/core/bootsplash_render.c b/drivers/video/fbdev/core/bootsplash_render.c
index 4d7e0117f653..2ae36949d0e3 100644
--- a/drivers/video/fbdev/core/bootsplash_render.c
+++ b/drivers/video/fbdev/core/bootsplash_render.c
@@ -19,6 +19,7 @@
#include <linux/types.h>
#include "bootsplash_internal.h"
+#include "uapi/linux/bootsplash_file.h"
@@ -70,16 +71,69 @@ static inline u32 pack_pixel(const struct fb_var_screeninfo *dst_var,
}
-void bootsplash_do_render_background(struct fb_info *info)
+/*
+ * Blit a packed 24bpp RGB source picture into framebuffer memory,
+ * converting every pixel to the destination format via pack_pixel().
+ * Rows and columns that would fall outside the virtual resolution are
+ * left out. The destination is expected to use at most 32bpp.
+ * Always returns 0.
+ */
+static int splash_convert_to_fb(u8 *dst,
+				const struct fb_var_screeninfo *dst_var,
+				unsigned int dst_stride,
+				unsigned int dst_xoff,
+				unsigned int dst_yoff,
+				const u8 *src,
+				unsigned int src_width,
+				unsigned int src_height)
+{
+	unsigned int src_pitch = 3 * src_width;	/* 3 bytes per pixel */
+	u32 bytes_pp = dst_var->bits_per_pixel / 8;
+	unsigned int row, col;
+
+	/* Add the current offset of the visible area */
+	dst_xoff += dst_var->xoffset;
+	dst_yoff += dst_var->yoffset;
+
+	for (row = 0; row < src_height; row++) {
+		const u8 *in;
+		u8 *out;
+
+		if (row + dst_yoff >= dst_var->yres_virtual)
+			break;
+
+		in = src + (row * src_pitch);
+		out = dst + ((row + dst_yoff) * dst_stride)
+			  + (dst_xoff * bytes_pp);
+
+		for (col = 0; col < src_width; col++) {
+			u32 pixel;
+
+			if (col + dst_xoff >= dst_var->xres_virtual)
+				break;
+
+			/* Source byte order is red, green, blue */
+			pixel = pack_pixel(dst_var, in[0], in[1], in[2]);
+			memcpy(out, &pixel, bytes_pp);
+
+			in += 3;
+			out += bytes_pp;
+		}
+	}
+
+	return 0;
+}
+
+
+void bootsplash_do_render_background(struct fb_info *info,
+ const struct splash_file_priv *fp)
{
unsigned int x, y;
u32 dstpix;
u32 dst_octpp = info->var.bits_per_pixel / 8;
dstpix = pack_pixel(&info->var,
- 0,
- 0,
- 0);
+ fp->header->bg_red,
+ fp->header->bg_green,
+ fp->header->bg_blue);
for (y = 0; y < info->var.yres_virtual; y++) {
u8 *dstline = info->screen_buffer + (y * info->fix.line_length);
@@ -91,3 +145,44 @@ void bootsplash_do_render_background(struct fb_info *info)
}
}
}
+
+
+/*
+ * Draw the first blob of every picture in the splash file, centered on
+ * the visible resolution of @info. Pictures without a loaded raw RGB
+ * (type 0) blob and pictures larger than the screen are skipped.
+ */
+void bootsplash_do_render_pictures(struct fb_info *info,
+				   const struct splash_file_priv *fp)
+{
+	unsigned int i;
+
+	for (i = 0; i < fp->header->num_pics; i++) {
+		struct splash_blob_priv *bp;
+		struct splash_pic_priv *pp = &fp->pics[i];
+		unsigned int dst_xoff, dst_yoff;
+
+		if (pp->blobs_loaded < 1)
+			continue;
+
+		bp = &pp->blobs[0];
+
+		if (bp->blob_header->type != 0)
+			continue;
+
+		/*
+		 * Check the size before computing the offsets: xres/yres are
+		 * unsigned, so subtracting a larger picture would wrap around
+		 * instead of going negative.
+		 */
+		if (pp->pic_header->width > info->var.xres
+		    || pp->pic_header->height > info->var.yres) {
+			pr_info_once("Picture %u is out of bounds at current resolution: %ux%u\n"
+				     "(this will only be printed once every reboot)\n",
+				     i, info->var.xres, info->var.yres);
+
+			continue;
+		}
+
+		/* The picture fits, so centering cannot leave the screen */
+		dst_xoff = (info->var.xres - pp->pic_header->width) / 2;
+		dst_yoff = (info->var.yres - pp->pic_header->height) / 2;
+
+		/* Draw next splash frame */
+		splash_convert_to_fb(info->screen_buffer, &info->var,
+				     info->fix.line_length, dst_xoff, dst_yoff,
+				     bp->data,
+				     pp->pic_header->width, pp->pic_header->height);
+	}
+}
diff --git a/include/uapi/linux/bootsplash_file.h b/include/uapi/linux/bootsplash_file.h
new file mode 100644
index 000000000000..89dc9cca8f0c
--- /dev/null
+++ b/include/uapi/linux/bootsplash_file.h
@@ -0,0 +1,118 @@
+/*
+ * Kernel based bootsplash.
+ *
+ * (File format)
+ *
+ * Authors:
+ * Max Staudt <mstaudt@suse.de>
+ *
+ * SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+ */
+
+#ifndef __BOOTSPLASH_FILE_H
+#define __BOOTSPLASH_FILE_H
+
+
+#define BOOTSPLASH_VERSION 55561
+
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+
+/*
+ * On-disk types
+ *
+ * A splash file consists of:
+ * - One single 'struct splash_file_header'
+ * - An array of 'struct splash_pic_header'
+ * - An array of raw data blocks, each padded to 16 bytes and
+ * preceded by a 'struct splash_blob_header'
+ *
+ * A single-frame splash may look like this:
+ *
+ * +--------------------+
+ * | |
+ * | splash_file_header |
+ * | -> num_blobs = 1 |
+ * | -> num_pics = 1 |
+ * | |
+ * +--------------------+
+ * | |
+ * | splash_pic_header |
+ * | |
+ * +--------------------+
+ * | |
+ * | splash_blob_header |
+ * | -> type = 0 |
+ * | -> picture_id = 0 |
+ * | |
+ * | (raw RGB data) |
+ * | (pad to 16 bytes) |
+ * | |
+ * +--------------------+
+ *
+ * All multi-byte values are stored on disk in the native format
+ * expected by the system the file will be used on.
+ */
+#define BOOTSPLASH_MAGIC_BE "Linux bootsplash"
+#define BOOTSPLASH_MAGIC_LE "hsalpstoob xuniL"
+
+struct splash_file_header {
+ uint8_t id[16]; /* BOOTSPLASH_MAGIC_BE or BOOTSPLASH_MAGIC_LE, per target byte order (no trailing NUL) */
+
+ /* Splash file format version to avoid clashes; see BOOTSPLASH_VERSION */
+ uint16_t version;
+
+ /* The background color */
+ uint8_t bg_red;
+ uint8_t bg_green;
+ uint8_t bg_blue;
+ uint8_t bg_reserved;
+
+ /*
+ * Number of pic/blobs so we can allocate memory for internal
+ * structures ahead of time when reading the file
+ */
+ uint16_t num_blobs;
+ uint8_t num_pics;
+
+ uint8_t padding[103]; /* pads the packed header to 128 bytes */
+} __attribute__((__packed__));
+
+
+struct splash_pic_header {
+ uint16_t width; /* picture width in pixels */
+ uint16_t height; /* picture height in pixels */
+
+ /*
+ * Number of data packages associated with this picture.
+ * Currently, the only use for more than 1 is for animations.
+ */
+ uint8_t num_blobs;
+
+ uint8_t padding[27]; /* pads the packed header to 32 bytes */
+} __attribute__((__packed__));
+
+
+struct splash_blob_header {
+ /*
+ * Length of the data block in bytes, counting neither this header
+ * nor the padding up to the next 16-byte boundary.
+ */
+ uint32_t length;
+
+ /*
+ * Type of the contents.
+ * 0 - Raw RGB data.
+ */
+ uint16_t type;
+
+ /*
+ * Picture this blob is associated with.
+ * Blobs will be added to a picture in the order they are
+ * found in the file.
+ */
+ uint8_t picture_id;
+
+ uint8_t padding[9]; /* pads the packed header to 16 bytes */
+} __attribute__((__packed__));
+
+#endif

View File

@ -0,0 +1,61 @@
From 33212e529708fd480eaf9cc76579f8e7044c0505 Mon Sep 17 00:00:00 2001
From: Martijn Braam <martijn@brixit.nl>
Date: Tue, 20 Oct 2020 14:42:01 +0200
Subject: [PATCH] dts: add pinetab-dev (old display panel)
---
arch/arm64/boot/dts/allwinner/Makefile | 1 +
.../dts/allwinner/sun50i-a64-pinetab-dev.dts | 29 +++++++++++++++++++
2 files changed, 30 insertions(+)
create mode 100644 arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab-dev.dts
diff --git a/arch/arm64/boot/dts/allwinner/Makefile b/arch/arm64/boot/dts/allwinner/Makefile
index a21cfdd8924d..2936092002b5 100644
--- a/arch/arm64/boot/dts/allwinner/Makefile
+++ b/arch/arm64/boot/dts/allwinner/Makefile
@@ -15,6 +15,7 @@ dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-pinephone-1.0.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-pinephone-1.1.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-pinephone-1.2.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-pinetab.dtb
+dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-pinetab-dev.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-sopine-baseboard.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-a64-teres-i.dtb
dtb-$(CONFIG_ARCH_SUNXI) += sun50i-h5-bananapi-m2-plus.dtb
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab-dev.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab-dev.dts
new file mode 100644
index 000000000000..1e287f2fb9f3
--- /dev/null
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab-dev.dts
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
+/*
+ * Copyright (C) 2019 Icenowy Zheng <icenowy@aosc.xyz>
+ *
+ */
+
+/dts-v1/;
+
+#include "sun50i-a64-pinetab.dts"
+
+/ {
+ model = "PineTab";
+ compatible = "pine64,pinetab", "allwinner,sun50i-a64";
+};
+
+&dsi {
+ vcc-dsi-supply = <&reg_dldo1>;
+ status = "okay";
+
+ panel@0 {
+ compatible = "feixin,k101-im2ba02";
+ reg = <0>;
+ avdd-supply = <&reg_dc1sw>;
+ dvdd-supply = <&reg_dc1sw>;
+ cvdd-supply = <&reg_ldo_io1>;
+ reset-gpios = <&pio 3 24 GPIO_ACTIVE_HIGH>; /* PD24 */
+ backlight = <&backlight>;
+ };
+};
--
2.25.4

View File

@ -0,0 +1,31 @@
--- b/drivers/video/fbdev/core/fbcon.c
+++ a/drivers/video/fbdev/core/fbcon.c
@@ -163,6 +163,8 @@
#define advance_row(p, delta) (unsigned short *)((unsigned long)(p) + (delta) * vc->vc_size_row)
+static int fbcon_set_origin(struct vc_data *);
+
static int fbcon_cursor_noblink;
#define divides(a, b) ((!(a) || (b)%(a)) ? 0 : 1)
@@ -2633,6 +2635,11 @@
}
}
+static int fbcon_set_origin(struct vc_data *vc)
+{
+ return 0;
+}
+
void fbcon_suspended(struct fb_info *info)
{
struct vc_data *vc = NULL;
@@ -3103,6 +3110,7 @@
.con_font_default = fbcon_set_def_font,
.con_font_copy = fbcon_copy_font,
.con_set_palette = fbcon_set_palette,
+ .con_set_origin = fbcon_set_origin,
.con_invert_region = fbcon_invert_region,
.con_screen_pos = fbcon_screen_pos,
.con_getxy = fbcon_getxy,

View File

@ -0,0 +1,497 @@
--- b/drivers/video/fbdev/core/fbcon.c
+++ a/drivers/video/fbdev/core/fbcon.c
@@ -122,6 +122,12 @@
/* logo_shown is an index to vc_cons when >= 0; otherwise follows FBCON_LOGO
enums. */
static int logo_shown = FBCON_LOGO_CANSHOW;
+/* Software scrollback */
+static int fbcon_softback_size = 32768;
+static unsigned long softback_buf, softback_curr;
+static unsigned long softback_in;
+static unsigned long softback_top, softback_end;
+static int softback_lines;
/* console mappings */
static int first_fb_vc;
static int last_fb_vc = MAX_NR_CONSOLES - 1;
@@ -161,6 +167,8 @@
static const struct consw fb_con;
+#define CM_SOFTBACK (8)
+
#define advance_row(p, delta) (unsigned short *)((unsigned long)(p) + (delta) * vc->vc_size_row)
static int fbcon_set_origin(struct vc_data *);
@@ -365,6 +373,18 @@
return color;
}
+/*
+ * Recompute where the softback buffer ends for the row size of @vc.
+ * The end is placed on a whole-row boundary. If five rows or fewer
+ * fit, softback_top is cleared instead; that is the value the callers
+ * in this file test before using the scrollback.
+ */
+static void fbcon_update_softback(struct vc_data *vc)
+{
+	int rows = fbcon_softback_size / vc->vc_size_row;
+
+	if (rows <= 5) {
+		/* Too small to be useful, and 0 rows would break the ring */
+		softback_top = 0;
+		return;
+	}
+
+	softback_end = softback_buf + rows * vc->vc_size_row;
+}
+
static void fb_flashcursor(struct work_struct *work)
{
struct fb_info *info = container_of(work, struct fb_info, queue);
@@ -394,7 +414,7 @@
c = scr_readw((u16 *) vc->vc_pos);
mode = (!ops->cursor_flash || ops->cursor_state.enable) ?
CM_ERASE : CM_DRAW;
+ ops->cursor(vc, info, mode, softback_lines, get_color(vc, info, c, 1),
- ops->cursor(vc, info, mode, 0, get_color(vc, info, c, 1),
get_color(vc, info, c, 0));
console_unlock();
}
@@ -451,7 +471,13 @@
}
if (!strncmp(options, "scrollback:", 11)) {
+ options += 11;
+ if (*options) {
+ fbcon_softback_size = simple_strtoul(options, &options, 0);
+ if (*options == 'k' || *options == 'K') {
+ fbcon_softback_size *= 1024;
+ }
+ }
- pr_warn("Ignoring scrollback size option\n");
continue;
}
@@ -996,6 +1022,31 @@
set_blitting_type(vc, info);
+ if (info->fix.type != FB_TYPE_TEXT) {
+ if (fbcon_softback_size) {
+ if (!softback_buf) {
+ softback_buf =
+ (unsigned long)
+ kvmalloc(fbcon_softback_size,
+ GFP_KERNEL);
+ if (!softback_buf) {
+ fbcon_softback_size = 0;
+ softback_top = 0;
+ }
+ }
+ } else {
+ if (softback_buf) {
+ kvfree((void *) softback_buf);
+ softback_buf = 0;
+ softback_top = 0;
+ }
+ }
+ if (softback_buf)
+ softback_in = softback_top = softback_curr =
+ softback_buf;
+ softback_lines = 0;
+ }
+
/* Setup default font */
if (!p->fontdata && !vc->vc_font.data) {
if (!fontname[0] || !(font = find_font(fontname)))
@@ -1169,6 +1220,9 @@
if (logo)
fbcon_prepare_logo(vc, info, cols, rows, new_cols, new_rows);
+ if (vc == svc && softback_buf)
+ fbcon_update_softback(vc);
+
if (ops->rotate_font && ops->rotate_font(info, vc)) {
ops->rotate = FB_ROTATE_UR;
set_blitting_type(vc, info);
@@ -1331,6 +1385,7 @@
{
struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
struct fbcon_ops *ops = info->fbcon_par;
+ int y;
int c = scr_readw((u16 *) vc->vc_pos);
ops->cur_blink_jiffies = msecs_to_jiffies(vc->vc_cur_blink_ms);
@@ -1344,8 +1399,16 @@
fbcon_add_cursor_timer(info);
ops->cursor_flash = (mode == CM_ERASE) ? 0 : 1;
+ if (mode & CM_SOFTBACK) {
+ mode &= ~CM_SOFTBACK;
+ y = softback_lines;
+ } else {
+ if (softback_lines)
+ fbcon_set_origin(vc);
+ y = 0;
+ }
+ ops->cursor(vc, info, mode, y, get_color(vc, info, c, 1),
- ops->cursor(vc, info, mode, 0, get_color(vc, info, c, 1),
get_color(vc, info, c, 0));
}
@@ -1416,6 +1479,8 @@
if (con_is_visible(vc)) {
update_screen(vc);
+ if (softback_buf)
+ fbcon_update_softback(vc);
}
}
@@ -1553,6 +1618,99 @@
scrollback_current = 0;
}
+static void fbcon_redraw_softback(struct vc_data *vc, struct fbcon_display *p,
+ long delta)
+{ /* Move the scrollback view by delta rows and repaint the cells that changed; p is not referenced here. */
+ int count = vc->vc_rows;
+ unsigned short *d, *s;
+ unsigned long n;
+ int line = 0;
+
+ d = (u16 *) softback_curr; /* d: start of the view before the move */
+ if (d == (u16 *) softback_in)
+ d = (u16 *) vc->vc_origin; /* at the ring's write position the view continues in the screen buffer */
+ n = softback_curr + delta * vc->vc_size_row; /* candidate new view start */
+ softback_lines -= delta;
+ if (delta < 0) { /* moving towards softback_top: wrap below softback_buf, clamp at softback_top */
+ if (softback_curr < softback_top && n < softback_buf) {
+ n += softback_end - softback_buf;
+ if (n < softback_top) {
+ softback_lines -=
+ (softback_top - n) / vc->vc_size_row; /* take back the rows that were clamped away */
+ n = softback_top;
+ }
+ } else if (softback_curr >= softback_top
+ && n < softback_top) {
+ softback_lines -=
+ (softback_top - n) / vc->vc_size_row;
+ n = softback_top;
+ }
+ } else { /* moving towards softback_in: wrap at softback_end, clamp at softback_in */
+ if (softback_curr > softback_in && n >= softback_end) {
+ n += softback_buf - softback_end;
+ if (n > softback_in) {
+ n = softback_in;
+ softback_lines = 0;
+ }
+ } else if (softback_curr <= softback_in && n > softback_in) {
+ n = softback_in;
+ softback_lines = 0;
+ }
+ }
+ if (n == softback_curr)
+ return; /* view did not move, nothing to repaint */
+ softback_curr = n;
+ s = (u16 *) softback_curr; /* s: start of the view after the move */
+ if (s == (u16 *) softback_in)
+ s = (u16 *) vc->vc_origin;
+ while (count--) { /* one screen row per iteration */
+ unsigned short *start;
+ unsigned short *le;
+ unsigned short c;
+ int x = 0;
+ unsigned short attr = 1; /* never equals (c & 0xff00), so the first cell always sets attr */
+
+ start = s;
+ le = advance_row(s, 1); /* end of the current source row */
+ do {
+ c = scr_readw(s);
+ if (attr != (c & 0xff00)) { /* attribute changed: flush the pending run */
+ attr = c & 0xff00;
+ if (s > start) {
+ fbcon_putcs(vc, start, s - start,
+ line, x);
+ x += s - start;
+ start = s;
+ }
+ }
+ if (c == scr_readw(d)) { /* same cell value as in the old view: flush the run and skip this cell */
+ if (s > start) {
+ fbcon_putcs(vc, start, s - start,
+ line, x);
+ x += s - start + 1;
+ start = s + 1;
+ } else {
+ x++;
+ start++;
+ }
+ }
+ s++;
+ d++;
+ } while (s < le);
+ if (s > start)
+ fbcon_putcs(vc, start, s - start, line, x); /* flush what is left of the row */
+ line++;
+ if (d == (u16 *) softback_end) /* both cursors wrap at the ring end and hop to the screen buffer at softback_in */
+ d = (u16 *) softback_buf;
+ if (d == (u16 *) softback_in)
+ d = (u16 *) vc->vc_origin;
+ if (s == (u16 *) softback_end)
+ s = (u16 *) softback_buf;
+ if (s == (u16 *) softback_in)
+ s = (u16 *) vc->vc_origin;
+ }
+}
+
static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p,
int line, int count, int dy)
{
@@ -1692,6 +1850,31 @@
}
}
+/*
+ * Append @count screen rows of @vc, starting at row @t, to the softback
+ * ring. Only the foreground console is recorded. When the write
+ * position catches up with softback_top, softback_top is advanced by
+ * one row. Afterwards the view position is reset to the write position.
+ */
+static inline void fbcon_softback_note(struct vc_data *vc, int t,
+				       int count)
+{
+	unsigned short *row;
+
+	if (vc->vc_num != fg_console)
+		return;
+
+	row = (unsigned short *) (vc->vc_origin + t * vc->vc_size_row);
+
+	for (; count; count--) {
+		scr_memcpyw((u16 *) softback_in, row, vc->vc_size_row);
+		row = advance_row(row, 1);
+
+		/* Advance the write position, wrapping at the ring end */
+		softback_in += vc->vc_size_row;
+		if (softback_in == softback_end)
+			softback_in = softback_buf;
+
+		if (softback_in != softback_top)
+			continue;
+
+		/* Write position reached the top row: move the top on */
+		softback_top += vc->vc_size_row;
+		if (softback_top == softback_end)
+			softback_top = softback_buf;
+	}
+
+	softback_curr = softback_in;
+}
+
static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
enum con_scroll dir, unsigned int count)
{
@@ -1714,6 +1897,8 @@
case SM_UP:
if (count > vc->vc_rows) /* Maximum realistic size */
count = vc->vc_rows;
+ if (softback_top)
+ fbcon_softback_note(vc, t, count);
if (logo_shown >= 0)
goto redraw_up;
switch (p->scrollmode) {
@@ -2084,6 +2269,14 @@
info = registered_fb[con2fb_map[vc->vc_num]];
ops = info->fbcon_par;
+ if (softback_top) {
+ if (softback_lines)
+ fbcon_set_origin(vc);
+ softback_top = softback_curr = softback_in = softback_buf;
+ softback_lines = 0;
+ fbcon_update_softback(vc);
+ }
+
if (logo_shown >= 0) {
struct vc_data *conp2 = vc_cons[logo_shown].d;
@@ -2407,6 +2600,9 @@
int cnt;
char *old_data = NULL;
+ if (con_is_visible(vc) && softback_lines)
+ fbcon_set_origin(vc);
+
resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
if (p->userfont)
old_data = vc->vc_font.data;
@@ -2432,6 +2628,8 @@
cols /= w;
rows /= h;
vc_resize(vc, cols, rows);
+ if (con_is_visible(vc) && softback_buf)
+ fbcon_update_softback(vc);
} else if (con_is_visible(vc)
&& vc->vc_mode == KD_TEXT) {
fbcon_clear_margins(vc, 0);
@@ -2590,7 +2788,19 @@
static u16 *fbcon_screen_pos(struct vc_data *vc, int offset)
{
+ unsigned long p;
+ int line;
+
+ if (vc->vc_num != fg_console || !softback_lines)
+ return (u16 *) (vc->vc_origin + offset);
+ line = offset / vc->vc_size_row;
+ if (line >= softback_lines)
+ return (u16 *) (vc->vc_origin + offset -
+ softback_lines * vc->vc_size_row);
+ p = softback_curr + offset;
+ if (p >= softback_end)
+ p += softback_buf - softback_end;
+ return (u16 *) p;
- return (u16 *) (vc->vc_origin + offset);
}
static unsigned long fbcon_getxy(struct vc_data *vc, unsigned long pos,
@@ -2604,7 +2814,22 @@
x = offset % vc->vc_cols;
y = offset / vc->vc_cols;
+ if (vc->vc_num == fg_console)
+ y += softback_lines;
ret = pos + (vc->vc_cols - x) * 2;
+ } else if (vc->vc_num == fg_console && softback_lines) {
+ unsigned long offset = pos - softback_curr;
+
+ if (pos < softback_curr)
+ offset += softback_end - softback_buf;
+ offset /= 2;
+ x = offset % vc->vc_cols;
+ y = offset / vc->vc_cols;
+ ret = pos + (vc->vc_cols - x) * 2;
+ if (ret == softback_end)
+ ret = softback_buf;
+ if (ret == softback_in)
+ ret = vc->vc_origin;
} else {
/* Should not happen */
x = y = 0;
@@ -2632,11 +2857,106 @@
a = ((a) & 0x88ff) | (((a) & 0x7000) >> 4) |
(((a) & 0x0700) << 4);
scr_writew(a, p++);
+ if (p == (u16 *) softback_end)
+ p = (u16 *) softback_buf;
+ if (p == (u16 *) softback_in)
+ p = (u16 *) vc->vc_origin;
+ }
+}
+
+static void fbcon_scrolldelta(struct vc_data *vc, int lines)
+{
+ struct fb_info *info = registered_fb[con2fb_map[fg_console]];
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fbcon_display *disp = &fb_display[fg_console];
+ int offset, limit, scrollback_old;
+
+ if (softback_top) {
+ if (vc->vc_num != fg_console)
+ return;
+ if (vc->vc_mode != KD_TEXT || !lines)
+ return;
+ if (logo_shown >= 0) {
+ struct vc_data *conp2 = vc_cons[logo_shown].d;
+
+ if (conp2->vc_top == logo_lines
+ && conp2->vc_bottom == conp2->vc_rows)
+ conp2->vc_top = 0;
+ if (logo_shown == vc->vc_num) {
+ unsigned long p, q;
+ int i;
+
+ p = softback_in;
+ q = vc->vc_origin +
+ logo_lines * vc->vc_size_row;
+ for (i = 0; i < logo_lines; i++) {
+ if (p == softback_top)
+ break;
+ if (p == softback_buf)
+ p = softback_end;
+ p -= vc->vc_size_row;
+ q -= vc->vc_size_row;
+ scr_memcpyw((u16 *) q, (u16 *) p,
+ vc->vc_size_row);
+ }
+ softback_in = softback_curr = p;
+ update_region(vc, vc->vc_origin,
+ logo_lines * vc->vc_cols);
+ }
+ logo_shown = FBCON_LOGO_CANSHOW;
+ }
+ fbcon_cursor(vc, CM_ERASE | CM_SOFTBACK);
+ fbcon_redraw_softback(vc, disp, lines);
+ fbcon_cursor(vc, CM_DRAW | CM_SOFTBACK);
+ return;
}
+
+ if (!scrollback_phys_max)
+ return;
+
+ scrollback_old = scrollback_current;
+ scrollback_current -= lines;
+ if (scrollback_current < 0)
+ scrollback_current = 0;
+ else if (scrollback_current > scrollback_max)
+ scrollback_current = scrollback_max;
+ if (scrollback_current == scrollback_old)
+ return;
+
+ if (fbcon_is_inactive(vc, info))
+ return;
+
+ fbcon_cursor(vc, CM_ERASE);
+
+ offset = disp->yscroll - scrollback_current;
+ limit = disp->vrows;
+ switch (disp->scrollmode) {
+ case SCROLL_WRAP_MOVE:
+ info->var.vmode |= FB_VMODE_YWRAP;
+ break;
+ case SCROLL_PAN_MOVE:
+ case SCROLL_PAN_REDRAW:
+ limit -= vc->vc_rows;
+ info->var.vmode &= ~FB_VMODE_YWRAP;
+ break;
+ }
+ if (offset < 0)
+ offset += limit;
+ else if (offset >= limit)
+ offset -= limit;
+
+ ops->var.xoffset = 0;
+ ops->var.yoffset = offset * vc->vc_font.height;
+ ops->update_start(info);
+
+ if (!scrollback_current)
+ fbcon_cursor(vc, CM_DRAW);
}
static int fbcon_set_origin(struct vc_data *vc)
{
+ /* If the view is scrolled back, scroll forward by that many rows. */
+ if (softback_lines)
+ fbcon_scrolldelta(vc, softback_lines);
return 0;
}
@@ -2700,6 +3020,8 @@
fbcon_set_palette(vc, color_table);
update_screen(vc);
+ if (softback_buf)
+ fbcon_update_softback(vc);
}
}
@@ -3110,6 +3432,7 @@
.con_font_default = fbcon_set_def_font,
.con_font_copy = fbcon_copy_font,
.con_set_palette = fbcon_set_palette,
+ .con_scrolldelta = fbcon_scrolldelta,
.con_set_origin = fbcon_set_origin,
.con_invert_region = fbcon_invert_region,
.con_screen_pos = fbcon_screen_pos,
@@ -3344,6 +3667,9 @@
}
#endif
+ kvfree((void *)softback_buf);
+ softback_buf = 0UL;
+
for_each_registered_fb(i) {
int pending = 0;

View File

@ -0,0 +1,48 @@
From 4264c74c96e7907b60ee6ed82670317d19ed7ebe Mon Sep 17 00:00:00 2001
From: Edward Vear <edwardvear@gmail.com>
Date: Tue, 27 Oct 2020 00:02:03 -0700
Subject: Bluetooth: Fix attempting to set RPA timeout when unsupported
During controller initialization, an LE Set RPA Timeout command is sent
to the controller if supported. However, the value checked to determine
if the command is supported is incorrect. Page 1921 of the Bluetooth
Core Spec v5.2 shows that bit 2 of octet 35 of the Supported_Commands
field corresponds to the LE Set RPA Timeout command, but currently
bit 6 of octet 35 is checked. This patch checks the correct value
instead.
This issue led to the error seen in the following btmon output during
initialization of an adapter (rtl8761b) and prevented initialization
from completing.
< HCI Command: LE Set Resolvable Private Address Timeout (0x08|0x002e) plen 2
Timeout: 900 seconds
> HCI Event: Command Complete (0x0e) plen 4
LE Set Resolvable Private Address Timeout (0x08|0x002e) ncmd 2
Status: Unsupported Remote Feature / Unsupported LMP Feature (0x1a)
= Close Index: 00:E0:4C:6B:E5:03
The error did not appear when running with this patch.
Signed-off-by: Edward Vear <edwardvear@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
net/bluetooth/hci_core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index be9cdf5dabe5..30a5267af490 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -763,7 +763,7 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
hci_req_add(req, HCI_OP_LE_CLEAR_RESOLV_LIST, 0, NULL);
}
- if (hdev->commands[35] & 0x40) {
+ if (hdev->commands[35] & 0x04) {
__le16 rpa_timeout = cpu_to_le16(hdev->rpa_timeout);
/* Set RPA timeout */
--
cgit v1.2.3-1-gf6bb5

View File

@ -0,0 +1,40 @@
Bluetooth: btusb: Some Qualcomm Bluetooth adapters stop working
This issue starts from linux-5.10-rc1, I reproduced this issue on my
Dell Inspiron 7447 with BT adapter 0cf3:e005, the kernel will print
out: "Bluetooth: hci0: don't support firmware rome 0x31010000", and
someone else also reported a similar issue in bugzilla #211571.
I found this is a regression introduced by 'commit b40f58b97386
("Bluetooth: btusb: Add Qualcomm Bluetooth SoC WCN6855 support"), the
patch assumed that if high ROM version is not zero, it is an adapter
on WCN6855, but many old adapters don't need to load rampatch or nvm,
and they have non-zero high ROM version.
To fix it, let the driver match the rom_version in the
qca_devices_table first; if no entry matches, check the
high ROM version, and if it is not zero, assume this adapter is ready
to work and does not need to load the rampatch and nvm, as before.
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=211571
Fixes: b40f58b97386 ("Bluetooth: btusb: Add Qualcomm Bluetooth SoC WCN6855 support")
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 03b83aa91277..32161dd40ed6 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -4069,6 +4069,13 @@ static int btusb_setup_qca(struct hci_dev *hdev)
info = &qca_devices_table[i];
}
if (!info) {
+ /* If the rom_version is not matched in the qca_devices_table
+ * and the high ROM version is not zero, we assume this chip does
+ * not need to load the rampatch and nvm.
+ */
+ if (ver_rom & ~0xffffU)
+ return 0;
+
bt_dev_err(hdev, "don't support firmware rome 0x%x", ver_rom);
return -ENODEV;
}

View File

@ -0,0 +1,66 @@
diff --git a/drivers/video/fbdev/core/bootsplash.c b/drivers/video/fbdev/core/bootsplash.c
index 843c5400fefc..815b007f81ca 100644
--- a/drivers/video/fbdev/core/bootsplash.c
+++ b/drivers/video/fbdev/core/bootsplash.c
@@ -112,6 +112,8 @@ void bootsplash_render_full(struct fb_info *info)
bootsplash_do_render_pictures(info, splash_state.file);
+ bootsplash_do_render_flush(info);
+
out:
mutex_unlock(&splash_state.data_lock);
}
diff --git a/drivers/video/fbdev/core/bootsplash_internal.h b/drivers/video/fbdev/core/bootsplash_internal.h
index 71e2a27ac0b8..0acb383aa4e3 100644
--- a/drivers/video/fbdev/core/bootsplash_internal.h
+++ b/drivers/video/fbdev/core/bootsplash_internal.h
@@ -89,6 +89,7 @@ void bootsplash_do_render_background(struct fb_info *info,
const struct splash_file_priv *fp);
void bootsplash_do_render_pictures(struct fb_info *info,
const struct splash_file_priv *fp);
+void bootsplash_do_render_flush(struct fb_info *info);
void bootsplash_free_file(struct splash_file_priv *fp);
diff --git a/drivers/video/fbdev/core/bootsplash_render.c b/drivers/video/fbdev/core/bootsplash_render.c
index 2ae36949d0e3..8c09c306ff67 100644
--- a/drivers/video/fbdev/core/bootsplash_render.c
+++ b/drivers/video/fbdev/core/bootsplash_render.c
@@ -186,3 +186,36 @@ void bootsplash_do_render_pictures(struct fb_info *info,
pp->pic_header->width, pp->pic_header->height);
}
}
+
+
+void bootsplash_do_render_flush(struct fb_info *info)
+{
+	/* Source and destination both describe the whole visible screen. */
+	struct fb_copyarea area = {
+		.dx = 0,
+		.dy = 0,
+		.width = info->var.xres,
+		.height = info->var.yres,
+		.sx = 0,
+		.sy = 0,
+	};
+
+	/*
+	 * Drivers built on deferred_io (Xen, for example) have to be told
+	 * that the screen contents changed. A userspace mmap() of the FB
+	 * gets that through the dirty pages callback; writes made from
+	 * inside the kernel get no such notification.
+	 *
+	 * Some DRM drivers' FB implementations need the same treatment but
+	 * do not use deferred_io, so the known ones are matched by id for
+	 * now. Every other framebuffer needs no flush.
+	 */
+	if (!info->fbdefio
+	    && strcmp(info->fix.id, "astdrmfb")
+	    && strcmp(info->fix.id, "cirrusdrmfb")
+	    && strcmp(info->fix.id, "mgadrmfb"))
+		return;
+
+	/*
+	 * Copy the FB onto itself: a fake fb_copyarea that tricks the
+	 * driver into syncing the screen.
+	 */
+	info->fbops->fb_copyarea(info, &area);
+}

View File

@ -0,0 +1,51 @@
From 562a6c114ce736db51e41b8c06c408104b79b126 Mon Sep 17 00:00:00 2001
From: Bhushan Shah <bshah@kde.org>
Date: Wed, 14 Apr 2021 10:29:39 +0530
Subject: [PATCH 3/5] qmi_wwan: provide wrapper for reset_resume
---
drivers/net/usb/qmi_wwan.c | 21 ++++++++++++++++++++-
1 file changed, 20 insertions(+), 1 deletion(-)
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index e18ded349d840..cd6ae9696b56a 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -840,6 +840,25 @@ static int qmi_wwan_resume(struct usb_interface *intf)
return ret;
}
+/*
+ * reset_resume wrapper: on the control interface, run the subdriver's
+ * reset_resume callback (when it has one) before resuming usbnet. If
+ * usbnet_resume() then fails, the subdriver is suspended again.
+ */
+static int qmi_wwan_reset_resume(struct usb_interface *intf)
+{
+	struct usbnet *dev = usb_get_intfdata(intf);
+	struct qmi_wwan_state *info = (void *)&dev->data;
+	bool sub_reset;
+	int ret;
+
+	sub_reset = (intf == info->control && info->subdriver &&
+		     info->subdriver->reset_resume);
+
+	if (sub_reset) {
+		ret = info->subdriver->reset_resume(intf);
+		if (ret < 0)
+			return ret;
+	}
+
+	ret = usbnet_resume(intf);
+	if (ret < 0 && sub_reset)
+		info->subdriver->suspend(intf, PMSG_SUSPEND);
+
+	return ret;
+}
+
static const struct driver_info qmi_wwan_info = {
.description = "WWAN/QMI device",
.flags = FLAG_WWAN | FLAG_SEND_ZLP,
@@ -1478,7 +1497,7 @@ static struct usb_driver qmi_wwan_driver = {
.disconnect = qmi_wwan_disconnect,
.suspend = qmi_wwan_suspend,
.resume = qmi_wwan_resume,
- .reset_resume = qmi_wwan_resume,
+ .reset_resume = qmi_wwan_reset_resume,
.supports_autosuspend = 1,
.disable_hub_initiated_lpm = 1,
};
--
2.31.1

View File

@ -0,0 +1,500 @@
--- b/drivers/video/fbdev/core/fbcon.c
+++ a/drivers/video/fbdev/core/fbcon.c
@@ -122,6 +122,12 @@
/* logo_shown is an index to vc_cons when >= 0; otherwise follows FBCON_LOGO
enums. */
static int logo_shown = FBCON_LOGO_CANSHOW;
+/* Software scrollback */
+static int fbcon_softback_size = 32768;
+static unsigned long softback_buf, softback_curr;
+static unsigned long softback_in;
+static unsigned long softback_top, softback_end;
+static int softback_lines;
/* console mappings */
static int first_fb_vc;
static int last_fb_vc = MAX_NR_CONSOLES - 1;
@@ -161,6 +167,8 @@
static const struct consw fb_con;
+#define CM_SOFTBACK (8)
+
#define advance_row(p, delta) (unsigned short *)((unsigned long)(p) + (delta) * vc->vc_size_row)
static int fbcon_set_origin(struct vc_data *);
@@ -365,6 +373,18 @@
return color;
}
+/*
+ * Recompute softback_end from fbcon_softback_size and the console's row
+ * size. If at most five rows fit, softback_top is cleared instead.
+ */
+static void fbcon_update_softback(struct vc_data *vc)
+{
+	int rows = fbcon_softback_size / vc->vc_size_row;
+
+	if (rows <= 5) {
+		/* Smaller scrollback makes no sense, and 0 would screw
+		   the operation totally */
+		softback_top = 0;
+		return;
+	}
+
+	softback_end = softback_buf + rows * vc->vc_size_row;
+}
+
static void fb_flashcursor(struct work_struct *work)
{
struct fb_info *info = container_of(work, struct fb_info, queue);
@@ -394,7 +414,7 @@
c = scr_readw((u16 *) vc->vc_pos);
mode = (!ops->cursor_flash || ops->cursor_state.enable) ?
CM_ERASE : CM_DRAW;
+ ops->cursor(vc, info, mode, softback_lines, get_color(vc, info, c, 1),
- ops->cursor(vc, info, mode, 0, get_color(vc, info, c, 1),
get_color(vc, info, c, 0));
console_unlock();
}
@@ -451,7 +471,13 @@
}
if (!strncmp(options, "scrollback:", 11)) {
+ options += 11;
+ if (*options) {
+ fbcon_softback_size = simple_strtoul(options, &options, 0);
+ if (*options == 'k' || *options == 'K') {
+ fbcon_softback_size *= 1024;
+ }
+ }
- pr_warn("Ignoring scrollback size option\n");
continue;
}
@@ -996,6 +1022,31 @@
set_blitting_type(vc, info);
+ if (info->fix.type != FB_TYPE_TEXT) {
+ if (fbcon_softback_size) {
+ if (!softback_buf) {
+ softback_buf =
+ (unsigned long)
+ kvmalloc(fbcon_softback_size,
+ GFP_KERNEL);
+ if (!softback_buf) {
+ fbcon_softback_size = 0;
+ softback_top = 0;
+ }
+ }
+ } else {
+ if (softback_buf) {
+ kvfree((void *) softback_buf);
+ softback_buf = 0;
+ softback_top = 0;
+ }
+ }
+ if (softback_buf)
+ softback_in = softback_top = softback_curr =
+ softback_buf;
+ softback_lines = 0;
+ }
+
/* Setup default font */
if (!p->fontdata && !vc->vc_font.data) {
if (!fontname[0] || !(font = find_font(fontname)))
@@ -1169,6 +1220,9 @@
if (logo)
fbcon_prepare_logo(vc, info, cols, rows, new_cols, new_rows);
+ if (vc == svc && softback_buf)
+ fbcon_update_softback(vc);
+
if (ops->rotate_font && ops->rotate_font(info, vc)) {
ops->rotate = FB_ROTATE_UR;
set_blitting_type(vc, info);
@@ -1331,6 +1385,7 @@
{
struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]];
struct fbcon_ops *ops = info->fbcon_par;
+ int y;
int c = scr_readw((u16 *) vc->vc_pos);
ops->cur_blink_jiffies = msecs_to_jiffies(vc->vc_cur_blink_ms);
@@ -1334,11 +1389,19 @@ static void fbcon_cursor(struct vc_data
fbcon_add_cursor_timer(info);
ops->cursor_flash = (mode == CM_ERASE) ? 0 : 1;
+ if (mode & CM_SOFTBACK) {
+ mode &= ~CM_SOFTBACK;
+ y = softback_lines;
+ } else {
+ if (softback_lines)
+ fbcon_set_origin(vc);
+ y = 0;
+ }
if (!ops->cursor)
return;
- ops->cursor(vc, info, mode, 0, get_color(vc, info, c, 1),
+ ops->cursor(vc, info, mode, y, get_color(vc, info, c, 1),
get_color(vc, info, c, 0));
}
@@ -1416,6 +1479,8 @@
if (con_is_visible(vc)) {
update_screen(vc);
+ if (softback_buf)
+ fbcon_update_softback(vc);
}
}
@@ -1553,6 +1618,99 @@
scrollback_current = 0;
}
+/*
+ * Move the software-scrollback view by @delta text rows and redraw.
+ *
+ * A negative @delta moves the view start towards softback_top, a positive
+ * one towards softback_in. The new start wraps around the
+ * [softback_buf, softback_end) ring and is clamped at softback_top or
+ * softback_in; softback_lines is adjusted to match. Each cell of the new
+ * view is compared with the cell previously shown at the same position,
+ * and only runs of differing cells are passed to fbcon_putcs().
+ *
+ * @p is not referenced in this function.
+ */
+static void fbcon_redraw_softback(struct vc_data *vc, struct fbcon_display *p,
+ long delta)
+{
+ int count = vc->vc_rows;
+ unsigned short *d, *s;
+ unsigned long n;
+ int line = 0;
+
+ /*
+ * d walks the cells shown before this call. A position equal to
+ * softback_in is read from vc->vc_origin rather than from the ring.
+ */
+ d = (u16 *) softback_curr;
+ if (d == (u16 *) softback_in)
+ d = (u16 *) vc->vc_origin;
+ n = softback_curr + delta * vc->vc_size_row;
+ softback_lines -= delta;
+ if (delta < 0) {
+ /* Moving back: wrap below softback_buf, clamp at softback_top. */
+ if (softback_curr < softback_top && n < softback_buf) {
+ n += softback_end - softback_buf;
+ if (n < softback_top) {
+ softback_lines -=
+ (softback_top - n) / vc->vc_size_row;
+ n = softback_top;
+ }
+ } else if (softback_curr >= softback_top
+ && n < softback_top) {
+ softback_lines -=
+ (softback_top - n) / vc->vc_size_row;
+ n = softback_top;
+ }
+ } else {
+ /* Moving forward: wrap past softback_end, clamp at softback_in. */
+ if (softback_curr > softback_in && n >= softback_end) {
+ n += softback_buf - softback_end;
+ if (n > softback_in) {
+ n = softback_in;
+ softback_lines = 0;
+ }
+ } else if (softback_curr <= softback_in && n > softback_in) {
+ n = softback_in;
+ softback_lines = 0;
+ }
+ }
+ /* The view did not move, so there is nothing to redraw. */
+ if (n == softback_curr)
+ return;
+ softback_curr = n;
+ /* s walks the cells of the new view, with the same redirection as d. */
+ s = (u16 *) softback_curr;
+ if (s == (u16 *) softback_in)
+ s = (u16 *) vc->vc_origin;
+ while (count--) {
+ unsigned short *start;
+ unsigned short *le;
+ unsigned short c;
+ int x = 0;
+ /* 1 never equals (c & 0xff00), so the first cell starts a run. */
+ unsigned short attr = 1;
+
+ start = s;
+ le = advance_row(s, 1);
+ do {
+ c = scr_readw(s);
+ /* Attribute change: flush the pending run first. */
+ if (attr != (c & 0xff00)) {
+ attr = c & 0xff00;
+ if (s > start) {
+ fbcon_putcs(vc, start, s - start,
+ line, x);
+ x += s - start;
+ start = s;
+ }
+ }
+ /* Same as the old cell: flush the run and skip this cell. */
+ if (c == scr_readw(d)) {
+ if (s > start) {
+ fbcon_putcs(vc, start, s - start,
+ line, x);
+ x += s - start + 1;
+ start = s + 1;
+ } else {
+ x++;
+ start++;
+ }
+ }
+ s++;
+ d++;
+ } while (s < le);
+ /* Flush whatever is still pending at the end of the row. */
+ if (s > start)
+ fbcon_putcs(vc, start, s - start, line, x);
+ line++;
+ /* Wrap both cursors at softback_end; redirect at softback_in. */
+ if (d == (u16 *) softback_end)
+ d = (u16 *) softback_buf;
+ if (d == (u16 *) softback_in)
+ d = (u16 *) vc->vc_origin;
+ if (s == (u16 *) softback_end)
+ s = (u16 *) softback_buf;
+ if (s == (u16 *) softback_in)
+ s = (u16 *) vc->vc_origin;
+ }
+}
+
static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p,
int line, int count, int dy)
{
@@ -1692,6 +1850,31 @@
}
}
+/*
+ * Append @count screen rows, starting at row @t of the foreground
+ * console, to the softback ring buffer. When the write position catches
+ * up with softback_top, softback_top is advanced by one row. Calls for
+ * any console other than fg_console are ignored.
+ */
+static inline void fbcon_softback_note(struct vc_data *vc, int t,
+				       int count)
+{
+	unsigned short *row;
+
+	if (vc->vc_num != fg_console)
+		return;
+
+	row = (unsigned short *) (vc->vc_origin + t * vc->vc_size_row);
+	for (; count; count--) {
+		scr_memcpyw((u16 *) softback_in, row, vc->vc_size_row);
+		row = advance_row(row, 1);
+
+		/* Advance the write position, wrapping at the buffer end. */
+		softback_in += vc->vc_size_row;
+		if (softback_in == softback_end)
+			softback_in = softback_buf;
+
+		/* Ring is full: move softback_top forward by one row. */
+		if (softback_in == softback_top) {
+			softback_top += vc->vc_size_row;
+			if (softback_top == softback_end)
+				softback_top = softback_buf;
+		}
+	}
+
+	/* The view position follows the write position. */
+	softback_curr = softback_in;
+}
+
static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
enum con_scroll dir, unsigned int count)
{
@@ -1714,6 +1897,8 @@
case SM_UP:
if (count > vc->vc_rows) /* Maximum realistic size */
count = vc->vc_rows;
+ if (softback_top)
+ fbcon_softback_note(vc, t, count);
if (logo_shown >= 0)
goto redraw_up;
switch (p->scrollmode) {
@@ -2084,6 +2269,14 @@
info = registered_fb[con2fb_map[vc->vc_num]];
ops = info->fbcon_par;
+ if (softback_top) {
+ if (softback_lines)
+ fbcon_set_origin(vc);
+ softback_top = softback_curr = softback_in = softback_buf;
+ softback_lines = 0;
+ fbcon_update_softback(vc);
+ }
+
if (logo_shown >= 0) {
struct vc_data *conp2 = vc_cons[logo_shown].d;
@@ -2407,6 +2600,9 @@
int cnt;
char *old_data = NULL;
+ if (con_is_visible(vc) && softback_lines)
+ fbcon_set_origin(vc);
+
resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
if (p->userfont)
old_data = vc->vc_font.data;
@@ -2432,6 +2628,8 @@
cols /= w;
rows /= h;
vc_resize(vc, cols, rows);
+ if (con_is_visible(vc) && softback_buf)
+ fbcon_update_softback(vc);
} else if (con_is_visible(vc)
&& vc->vc_mode == KD_TEXT) {
fbcon_clear_margins(vc, 0);
@@ -2590,7 +2788,19 @@
static u16 *fbcon_screen_pos(struct vc_data *vc, int offset)
{
+ unsigned long p;
+ int line;
+
+ /* Not the foreground console, or not scrolled back: screen buffer. */
+ if (vc->vc_num != fg_console || !softback_lines)
+ return (u16 *) (vc->vc_origin + offset);
+ line = offset / vc->vc_size_row;
+ /*
+ * Rows at or beyond softback_lines map to the screen buffer,
+ * shifted up by softback_lines rows.
+ */
+ if (line >= softback_lines)
+ return (u16 *) (vc->vc_origin + offset -
+ softback_lines * vc->vc_size_row);
+ /* Earlier rows are read from the ring, wrapping at softback_end. */
+ p = softback_curr + offset;
+ if (p >= softback_end)
+ p += softback_buf - softback_end;
+ return (u16 *) p;
- return (u16 *) (vc->vc_origin + offset);
}
static unsigned long fbcon_getxy(struct vc_data *vc, unsigned long pos,
@@ -2604,7 +2814,22 @@
x = offset % vc->vc_cols;
y = offset / vc->vc_cols;
+ if (vc->vc_num == fg_console)
+ y += softback_lines;
ret = pos + (vc->vc_cols - x) * 2;
+ } else if (vc->vc_num == fg_console && softback_lines) {
+ unsigned long offset = pos - softback_curr;
+
+ if (pos < softback_curr)
+ offset += softback_end - softback_buf;
+ offset /= 2;
+ x = offset % vc->vc_cols;
+ y = offset / vc->vc_cols;
+ ret = pos + (vc->vc_cols - x) * 2;
+ if (ret == softback_end)
+ ret = softback_buf;
+ if (ret == softback_in)
+ ret = vc->vc_origin;
} else {
/* Should not happen */
x = y = 0;
@@ -2632,11 +2857,106 @@
a = ((a) & 0x88ff) | (((a) & 0x7000) >> 4) |
(((a) & 0x0700) << 4);
scr_writew(a, p++);
+ if (p == (u16 *) softback_end)
+ p = (u16 *) softback_buf;
+ if (p == (u16 *) softback_in)
+ p = (u16 *) vc->vc_origin;
+ }
+}
+
+/*
+ * Scroll the displayed view by @lines rows.
+ *
+ * When softback_top is non-zero the software scrollback path is taken.
+ * It acts only on the foreground console in KD_TEXT mode with a non-zero
+ * @lines, and redraws through fbcon_redraw_softback(). Otherwise the
+ * view is moved by changing the y offset passed to ops->update_start(),
+ * with scrollback_current kept within [0, scrollback_max].
+ *
+ * info, ops and disp are looked up for fg_console, not for vc->vc_num.
+ */
+static void fbcon_scrolldelta(struct vc_data *vc, int lines)
+{
+ struct fb_info *info = registered_fb[con2fb_map[fg_console]];
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct fbcon_display *disp = &fb_display[fg_console];
+ int offset, limit, scrollback_old;
+
+ /* Software scrollback path. */
+ if (softback_top) {
+ if (vc->vc_num != fg_console)
+ return;
+ if (vc->vc_mode != KD_TEXT || !lines)
+ return;
+ if (logo_shown >= 0) {
+ struct vc_data *conp2 = vc_cons[logo_shown].d;
+
+ /*
+ * vc_top sits logo_lines rows down (presumably to keep
+ * the logo out of the scroll region); reset it to 0.
+ */
+ if (conp2->vc_top == logo_lines
+ && conp2->vc_bottom == conp2->vc_rows)
+ conp2->vc_top = 0;
+ if (logo_shown == vc->vc_num) {
+ unsigned long p, q;
+ int i;
+
+ /*
+ * Move up to logo_lines of the newest softback
+ * rows into the top logo_lines rows of the screen
+ * buffer, walking backwards from softback_in and
+ * stopping early at softback_top.
+ */
+ p = softback_in;
+ q = vc->vc_origin +
+ logo_lines * vc->vc_size_row;
+ for (i = 0; i < logo_lines; i++) {
+ if (p == softback_top)
+ break;
+ if (p == softback_buf)
+ p = softback_end;
+ p -= vc->vc_size_row;
+ q -= vc->vc_size_row;
+ scr_memcpyw((u16 *) q, (u16 *) p,
+ vc->vc_size_row);
+ }
+ /* The copied rows are dropped from the ring. */
+ softback_in = softback_curr = p;
+ update_region(vc, vc->vc_origin,
+ logo_lines * vc->vc_cols);
+ }
+ logo_shown = FBCON_LOGO_CANSHOW;
+ }
+ /* Erase the cursor, redraw the moved view, draw the cursor again. */
+ fbcon_cursor(vc, CM_ERASE | CM_SOFTBACK);
+ fbcon_redraw_softback(vc, disp, lines);
+ fbcon_cursor(vc, CM_DRAW | CM_SOFTBACK);
+ return;
}
+
+ /* Offset-based path: nothing to do when scrollback_phys_max is 0. */
+ if (!scrollback_phys_max)
+ return;
+
+ /* Apply @lines, clamp to [0, scrollback_max], stop if unchanged. */
+ scrollback_old = scrollback_current;
+ scrollback_current -= lines;
+ if (scrollback_current < 0)
+ scrollback_current = 0;
+ else if (scrollback_current > scrollback_max)
+ scrollback_current = scrollback_max;
+ if (scrollback_current == scrollback_old)
+ return;
+
+ if (fbcon_is_inactive(vc, info))
+ return;
+
+ fbcon_cursor(vc, CM_ERASE);
+
+ /* Row offset of the new view, wrapped into [0, limit). */
+ offset = disp->yscroll - scrollback_current;
+ limit = disp->vrows;
+ switch (disp->scrollmode) {
+ case SCROLL_WRAP_MOVE:
+ info->var.vmode |= FB_VMODE_YWRAP;
+ break;
+ case SCROLL_PAN_MOVE:
+ case SCROLL_PAN_REDRAW:
+ limit -= vc->vc_rows;
+ info->var.vmode &= ~FB_VMODE_YWRAP;
+ break;
+ }
+ if (offset < 0)
+ offset += limit;
+ else if (offset >= limit)
+ offset -= limit;
+
+ /* Convert rows to pixels and hand the new start to the driver. */
+ ops->var.xoffset = 0;
+ ops->var.yoffset = offset * vc->vc_font.height;
+ ops->update_start(info);
+
+ /* The cursor is redrawn only once scrollback_current is back at 0. */
+ if (!scrollback_current)
+ fbcon_cursor(vc, CM_DRAW);
}
static int fbcon_set_origin(struct vc_data *vc)
{
+ /* If the view is scrolled back, scroll forward by that many rows. */
+ if (softback_lines)
+ fbcon_scrolldelta(vc, softback_lines);
return 0;
}
@@ -2700,6 +3020,8 @@
fbcon_set_palette(vc, color_table);
update_screen(vc);
+ if (softback_buf)
+ fbcon_update_softback(vc);
}
}
@@ -3110,6 +3432,7 @@
.con_font_default = fbcon_set_def_font,
.con_font_copy = fbcon_copy_font,
.con_set_palette = fbcon_set_palette,
+ .con_scrolldelta = fbcon_scrolldelta,
.con_set_origin = fbcon_set_origin,
.con_invert_region = fbcon_invert_region,
.con_screen_pos = fbcon_screen_pos,
@@ -3344,6 +3667,9 @@
}
#endif
+ kvfree((void *)softback_buf);
+ softback_buf = 0UL;
+
for_each_registered_fb(i) {
int pending = 0;

View File

@ -0,0 +1,215 @@
diff --git a/drivers/video/fbdev/core/bootsplash_render.c b/drivers/video/fbdev/core/bootsplash_render.c
index 8c09c306ff67..07e3a4eab811 100644
--- a/drivers/video/fbdev/core/bootsplash_render.c
+++ b/drivers/video/fbdev/core/bootsplash_render.c
@@ -155,6 +155,7 @@ void bootsplash_do_render_pictures(struct fb_info *info,
for (i = 0; i < fp->header->num_pics; i++) {
struct splash_blob_priv *bp;
struct splash_pic_priv *pp = &fp->pics[i];
+ const struct splash_pic_header *ph = pp->pic_header;
long dst_xoff, dst_yoff;
if (pp->blobs_loaded < 1)
@@ -165,8 +166,139 @@ void bootsplash_do_render_pictures(struct fb_info *info,
if (!bp || bp->blob_header->type != 0)
continue;
- dst_xoff = (info->var.xres - pp->pic_header->width) / 2;
- dst_yoff = (info->var.yres - pp->pic_header->height) / 2;
+ switch (ph->position) {
+ case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_TOP_LEFT:
+ dst_xoff = 0;
+ dst_yoff = 0;
+
+ dst_xoff += ph->position_offset;
+ dst_yoff += ph->position_offset;
+ break;
+ case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_TOP:
+ dst_xoff = info->var.xres - pp->pic_header->width;
+ dst_xoff /= 2;
+ dst_yoff = 0;
+
+ dst_yoff += ph->position_offset;
+ break;
+ case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_TOP_RIGHT:
+ dst_xoff = info->var.xres - pp->pic_header->width;
+ dst_yoff = 0;
+
+ dst_xoff -= ph->position_offset;
+ dst_yoff += ph->position_offset;
+ break;
+ case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_RIGHT:
+ dst_xoff = info->var.xres - pp->pic_header->width;
+ dst_yoff = info->var.yres - pp->pic_header->height;
+ dst_yoff /= 2;
+
+ dst_xoff -= ph->position_offset;
+ break;
+ case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_BOTTOM_RIGHT:
+ dst_xoff = info->var.xres - pp->pic_header->width;
+ dst_yoff = info->var.yres - pp->pic_header->height;
+
+ dst_xoff -= ph->position_offset;
+ dst_yoff -= ph->position_offset;
+ break;
+ case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_BOTTOM:
+ dst_xoff = info->var.xres - pp->pic_header->width;
+ dst_xoff /= 2;
+ dst_yoff = info->var.yres - pp->pic_header->height;
+
+ dst_yoff -= ph->position_offset;
+ break;
+ case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_BOTTOM_LEFT:
+ dst_xoff = 0 + ph->position_offset;
+ dst_yoff = info->var.yres - pp->pic_header->height
+ - ph->position_offset;
+ break;
+ case SPLASH_POS_FLAG_CORNER | SPLASH_CORNER_LEFT:
+ dst_xoff = 0;
+ dst_yoff = info->var.yres - pp->pic_header->height;
+ dst_yoff /= 2;
+
+ dst_xoff += ph->position_offset;
+ break;
+
+ case SPLASH_CORNER_TOP_LEFT:
+ dst_xoff = info->var.xres - pp->pic_header->width;
+ dst_xoff /= 2;
+ dst_yoff = info->var.yres - pp->pic_header->height;
+ dst_yoff /= 2;
+
+ dst_xoff -= ph->position_offset;
+ dst_yoff -= ph->position_offset;
+ break;
+ case SPLASH_CORNER_TOP:
+ dst_xoff = info->var.xres - pp->pic_header->width;
+ dst_xoff /= 2;
+ dst_yoff = info->var.yres - pp->pic_header->height;
+ dst_yoff /= 2;
+
+ dst_yoff -= ph->position_offset;
+ break;
+ case SPLASH_CORNER_TOP_RIGHT:
+ dst_xoff = info->var.xres - pp->pic_header->width;
+ dst_xoff /= 2;
+ dst_yoff = info->var.yres - pp->pic_header->height;
+ dst_yoff /= 2;
+
+ dst_xoff += ph->position_offset;
+ dst_yoff -= ph->position_offset;
+ break;
+ case SPLASH_CORNER_RIGHT:
+ dst_xoff = info->var.xres - pp->pic_header->width;
+ dst_xoff /= 2;
+ dst_yoff = info->var.yres - pp->pic_header->height;
+ dst_yoff /= 2;
+
+ dst_xoff += ph->position_offset;
+ break;
+ case SPLASH_CORNER_BOTTOM_RIGHT:
+ dst_xoff = info->var.xres - pp->pic_header->width;
+ dst_xoff /= 2;
+ dst_yoff = info->var.yres - pp->pic_header->height;
+ dst_yoff /= 2;
+
+ dst_xoff += ph->position_offset;
+ dst_yoff += ph->position_offset;
+ break;
+ case SPLASH_CORNER_BOTTOM:
+ dst_xoff = info->var.xres - pp->pic_header->width;
+ dst_xoff /= 2;
+ dst_yoff = info->var.yres - pp->pic_header->height;
+ dst_yoff /= 2;
+
+ dst_yoff += ph->position_offset;
+ break;
+ case SPLASH_CORNER_BOTTOM_LEFT:
+ dst_xoff = info->var.xres - pp->pic_header->width;
+ dst_xoff /= 2;
+ dst_yoff = info->var.yres - pp->pic_header->height;
+ dst_yoff /= 2;
+
+ dst_xoff -= ph->position_offset;
+ dst_yoff += ph->position_offset;
+ break;
+ case SPLASH_CORNER_LEFT:
+ dst_xoff = info->var.xres - pp->pic_header->width;
+ dst_xoff /= 2;
+ dst_yoff = info->var.yres - pp->pic_header->height;
+ dst_yoff /= 2;
+
+ dst_xoff -= ph->position_offset;
+ break;
+
+ default:
+ /* As a fallback, center the picture. */
+ dst_xoff = info->var.xres - pp->pic_header->width;
+ dst_xoff /= 2;
+ dst_yoff = info->var.yres - pp->pic_header->height;
+ dst_yoff /= 2;
+ break;
+ }
if (dst_xoff < 0
|| dst_yoff < 0
diff --git a/include/uapi/linux/bootsplash_file.h b/include/uapi/linux/bootsplash_file.h
index 89dc9cca8f0c..71cedcc68933 100644
--- a/include/uapi/linux/bootsplash_file.h
+++ b/include/uapi/linux/bootsplash_file.h
@@ -91,7 +91,32 @@ struct splash_pic_header {
*/
uint8_t num_blobs;
- uint8_t padding[27];
+ /*
+ * Corner to move the picture to / from.
+ * 0x00 - Top left
+ * 0x01 - Top
+ * 0x02 - Top right
+ * 0x03 - Right
+ * 0x04 - Bottom right
+ * 0x05 - Bottom
+ * 0x06 - Bottom left
+ * 0x07 - Left
+ *
+ * Flags:
+ * 0x10 - Calculate offset from the corner towards the center,
+ * rather than from the center towards the corner
+ */
+ uint8_t position;
+
+ /*
+ * Pixel offset from the selected position.
+ * Example: If the picture is in the top right corner, it will
+ * be placed position_offset pixels from the top and
+ * position_offset pixels from the right margin.
+ */
+ uint16_t position_offset;
+
+ uint8_t padding[24];
} __attribute__((__packed__));
@@ -115,4 +140,22 @@ struct splash_blob_header {
uint8_t padding[9];
} __attribute__((__packed__));
+
+
+
+/*
+ * Enums for on-disk types
+ *
+ * Values of splash_pic_header.position: one SPLASH_CORNER_* value,
+ * optionally ORed with SPLASH_POS_FLAG_CORNER.
+ */
+enum splash_position {
+ SPLASH_CORNER_TOP_LEFT = 0,
+ SPLASH_CORNER_TOP = 1,
+ SPLASH_CORNER_TOP_RIGHT = 2,
+ SPLASH_CORNER_RIGHT = 3,
+ SPLASH_CORNER_BOTTOM_RIGHT = 4,
+ SPLASH_CORNER_BOTTOM = 5,
+ SPLASH_CORNER_BOTTOM_LEFT = 6,
+ SPLASH_CORNER_LEFT = 7,
+ SPLASH_POS_FLAG_CORNER = 0x10, /* offset runs from the corner towards the center */
+};
+
#endif

View File

@ -0,0 +1,65 @@
From 3af7a8b44f265a482c8297b420085cfb53725136 Mon Sep 17 00:00:00 2001
From: Bhushan Shah <bshah@kde.org>
Date: Wed, 14 Apr 2021 10:29:57 +0530
Subject: [PATCH 4/5] cdc-wdm: provide wrapper for reset_resume
---
drivers/usb/class/cdc-wdm.c | 35 ++++++++++++++++++++++++++++++++++-
1 file changed, 34 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 508b1c3f8b731..2b9355ed4a2ad 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -1119,6 +1119,39 @@ static int wdm_resume(struct usb_interface *intf)
return rv;
}
+
+/*
+ * reset_resume handler for the interface.
+ *
+ * Flags the device as resetting, sets rerr to -EINTR and wakes everything
+ * sleeping on desc->wait. Then, holding rlock and wlock, it poisons the
+ * URBs, cancels the pending work items, clears the SUSPENDING, OVERFLOW
+ * and RESETTING flags and returns the result of recover_from_urb_loss().
+ */
+static int wdm_reset_resume(struct usb_interface *intf)
+{
+ struct wdm_device *desc = wdm_find_device(intf);
+ int rv;
+
+ dev_dbg(&desc->intf->dev, "wdm%d_reset_resume\n", intf->minor);
+
+ /* Flag and error updates are made under the iuspin spinlock. */
+ spin_lock_irq(&desc->iuspin);
+ set_bit(WDM_RESETTING, &desc->flags);
+ set_bit(WDM_READ, &desc->flags);
+ clear_bit(WDM_IN_USE, &desc->flags);
+
+ desc->rerr = -EINTR;
+
+ spin_unlock_irq(&desc->iuspin);
+ wake_up_all(&desc->wait);
+ /* Take rlock, then wlock; they are released in reverse order below. */
+ mutex_lock(&desc->rlock);
+ mutex_lock(&desc->wlock);
+ poison_urbs(desc);
+ cancel_work_sync(&desc->rxwork);
+ cancel_work_sync(&desc->service_outs_intr);
+
+ clear_bit(WDM_SUSPENDING, &desc->flags);
+ clear_bit(WDM_OVERFLOW, &desc->flags);
+ clear_bit(WDM_RESETTING, &desc->flags);
+
+ rv = recover_from_urb_loss(desc);
+ mutex_unlock(&desc->wlock);
+ mutex_unlock(&desc->rlock);
+
+ return rv;
+}
#endif
static int wdm_pre_reset(struct usb_interface *intf)
@@ -1166,7 +1199,7 @@ static struct usb_driver wdm_driver = {
#ifdef CONFIG_PM
.suspend = wdm_suspend,
.resume = wdm_resume,
- .reset_resume = wdm_resume,
+ .reset_resume = wdm_reset_resume,
#endif
.pre_reset = wdm_pre_reset,
.post_reset = wdm_post_reset,
--
2.31.1

View File

@ -0,0 +1,58 @@
From a163474e9b86c2c25f20733385d8b1d6de492a7f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Wed, 25 Nov 2020 08:45:55 +0100
Subject: efivarfs: revert "fix memory leak in efivarfs_create()"
The memory leak addressed by commit fe5186cf12e3 is a false positive:
all allocations are recorded in a linked list, and freed when the
filesystem is unmounted. This leads to double frees, and as reported
by David, leads to crashes if SLUB is configured to self destruct when
double frees occur.
So drop the redundant kfree() again, and instead, mark the offending
pointer variable so the allocation is ignored by kmemleak.
Cc: Vamshi K Sthambamkadi <vamshi.k.sthambamkadi@gmail.com>
Fixes: fe5186cf12e3 ("efivarfs: fix memory leak in efivarfs_create()")
Reported-by: David Laight <David.Laight@aculab.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
fs/efivarfs/inode.c | 2 ++
fs/efivarfs/super.c | 1 -
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 96c0c86f3fff..0297ad95eb5c 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -7,6 +7,7 @@
#include <linux/efi.h>
#include <linux/fs.h>
#include <linux/ctype.h>
+#include <linux/kmemleak.h>
#include <linux/slab.h>
#include <linux/uuid.h>
@@ -103,6 +104,7 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
var->var.VariableName[i] = '\0';
inode->i_private = var;
+ kmemleak_ignore(var);
err = efivar_entry_add(var, &efivarfs_list);
if (err)
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index f943fd0b0699..15880a68faad 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -21,7 +21,6 @@ LIST_HEAD(efivarfs_list);
static void efivarfs_evict_inode(struct inode *inode)
{
clear_inode(inode);
- kfree(inode->i_private);
}
static const struct super_operations efivarfs_ops = {
--
cgit v1.2.3-1-gf6bb5

View File

@ -0,0 +1,327 @@
diff --git a/drivers/video/fbdev/core/bootsplash.c b/drivers/video/fbdev/core/bootsplash.c
index 815b007f81ca..c8642142cfea 100644
--- a/drivers/video/fbdev/core/bootsplash.c
+++ b/drivers/video/fbdev/core/bootsplash.c
@@ -53,6 +53,14 @@ static void splash_callback_redraw_vc(struct work_struct *ignored)
console_unlock();
}
+/*
+ * Delayed-work callback for animated splash screens: redraw only while
+ * bootsplash_would_render_now() reports that the splash would render.
+ */
+static void splash_callback_animation(struct work_struct *ignored)
+{
+	if (!bootsplash_would_render_now())
+		return;
+
+	/* This will also re-schedule this delayed worker */
+	splash_callback_redraw_vc(ignored);
+}
+
static bool is_fb_compatible(const struct fb_info *info)
{
@@ -103,17 +111,44 @@ static bool is_fb_compatible(const struct fb_info *info)
*/
void bootsplash_render_full(struct fb_info *info)
{
+ bool is_update = false;
+
mutex_lock(&splash_state.data_lock);
- if (!is_fb_compatible(info))
- goto out;
+ /*
+ * If we've painted on this FB recently, we don't have to do
+ * the sanity checks and background drawing again.
+ */
+ if (splash_state.splash_fb == info)
+ is_update = true;
+
+
+ if (!is_update) {
+ /* Check whether we actually support this FB. */
+ splash_state.splash_fb = NULL;
+
+ if (!is_fb_compatible(info))
+ goto out;
+
+ /* Draw the background only once */
+ bootsplash_do_render_background(info, splash_state.file);
- bootsplash_do_render_background(info, splash_state.file);
+ /* Mark this FB as last seen */
+ splash_state.splash_fb = info;
+ }
- bootsplash_do_render_pictures(info, splash_state.file);
+ bootsplash_do_render_pictures(info, splash_state.file, is_update);
bootsplash_do_render_flush(info);
+ bootsplash_do_step_animations(splash_state.file);
+
+ /* Schedule update for animated splash screens */
+ if (splash_state.file->frame_ms > 0)
+ schedule_delayed_work(&splash_state.dwork_animation,
+ msecs_to_jiffies(
+ splash_state.file->frame_ms));
+
out:
mutex_unlock(&splash_state.data_lock);
}
@@ -169,8 +204,14 @@ void bootsplash_enable(void)
was_enabled = test_and_set_bit(0, &splash_state.enabled);
- if (!was_enabled)
+ if (!was_enabled) {
+ /* Force a full redraw when the splash is re-activated */
+ mutex_lock(&splash_state.data_lock);
+ splash_state.splash_fb = NULL;
+ mutex_unlock(&splash_state.data_lock);
+
schedule_work(&splash_state.work_redraw_vc);
+ }
}
@@ -227,6 +268,14 @@ ATTRIBUTE_GROUPS(splash_dev);
*/
static int splash_resume(struct device *device)
{
+ /*
+ * Force full redraw on resume since we've probably lost the
+ * framebuffer's contents meanwhile
+ */
+ mutex_lock(&splash_state.data_lock);
+ splash_state.splash_fb = NULL;
+ mutex_unlock(&splash_state.data_lock);
+
if (bootsplash_would_render_now())
schedule_work(&splash_state.work_redraw_vc);
@@ -235,6 +284,7 @@ static int splash_resume(struct device *device)
static int splash_suspend(struct device *device)
{
+ cancel_delayed_work_sync(&splash_state.dwork_animation);
cancel_work_sync(&splash_state.work_redraw_vc);
return 0;
@@ -296,6 +346,8 @@ void bootsplash_init(void)
set_bit(0, &splash_state.enabled);
INIT_WORK(&splash_state.work_redraw_vc, splash_callback_redraw_vc);
+ INIT_DELAYED_WORK(&splash_state.dwork_animation,
+ splash_callback_animation);
if (!splash_state.bootfile || !strlen(splash_state.bootfile))
diff --git a/drivers/video/fbdev/core/bootsplash_internal.h b/drivers/video/fbdev/core/bootsplash_internal.h
index 0acb383aa4e3..b3a74835d90f 100644
--- a/drivers/video/fbdev/core/bootsplash_internal.h
+++ b/drivers/video/fbdev/core/bootsplash_internal.h
@@ -37,6 +37,8 @@ struct splash_pic_priv {
struct splash_blob_priv *blobs;
u16 blobs_loaded;
+
+ u16 anim_nextframe;
};
@@ -45,6 +47,12 @@ struct splash_file_priv {
const struct splash_file_header *header;
struct splash_pic_priv *pics;
+
+ /*
+ * A local copy of the frame delay in the header.
+ * We modify it to keep the code simple.
+ */
+ u16 frame_ms;
};
@@ -71,6 +79,7 @@ struct splash_priv {
struct platform_device *splash_device;
struct work_struct work_redraw_vc;
+ struct delayed_work dwork_animation;
/* Splash data structures including lock for everything below */
struct mutex data_lock;
@@ -88,8 +97,10 @@ struct splash_priv {
void bootsplash_do_render_background(struct fb_info *info,
const struct splash_file_priv *fp);
void bootsplash_do_render_pictures(struct fb_info *info,
- const struct splash_file_priv *fp);
+ const struct splash_file_priv *fp,
+ bool is_update);
void bootsplash_do_render_flush(struct fb_info *info);
+void bootsplash_do_step_animations(struct splash_file_priv *fp);
void bootsplash_free_file(struct splash_file_priv *fp);
diff --git a/drivers/video/fbdev/core/bootsplash_load.c b/drivers/video/fbdev/core/bootsplash_load.c
index fd807571ab7d..1f661b2d4cc9 100644
--- a/drivers/video/fbdev/core/bootsplash_load.c
+++ b/drivers/video/fbdev/core/bootsplash_load.c
@@ -71,6 +71,7 @@ struct splash_file_priv *bootsplash_load_firmware(struct device *device,
{
const struct firmware *fw;
struct splash_file_priv *fp;
+ bool have_anim = false;
unsigned int i;
const u8 *walker;
@@ -135,6 +136,13 @@ struct splash_file_priv *bootsplash_load_firmware(struct device *device,
goto err;
}
+ if (ph->anim_type > SPLASH_ANIM_LOOP_FORWARD) {
+ pr_warn("Picture %u: Unsupported animation type %u.\n",
+ i, ph->anim_type);
+
+ ph->anim_type = SPLASH_ANIM_NONE;
+ }
+
pp->pic_header = ph;
pp->blobs = vzalloc(ph->num_blobs
* sizeof(struct splash_blob_priv));
@@ -202,6 +210,7 @@ struct splash_file_priv *bootsplash_load_firmware(struct device *device,
/* Walk over pictures and ensure all blob slots are filled */
for (i = 0; i < fp->header->num_pics; i++) {
struct splash_pic_priv *pp = &fp->pics[i];
+ const struct splash_pic_header *ph = pp->pic_header;
if (pp->blobs_loaded != pp->pic_header->num_blobs) {
pr_err("Picture %u doesn't have all blob slots filled.\n",
@@ -209,8 +218,20 @@ struct splash_file_priv *bootsplash_load_firmware(struct device *device,
goto err;
}
+
+ if (ph->anim_type
+ && ph->num_blobs > 1
+ && ph->anim_loop < pp->blobs_loaded)
+ have_anim = true;
}
+ if (!have_anim)
+ /* Disable animation timer if there is nothing to animate */
+ fp->frame_ms = 0;
+ else
+ /* Enforce minimum delay between frames */
+ fp->frame_ms = max((u16)20, fp->header->frame_ms);
+
pr_info("Loaded (%ld bytes, %u pics, %u blobs).\n",
fw->size,
fp->header->num_pics,
diff --git a/drivers/video/fbdev/core/bootsplash_render.c b/drivers/video/fbdev/core/bootsplash_render.c
index 07e3a4eab811..76033606ca8a 100644
--- a/drivers/video/fbdev/core/bootsplash_render.c
+++ b/drivers/video/fbdev/core/bootsplash_render.c
@@ -148,7 +148,8 @@ void bootsplash_do_render_background(struct fb_info *info,
void bootsplash_do_render_pictures(struct fb_info *info,
- const struct splash_file_priv *fp)
+ const struct splash_file_priv *fp,
+ bool is_update)
{
unsigned int i;
@@ -161,7 +162,11 @@ void bootsplash_do_render_pictures(struct fb_info *info,
if (pp->blobs_loaded < 1)
continue;
- bp = &pp->blobs[0];
+ /* Skip static pictures when refreshing animations */
+ if (ph->anim_type == SPLASH_ANIM_NONE && is_update)
+ continue;
+
+ bp = &pp->blobs[pp->anim_nextframe];
if (!bp || bp->blob_header->type != 0)
continue;
@@ -351,3 +356,24 @@ void bootsplash_do_render_flush(struct fb_info *info)
info->fbops->fb_copyarea(info, &area);
}
}
+
+
+/*
+ * Advance every forward-looping picture in @fp by one frame.
+ *
+ * Pictures with fewer than two loaded blobs, or whose loop point does not
+ * name a loaded blob, are left on their current frame. After the last blob
+ * the frame index wraps to the picture's anim_loop rather than to 0, so
+ * frames before the loop point act as a one-time intro.
+ */
+void bootsplash_do_step_animations(struct splash_file_priv *fp)
+{
+ unsigned int i;
+
+ /* Step every animation once */
+ for (i = 0; i < fp->header->num_pics; i++) {
+ struct splash_pic_priv *pp = &fp->pics[i];
+
+ /*
+ * anim_loop becomes anim_nextframe on wrap-around, and the
+ * renderer uses anim_nextframe to index pp->blobs[]. It must
+ * therefore be strictly below blobs_loaded; '>' here would let
+ * anim_loop == num_blobs through and read past the array.
+ */
+ if (pp->blobs_loaded < 2
+ || pp->pic_header->anim_loop >= pp->blobs_loaded)
+ continue;
+
+ if (pp->pic_header->anim_type == SPLASH_ANIM_LOOP_FORWARD) {
+ pp->anim_nextframe++;
+ if (pp->anim_nextframe >= pp->pic_header->num_blobs)
+ pp->anim_nextframe = pp->pic_header->anim_loop;
+ }
+ }
+}
diff --git a/include/uapi/linux/bootsplash_file.h b/include/uapi/linux/bootsplash_file.h
index 71cedcc68933..b3af0a3c6487 100644
--- a/include/uapi/linux/bootsplash_file.h
+++ b/include/uapi/linux/bootsplash_file.h
@@ -77,7 +77,17 @@ struct splash_file_header {
uint16_t num_blobs;
uint8_t num_pics;
- uint8_t padding[103];
+ uint8_t unused_1;
+
+ /*
+ * Milliseconds to wait before painting the next frame in
+ * an animation.
+ * This is actually a minimum, as the system is allowed to
+ * stall for longer between frames.
+ */
+ uint16_t frame_ms;
+
+ uint8_t padding[100];
} __attribute__((__packed__));
@@ -116,7 +126,23 @@ struct splash_pic_header {
*/
uint16_t position_offset;
- uint8_t padding[24];
+ /*
+ * Animation type.
+ * 0 - off
+ * 1 - forward loop
+ */
+ uint8_t anim_type;
+
+ /*
+ * Animation loop point.
+ * Actual meaning depends on animation type:
+ * Type 0 - Unused
+ * 1 - Frame at which to restart the forward loop
+ * (allowing for "intro" frames)
+ */
+ uint8_t anim_loop;
+
+ uint8_t padding[22];
} __attribute__((__packed__));
@@ -158,4 +184,9 @@ enum splash_position {
SPLASH_POS_FLAG_CORNER = 0x10,
};
+/* Values stored in splash_pic_header.anim_type */
+enum splash_anim_type {
+ SPLASH_ANIM_NONE = 0, /* animation off */
+ SPLASH_ANIM_LOOP_FORWARD = 1, /* forward loop */
+};
+
#endif

View File

@ -0,0 +1,34 @@
From ed73c96e313c549f710df58c8fbe47200ee13df1 Mon Sep 17 00:00:00 2001
From: Bhushan Shah <bshah@kde.org>
Date: Sat, 10 Apr 2021 08:52:05 +0530
Subject: [PATCH 5/5] net: usb: qmi_wwan: set the DTR when resuming
If usb device does reset_resume instead of unbind/bind, we need to
re-enable the DTR quirk, that way after resuming connection QMI
communication between host and modem is possible again.
Signed-off-by: Bhushan Shah <bshah@kde.org>
Tested-by: Dalton Durst <d@ltondur.st>
---
drivers/net/usb/qmi_wwan.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index cd6ae9696b56a..ada94a3242146 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -855,6 +855,11 @@ static int qmi_wwan_reset_resume(struct usb_interface *intf)
ret = usbnet_resume(intf);
if (ret < 0 && callsub)
info->subdriver->suspend(intf, PMSG_SUSPEND);
+
+ if (dev->driver_info->data & QMI_WWAN_QUIRK_DTR ||
+ le16_to_cpu(dev->udev->descriptor.bcdUSB) >= 0x0201) {
+ qmi_wwan_change_dtr(dev, true);
+ }
err:
return ret;
}
--
2.31.1

View File

@ -0,0 +1,82 @@
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 2ebaba16f785..416735ab6dc1 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -105,6 +105,7 @@
#include <linux/ctype.h>
#include <linux/bsearch.h>
#include <linux/gcd.h>
+#include <linux/bootsplash.h>
#define MAX_NR_CON_DRIVER 16
@@ -4235,6 +4236,7 @@ void do_unblank_screen(int leaving_gfx)
}
console_blanked = 0;
+ bootsplash_mark_dirty();
if (vc->vc_sw->con_blank(vc, 0, leaving_gfx))
/* Low-level driver cannot restore -> do it ourselves */
update_screen(vc);
diff --git a/drivers/video/fbdev/core/bootsplash.c b/drivers/video/fbdev/core/bootsplash.c
index c8642142cfea..13fcaabbc2ca 100644
--- a/drivers/video/fbdev/core/bootsplash.c
+++ b/drivers/video/fbdev/core/bootsplash.c
@@ -165,6 +165,13 @@ bool bootsplash_would_render_now(void)
&& bootsplash_is_enabled();
}
+/*
+ * Clear the record of the last-painted framebuffer (splash_fb) while
+ * holding data_lock. Callers use this to force a full redraw on the
+ * next render.
+ */
+void bootsplash_mark_dirty(void)
+{
+ mutex_lock(&splash_state.data_lock);
+ splash_state.splash_fb = NULL;
+ mutex_unlock(&splash_state.data_lock);
+}
+
bool bootsplash_is_enabled(void)
{
bool was_enabled;
@@ -206,9 +213,7 @@ void bootsplash_enable(void)
if (!was_enabled) {
/* Force a full redraw when the splash is re-activated */
- mutex_lock(&splash_state.data_lock);
- splash_state.splash_fb = NULL;
- mutex_unlock(&splash_state.data_lock);
+ bootsplash_mark_dirty();
schedule_work(&splash_state.work_redraw_vc);
}
@@ -272,9 +277,7 @@ static int splash_resume(struct device *device)
* Force full redraw on resume since we've probably lost the
* framebuffer's contents meanwhile
*/
- mutex_lock(&splash_state.data_lock);
- splash_state.splash_fb = NULL;
- mutex_unlock(&splash_state.data_lock);
+ bootsplash_mark_dirty();
if (bootsplash_would_render_now())
schedule_work(&splash_state.work_redraw_vc);
diff --git a/include/linux/bootsplash.h b/include/linux/bootsplash.h
index c6dd0b43180d..4075098aaadd 100644
--- a/include/linux/bootsplash.h
+++ b/include/linux/bootsplash.h
@@ -19,6 +19,8 @@ extern void bootsplash_render_full(struct fb_info *info);
extern bool bootsplash_would_render_now(void);
+extern void bootsplash_mark_dirty(void);
+
extern bool bootsplash_is_enabled(void);
extern void bootsplash_disable(void);
extern void bootsplash_enable(void);
@@ -31,6 +33,8 @@ extern void bootsplash_init(void);
#define bootsplash_would_render_now() (false)
+#define bootsplash_mark_dirty()
+
#define bootsplash_is_enabled() (false)
#define bootsplash_disable()
#define bootsplash_enable()

View File

@ -0,0 +1,27 @@
From 1671ef2de0f3f698622bed7ba0e9a605fdd260fc Mon Sep 17 00:00:00 2001
From: Bhushan Shah <bshah@kde.org>
Date: Wed, 14 Apr 2021 18:58:41 +0530
Subject: [PATCH 6/6] cdc-wdm: send HUP if we are resetting
If userspace is polling the cdc-wdm socket, and device resets then we
should notify userspace/client about reset.
---
drivers/usb/class/cdc-wdm.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 2b9355ed4a2ad..93d9bacc18384 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -666,6 +666,8 @@ static __poll_t wdm_poll(struct file *file, struct poll_table_struct *wait)
spin_unlock_irqrestore(&desc->iuspin, flags);
goto desc_out;
}
+ if (test_bit(WDM_RESETTING, &desc->flags))
+ mask = EPOLLHUP;
if (test_bit(WDM_READ, &desc->flags))
mask = EPOLLIN | EPOLLRDNORM;
if (desc->rerr || desc->werr)
--
2.31.1

View File

@ -0,0 +1,42 @@
diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c
index f4166263bb3a..a248429194bb 100644
--- a/drivers/tty/vt/keyboard.c
+++ b/drivers/tty/vt/keyboard.c
@@ -49,6 +49,8 @@
#include <asm/irq_regs.h>
+#include <linux/bootsplash.h>
+
/*
* Exported functions/variables
*/
@@ -1413,6 +1415,28 @@ static void kbd_keycode(unsigned int key
}
#endif
+ /* Trap keys when bootsplash is shown */
+ if (bootsplash_would_render_now()) {
+ /* Deactivate bootsplash on ESC or Alt+Fxx VT switch */
+ if (keycode >= KEY_F1 && keycode <= KEY_F12) {
+ bootsplash_disable();
+
+ /*
+ * No return here since we want to actually
+ * perform the VT switch.
+ */
+ } else {
+ if (keycode == KEY_ESC)
+ bootsplash_disable();
+
+ /*
+ * Just drop any other keys.
+ * Their effect would be hidden by the splash.
+ */
+ return;
+ }
+ }
+
if (kbd->kbdmode == VC_MEDIUMRAW) {
/*
* This is extended medium raw mode, with keys above 127

View File

@ -0,0 +1,21 @@
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 3ffc1ce29023..bc6a24c9dfa8 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -49,6 +49,7 @@
#include <linux/syscalls.h>
#include <linux/of.h>
#include <linux/rcupdate.h>
+#include <linux/bootsplash.h>
#include <asm/ptrace.h>
#include <asm/irq_regs.h>
@@ -104,6 +105,8 @@ static void sysrq_handle_SAK(int key)
{
struct work_struct *SAK_work = &vc_cons[fg_console].SAK_work;
schedule_work(SAK_work);
+
+ bootsplash_disable();
}
static struct sysrq_key_op sysrq_SAK_op = {
.handler = sysrq_handle_SAK,

View File

@ -0,0 +1,21 @@
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 9a39a6fcfe98..8a9c67e1c5d8 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -1343,6 +1343,16 @@ static void fbcon_cursor(struct vc_data *vc, int mode)
int y;
int c = scr_readw((u16 *) vc->vc_pos);
+ /*
+ * Disable the splash here so we don't have to hook into
+ * vt_console_print() in drivers/tty/vt/vt.c
+ *
+ * We'd disable the splash just before the call to
+ * hide_cursor() anyway, so this spot is just fine.
+ */
+ if (oops_in_progress)
+ bootsplash_disable();
+
ops->cur_blink_jiffies = msecs_to_jiffies(vc->vc_cur_blink_ms);
if (fbcon_is_inactive(vc, info) || vc->vc_deccm != 1)

View File

@ -0,0 +1,321 @@
diff --git a/Documentation/ABI/testing/sysfs-platform-bootsplash b/Documentation/ABI/testing/sysfs-platform-bootsplash
new file mode 100644
index 000000000000..742c7b035ded
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-bootsplash
@@ -0,0 +1,11 @@
+What: /sys/devices/platform/bootsplash.0/enabled
+Date: Oct 2017
+KernelVersion: 4.14
+Contact: Max Staudt <mstaudt@suse.de>
+Description:
+ Can be set and read.
+
+ 0: Splash is disabled.
+ 1: Splash is shown whenever fbcon would show a text console
+ (i.e. no graphical application is running), and a splash
+ file is loaded.
diff --git a/Documentation/bootsplash.rst b/Documentation/bootsplash.rst
new file mode 100644
index 000000000000..611f0c558925
--- /dev/null
+++ b/Documentation/bootsplash.rst
@@ -0,0 +1,285 @@
+====================
+The Linux bootsplash
+====================
+
+:Date: November, 2017
+:Author: Max Staudt <mstaudt@suse.de>
+
+
+The Linux bootsplash is a graphical replacement for the '``quiet``' boot
+option, typically showing a logo and a spinner animation as the system starts.
+
+Currently, it is a part of the Framebuffer Console support, and can be found
+as ``CONFIG_BOOTSPLASH`` in the kernel configuration. This means that as long
+as it is enabled, it hijacks fbcon's output and draws a splash screen instead.
+
+Purely compiling in the bootsplash will not render it functional - to actually
+render a splash, you will also need a splash theme file. See the example
+utility and script in ``tools/bootsplash`` for a live demo.
+
+
+
+Motivation
+==========
+
+- The '``quiet``' boot option only suppresses most messages during boot, but
+ errors are still shown.
+
+- A user space implementation can only show a logo once user space has been
+ initialized far enough to allow this. A kernel splash can display a splash
+ immediately as soon as fbcon can be displayed.
+
+- Implementing a splash screen in user space (e.g. Plymouth) is problematic
+ due to resource conflicts.
+
+ For example, if Plymouth is keeping ``/dev/fb0`` (provided via vesafb/efifb)
+ open, then most DRM drivers can't replace it because the address space is
+ still busy - thus leading to a VRAM reservation error.
+
+ See: https://bugzilla.opensuse.org/show_bug.cgi?id=980750
+
+
+
+Command line arguments
+======================
+
+``bootsplash.bootfile``
+ Which file in the initramfs to load.
+
+ The splash theme is loaded via request_firmware(), thus to load
+ ``/lib/firmware/bootsplash/mytheme`` pass the command line:
+
+ ``bootsplash.bootfile=bootsplash/mytheme``
+
+ Note: The splash file *has to be* in the initramfs, as it needs to be
+ available when the splash is initialized early on.
+
+ Default: none, i.e. a non-functional splash, falling back to showing text.
+
+
+
+sysfs run-time configuration
+============================
+
+``/sys/devices/platform/bootsplash.0/enabled``
+ Enable/disable the bootsplash.
+ The system boots with this set to 1, but will not show a splash unless
+ a splash theme file is also loaded.
+
+
+
+Kconfig
+=======
+
+``BOOTSPLASH``
+ Whether to compile in bootsplash support
+ (depends on fbcon compiled in, i.e. ``FRAMEBUFFER_CONSOLE=y``)
+
+
+
+Bootsplash file format
+======================
+
+A file specified in the kernel configuration as ``CONFIG_BOOTSPLASH_FILE``
+or specified on the command line as ``bootsplash.bootfile`` will be loaded
+and displayed as soon as fbcon is initialized.
+
+
+Main blocks
+-----------
+
+There are 3 main blocks in each file:
+
+ - one File header
+ - n Picture headers
+ - m (Blob header + payload) blocks
+
+
+Structures
+----------
+
+The on-disk structures are defined in
+``include/uapi/linux/bootsplash_file.h`` and represent these blocks:
+
+ - ``struct splash_file_header``
+
+ Represents the file header, with splash-wide information including:
+
+ - The magic string "``Linux bootsplash``" on big-endian platforms
+ (the reverse on little endian)
+ - The file format version (for incompatible updates, hopefully never)
+ - The background color
+ - Number of picture and blob blocks
+ - Animation speed (we only allow one delay for all animations)
+
+ The file header is followed by the first picture header.
+
+
+ - ``struct splash_pic_header``
+
+ Represents an object (picture) drawn on screen, including its immutable
+ properties:
+ - Width, height
+ - Positioning relative to screen corners or in the center
+ - Animation, if any
+ - Animation type
+ - Number of blobs
+
+ The picture header is followed by another picture header, up until n
+ picture headers (as defined in the file header) have been read. Then,
+ the (blob header, payload) pairs follow.
+
+
+ - ``struct splash_blob_header``
+ (followed by payload)
+
+ Represents one raw data stream. So far, only picture data is defined.
+
+ The blob header is followed by a payload, then padding to n*16 bytes,
+ then (if further blobs are defined in the file header) a further blob
+ header.
+
+
+Alignment
+---------
+
+The bootsplash file is designed to be loaded into memory as-is.
+
+All structures are a multiple of 16 bytes long, all elements therein are
+aligned to multiples of their length, and the payloads are always padded
+up to multiples of 16 bytes. This is to allow aligned accesses in all
+cases while still simply mapping the structures over an in-memory copy of
+the bootsplash file.
+
+
+Further information
+-------------------
+
+Please see ``include/uapi/linux/bootsplash_file.h`` for further
+details and possible values in the file.
+
+
+
+Hooks - how the bootsplash is integrated
+========================================
+
+``drivers/video/fbdev/core/fbcon.c``
+ ``fbcon_init()`` calls ``bootsplash_init()``, which loads the default
+ bootsplash file or the one specified on the kernel command line.
+
+ ``fbcon_switch()`` draws the bootsplash when it's active, and is also
+ one of the callers of ``set_blitting_type()``.
+
+ ``set_blitting_type()`` calls ``fbcon_set_dummyops()`` when the
+ bootsplash is active, overriding the text rendering functions.
+
+ ``fbcon_cursor()`` will call ``bootsplash_disable()`` when an oops is
+ being printed in order to make a kernel panic visible.
+
+``drivers/video/fbdev/core/dummyblit.c``
+ This contains the dummy text rendering functions used to suppress text
+ output while the bootsplash is shown.
+
+``drivers/tty/vt/keyboard.c``
+ ``kbd_keycode()`` can call ``bootsplash_disable()`` when the user
+ presses ESC or F1-F12 (changing VT). This is to provide a built-in way
+ of disabling the splash manually at any time.
+
+
+
+FAQ: Frequently Asked Questions
+===============================
+
+I want to see the log! How do I show the log?
+---------------------------------------------
+
+Press ESC while the splash is shown, or remove the ``bootsplash.bootfile``
+parameter from the kernel cmdline. Without that parameter, the bootsplash
+will boot disabled.
+
+
+Why use FB instead of modern DRM/KMS?
+-------------------------------------
+
+This is a semantic problem:
+ - What memory to draw the splash to?
+ - And what mode will the screen be set to?
+
+Using the fbdev emulation solves these issues.
+
+Let's start from a bare KMS system, without fbcon, and without fbdev
+emulation. In this case, as long as userspace doesn't open the KMS
+device, the state of the screen is undefined. No framebuffer is
+allocated in video RAM, and no particular mode is set.
+
+In this case, we'd have to allocate a framebuffer to show the splash,
+and set our mode ourselves. This either wastes a screenful of video RAM
+if the splash is to co-exist with the userspace program's own allocated
+framebuffer, or there is a flicker as we deactivate and delete the
+bootsplash's framebuffer and hand control over to userspace. Since we
+may set a different mode than userspace, we'd also have flicker due
+to mode switching.
+
+This logic is already contained in every KMS driver that performs fbdev
+emulation. So we might as well use that. And the correct API to do so is
+fbdev. Plus, we get compatibility with old, pure fbdev drivers for free.
+With the fbdev emulation, there is *always* a well-defined framebuffer
+to draw on. And the selection of mode has already been done by the
+graphics driver, so we don't need to reinvent that wheel, either.
+Finally, if userspace decides to use /dev/fbX, we don't have to worry
+about wasting video RAM, either.
+
+
+Why is the bootsplash integrated in fbcon?
+------------------------------------------
+
+Right now, the bootsplash is drawn from within fbcon, as this allows us
+to easily know *when* to draw - i.e. when we're safe from fbcon and
+userspace drawing all over our beautiful splash logo.
+
+Separating them is not easy - see the to-do list below.
+
+
+
+TO DO list for future development
+=================================
+
+Second enable/disable switch for the system
+-------------------------------------------
+
+It may be helpful to differentiate between the system and the user
+switching off the bootsplash. Thus, the system may make it disappear and
+reappear e.g. for a password prompt, yet once the user has pressed ESC,
+it could stay gone.
+
+
+Fix buggy DRM/KMS drivers
+-------------------------
+
+Currently, the splash code manually checks for fbdev emulation provided by
+the ast, cirrus, and mgag200 DRM/KMS drivers.
+These drivers use a manual mechanism similar to deferred I/O for their FB
+emulation, and thus need to be manually flushed onto the screen in the same
+way.
+
+This may be improved upon in several ways:
+
+1. Changing these drivers to expose the fbdev BO's memory directly, like
+ bochsdrmfb does.
+2. Creating a new fb_ops->fb_flush() API to allow the kernel to flush the
+ framebuffer once the bootsplash has been drawn into it.
+
+
+Separating from fbcon
+---------------------
+
+Separating these two components would yield independence from fbcon being
+compiled into the kernel, and thus lowering code size in embedded
+applications.
+
+To do this cleanly will involve a clean separation of users of an FB device
+within the kernel, i.e. fbcon, bootsplash, and userspace. Right now, the
+legacy fbcon code and VT code co-operate to switch between fbcon and
+userspace (by setting the VT into KD_GRAPHICS mode). Installing a muxer
+between these components entails refactoring of old code and checking for
+correct locking.
diff --git a/MAINTAINERS b/MAINTAINERS
index 5c237445761e..7ffac272434e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2709,6 +2709,8 @@ BOOTSPLASH
M: Max Staudt <mstaudt@suse.de>
L: linux-fbdev@vger.kernel.org
S: Maintained
+F: Documentation/ABI/testing/sysfs-platform-bootsplash
+F: Documentation/bootsplash.rst
F: drivers/video/fbdev/core/bootsplash*.*
F: drivers/video/fbdev/core/dummycon.c
F: include/linux/bootsplash.h

View File

@ -0,0 +1,56 @@
From 05044b9e4e4ae03f66e1c504d6fef57a1d135897 Mon Sep 17 00:00:00 2001
From: Dylan Van Assche <me@dylanvanassche.be>
Date: Thu, 24 Dec 2020 19:57:12 +0100
Subject: [PATCH] dts: pinephone: Add 'pine64,pinephone' to compat list
Indicates that all PinePhone models share most of the hardware with each other.
Used by feedbackd when retrieving a device-specific configuration for
haptic feedback.
---
arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.0.dts | 2 +-
arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts | 2 +-
arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.0.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.0.dts
index 0f6faa44c..2e0892b32 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.0.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.0.dts
@@ -7,7 +7,7 @@
/ {
model = "Pine64 PinePhone Developer Batch (1.0)";
- compatible = "pine64,pinephone-1.0", "allwinner,sun50i-a64";
+ compatible = "pine64,pinephone-1.0", "pine64,pinephone", "allwinner,sun50i-a64";
reg_vbus: usb0-vbus {
compatible = "regulator-fixed";
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts
index 95a880fdc..d6bad0838 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts
@@ -7,7 +7,7 @@
/ {
model = "Pine64 PinePhone Braveheart (1.1)";
- compatible = "pine64,pinephone-1.1", "allwinner,sun50i-a64";
+ compatible = "pine64,pinephone-1.1", "pine64,pinephone", "allwinner,sun50i-a64";
reg_vbus: usb0-vbus {
compatible = "regulator-fixed";
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts
index 23ba72508..710493186 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts
@@ -7,7 +7,7 @@
/ {
model = "Pine64 PinePhone (1.2)";
- compatible = "pine64,pinephone-1.2", "allwinner,sun50i-a64";
+ compatible = "pine64,pinephone-1.2", "pine64,pinephone", "allwinner,sun50i-a64";
wifi_pwrseq: wifi-pwrseq {
compatible = "mmc-pwrseq-simple";
--
2.26.2

View File

@ -0,0 +1,129 @@
diff --git a/Documentation/ABI/testing/sysfs-platform-bootsplash b/Documentation/ABI/testing/sysfs-platform-bootsplash
index 742c7b035ded..f8f4b259220e 100644
--- a/Documentation/ABI/testing/sysfs-platform-bootsplash
+++ b/Documentation/ABI/testing/sysfs-platform-bootsplash
@@ -9,3 +9,35 @@ Description:
1: Splash is shown whenever fbcon would show a text console
(i.e. no graphical application is running), and a splash
file is loaded.
+
+What: /sys/devices/platform/bootsplash.0/drop_splash
+Date: Oct 2017
+KernelVersion: 4.14
+Contact: Max Staudt <mstaudt@suse.de>
+Description:
+ Can only be set.
+
+ Any value written will cause the current splash theme file
+ to be unloaded and the text console to be redrawn.
+
+What: /sys/devices/platform/bootsplash.0/load_file
+Date: Oct 2017
+KernelVersion: 4.14
+Contact: Max Staudt <mstaudt@suse.de>
+Description:
+ Can only be set.
+
+ If the written path cannot be loaded, the write fails with
+ ENXIO and the currently loaded splash is left untouched.
+
+ A firmware path written will cause a new theme file to be
+ loaded and the current bootsplash to be replaced.
+ The current enabled/disabled status is not touched.
+ If the splash is already active, it will be redrawn.
+
+ The path has to be a path in /lib/firmware since
+ request_firmware() is used to fetch the data.
+
+ When setting the splash from the shell, echo -n has to be
+ used as any trailing '\n' newline will be interpreted as
+ part of the path.
diff --git a/Documentation/bootsplash.rst b/Documentation/bootsplash.rst
index 611f0c558925..b35aba5093e8 100644
--- a/Documentation/bootsplash.rst
+++ b/Documentation/bootsplash.rst
@@ -67,6 +67,14 @@ sysfs run-time configuration
a splash theme file is also loaded.
+``/sys/devices/platform/bootsplash.0/drop_splash``
+ Unload splash data and free memory.
+
+``/sys/devices/platform/bootsplash.0/load_file``
+ Load a splash file from ``/lib/firmware/``.
+ Note that trailing newlines will be interpreted as part of the file name.
+
+
Kconfig
=======
diff --git a/drivers/video/fbdev/core/bootsplash.c b/drivers/video/fbdev/core/bootsplash.c
index 13fcaabbc2ca..16cb0493629d 100644
--- a/drivers/video/fbdev/core/bootsplash.c
+++ b/drivers/video/fbdev/core/bootsplash.c
@@ -251,11 +251,65 @@ static ssize_t splash_store_enabled(struct device *device,
return count;
}
+/*
+ * sysfs store handler for the write-only "drop_splash" attribute.
+ *
+ * Detaches the currently loaded splash file from splash_state under
+ * data_lock, schedules a VC redraw and hands the detached file to
+ * bootsplash_free_file(). The contents of @buf are not inspected.
+ * An empty write, or a write while no file is loaded, does nothing.
+ *
+ * Always returns @count.
+ */
+static ssize_t splash_store_drop_splash(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct splash_file_priv *fp;
+
+ /* NOTE(review): splash_state.file is tested here without data_lock */
+ if (!buf || !count || !splash_state.file)
+ return count;
+
+ /* Unlink the file under the lock; it is freed after unlocking */
+ mutex_lock(&splash_state.data_lock);
+ fp = splash_state.file;
+ splash_state.file = NULL;
+ mutex_unlock(&splash_state.data_lock);
+
+ /* Redraw the text console */
+ schedule_work(&splash_state.work_redraw_vc);
+
+ bootsplash_free_file(fp);
+
+ return count;
+}
+
+/*
+ * sysfs store handler for the write-only "load_file" attribute.
+ *
+ * Passes @buf unmodified to bootsplash_load_firmware() as the file name.
+ * On success the new file replaces splash_state.file under data_lock,
+ * splash_fb is cleared, a VC redraw is scheduled and the previous file
+ * is handed to bootsplash_free_file().
+ *
+ * Returns @count on success, 0 for an empty write, or -ENXIO when the
+ * file could not be loaded (splash_state is then left unchanged).
+ */
+static ssize_t splash_store_load_file(struct device *device,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct splash_file_priv *fp, *fp_old;
+
+ if (!count)
+ return 0;
+
+ /* Load before taking the lock, so a failure changes nothing */
+ fp = bootsplash_load_firmware(&splash_state.splash_device->dev,
+ buf);
+
+ if (!fp)
+ return -ENXIO;
+
+ /* Swap in the new file; clearing splash_fb marks the FB as not yet painted */
+ mutex_lock(&splash_state.data_lock);
+ fp_old = splash_state.file;
+ splash_state.splash_fb = NULL;
+ splash_state.file = fp;
+ mutex_unlock(&splash_state.data_lock);
+
+ /* Update the splash or text console */
+ schedule_work(&splash_state.work_redraw_vc);
+
+ /*
+ * NOTE(review): fp_old is NULL when no file was loaded before;
+ * presumably bootsplash_free_file() accepts NULL - confirm.
+ */
+ bootsplash_free_file(fp_old);
+ return count;
+}
+
static DEVICE_ATTR(enabled, 0644, splash_show_enabled, splash_store_enabled);
+static DEVICE_ATTR(drop_splash, 0200, NULL, splash_store_drop_splash);
+static DEVICE_ATTR(load_file, 0200, NULL, splash_store_load_file);
static struct attribute *splash_dev_attrs[] = {
&dev_attr_enabled.attr,
+ &dev_attr_drop_splash.attr,
+ &dev_attr_load_file.attr,
NULL
};

View File

@ -0,0 +1,29 @@
From d753557c64f6e85f63cffab53496d6271d724074 Mon Sep 17 00:00:00 2001
From: Martijn Braam <martijn@brixit.nl>
Date: Mon, 15 Feb 2021 13:10:37 -0800
Subject: [PATCH] pinephone: fix pogopin i2c
---
arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
index 39fdf96fe95d..7d0dd52e2f9d 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
@@ -635,11 +635,7 @@ &pio {
vcc-pb-supply = <&reg_dcdc1>;
vcc-pc-supply = <&reg_dcdc1>;
vcc-pd-supply = <&reg_dcdc1>;
- /* pinctrl would enable this even if no camera is powered,
- * which is wrong/not necessary
- *
- * vcc-pe-supply = <&reg_aldo1>; (also used by pogo pins i2c)
- */
+ vcc-pe-supply = <&reg_aldo1>; /* (also used by pogo pins i2c) */
vcc-pf-supply = <&reg_dcdc1>;
vcc-pg-supply = <&reg_dldo4>;
vcc-ph-supply = <&reg_dcdc1>;
--
2.30.1

View File

@ -0,0 +1,25 @@
From 2253c0d31cf17debb97db418bec21ad59cd47c14 Mon Sep 17 00:00:00 2001
From: Arnaud Ferraris <arnaud.ferraris@collabora.com>
Date: Tue, 3 Nov 2020 17:04:35 +0100
Subject: [PATCH 178/183] sun8i-codec: fix headphone jack pin name
---
sound/soc/sunxi/sun8i-codec.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c
index 6128d861df90..ab751fd7c426 100644
--- a/sound/soc/sunxi/sun8i-codec.c
+++ b/sound/soc/sunxi/sun8i-codec.c
@@ -1272,7 +1272,7 @@ static const struct snd_soc_dapm_route sun8i_codec_legacy_routes[] = {
static struct snd_soc_jack_pin sun8i_codec_jack_pins[] = {
{
- .pin = "Headphone Jack",
+ .pin = "Headphone",
.mask = SND_JACK_HEADPHONE,
},
{
--
2.30.0

View File

@ -0,0 +1,130 @@
From 465a75a727ae5eb4c94859bfac4742cb14e38b3e Mon Sep 17 00:00:00 2001
From: Arnaud Ferraris <arnaud.ferraris@collabora.com>
Date: Fri, 3 Apr 2020 17:13:55 +0200
Subject: [PATCH 179/183] arm64: dts: allwinner: pinephone: improve device tree
On PinePhone, the headset mic bias resistor isn't populated on the
schematics (R811), therefore we need to enable the codec's internal
resistor. Additionally, the jack detection IRQs are inverted due to the
connector wiring, so the necessary property is added to the codec node
to make the driver aware of this fact.
We also stop LEDs during suspend to improve battery life, lower
cpu_alert* temperatures so the phone doesn't get too hot and improve the
backlight brightness values so we have a wider usable range.
Finally, the RGB LED max_brightness is set to 1 as it isn't using a PWM
output.
---
.../dts/allwinner/sun50i-a64-pinephone-1.1.dts | 10 +++++-----
.../dts/allwinner/sun50i-a64-pinephone-1.2.dts | 13 ++++++-------
.../dts/allwinner/sun50i-a64-pinephone.dtsi | 18 +++++++++++++-----
3 files changed, 24 insertions(+), 17 deletions(-)
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts
index f084c4f21f12..573f1929da4f 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts
@@ -29,11 +29,11 @@ &backlight {
* value here was chosen as a safe default.
*/
brightness-levels = <
- 774 793 814 842
- 882 935 1003 1088
- 1192 1316 1462 1633
- 1830 2054 2309 2596
- 2916 3271 3664 4096>;
+ 392 413 436 468
+ 512 571 647 742
+ 857 995 1159 1349
+ 1568 1819 2103 2423
+ 2779 3176 3614 4096>;
num-interpolated-steps = <50>;
default-brightness-level = <400>;
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts
index bbf64677c22b..6c3922543fec 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts
@@ -34,14 +34,13 @@ &backlight {
* chosen as a safe default.
*/
brightness-levels = <
- 5000 5248 5506 5858 6345
- 6987 7805 8823 10062 11543
- 13287 15317 17654 20319 23336
- 26724 30505 34702 39335 44427
- 50000
- >;
+ 392 413 436 468
+ 512 571 647 742
+ 857 995 1159 1349
+ 1568 1819 2103 2423
+ 2779 3176 3614 4096>;
num-interpolated-steps = <50>;
- default-brightness-level = <500>;
+ default-brightness-level = <400>;
};
&lis3mdl {
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
index c55709197804..441358592072 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
@@ -219,21 +219,21 @@
function = LED_FUNCTION_INDICATOR;
color = <LED_COLOR_ID_BLUE>;
gpios = <&pio 3 20 GPIO_ACTIVE_HIGH>; /* PD20 */
- retain-state-suspended;
+ max-brightness = <1>;
};
led-1 {
function = LED_FUNCTION_INDICATOR;
color = <LED_COLOR_ID_GREEN>;
gpios = <&pio 3 18 GPIO_ACTIVE_HIGH>; /* PD18 */
- retain-state-suspended;
+ max-brightness = <1>;
};
led-2 {
function = LED_FUNCTION_INDICATOR;
color = <LED_COLOR_ID_RED>;
gpios = <&pio 3 19 GPIO_ACTIVE_HIGH>; /* PD19 */
- retain-state-suspended;
+ max-brightness = <1>;
};
};
@@ -380,6 +380,14 @@
cpu-supply = <&reg_dcdc2>;
};
+&cpu_alert0 {
+ temperature = <60000>;
+};
+
+&cpu_alert1 {
+ temperature = <80000>;
+};
+
&csi {
pinctrl-0 = <&csi_pins>, <&csi_mclk_pin>;
status = "okay";
@@ -816,11 +824,11 @@
simple-audio-card,aux-devs = <&codec_analog>, <&speaker_amp>;
simple-audio-card,widgets = "Microphone", "Headset Microphone",
"Microphone", "Internal Microphone",
- "Headphone", "Headphone Jack",
+ "Headphone", "Headphone",
"Speaker", "Internal Earpiece",
"Speaker", "Internal Speaker";
simple-audio-card,routing =
- "Headphone Jack", "HP",
+ "Headphone", "HP",
"Internal Earpiece", "EARPIECE",
"Internal Speaker", "Speaker Amp OUTL",
"Internal Speaker", "Speaker Amp OUTR",
--
2.30.0

View File

@ -0,0 +1,116 @@
From 465a75a727ae5eb4c94859bfac4742cb14e38b3e Mon Sep 17 00:00:00 2001
From: Arnaud Ferraris <arnaud.ferraris@collabora.com>
Date: Fri, 3 Apr 2020 17:13:55 +0200
Subject: [PATCH 179/183] arm64: dts: allwinner: pinephone: improve device tree
On PinePhone, the headset mic bias resistor isn't populated on the
schematics (R811), therefore we need to enable the codec's internal
resistor. Additionally, the jack detection IRQs are inverted due to the
connector wiring, so the necessary property is added to the codec node
to make the driver aware of this fact.
We also stop LEDs during suspend to improve battery life, lower
cpu_alert* temperatures so the phone doesn't get too hot and improve the
backlight brightness values so we have a wider usable range.
Finally, the RGB LED max_brightness is set to 1 as it isn't using a PWM
output.
---
.../dts/allwinner/sun50i-a64-pinephone-1.1.dts | 10 +++++-----
.../dts/allwinner/sun50i-a64-pinephone-1.2.dts | 13 ++++++-------
.../dts/allwinner/sun50i-a64-pinephone.dtsi | 18 +++++++++++++-----
3 files changed, 24 insertions(+), 17 deletions(-)
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts
index f084c4f21f12..573f1929da4f 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.1.dts
@@ -29,11 +29,11 @@ &backlight {
* value here was chosen as a safe default.
*/
brightness-levels = <
- 774 793 814 842
- 882 935 1003 1088
- 1192 1316 1462 1633
- 1830 2054 2309 2596
- 2916 3271 3664 4096>;
+ 392 413 436 468
+ 512 571 647 742
+ 857 995 1159 1349
+ 1568 1819 2103 2423
+ 2779 3176 3614 4096>;
num-interpolated-steps = <50>;
default-brightness-level = <400>;
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts
index bbf64677c22b..6c3922543fec 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone-1.2.dts
@@ -34,14 +34,13 @@ &backlight {
* chosen as a safe default.
*/
brightness-levels = <
- 5000 5248 5506 5858 6345
- 6987 7805 8823 10062 11543
- 13287 15317 17654 20319 23336
- 26724 30505 34702 39335 44427
- 50000
- >;
+ 392 413 436 468
+ 512 571 647 742
+ 857 995 1159 1349
+ 1568 1819 2103 2423
+ 2779 3176 3614 4096>;
num-interpolated-steps = <50>;
- default-brightness-level = <500>;
+ default-brightness-level = <400>;
};
&lis3mdl {
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
index c55709197804..441358592072 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
@@ -219,21 +219,21 @@
function = LED_FUNCTION_INDICATOR;
color = <LED_COLOR_ID_BLUE>;
gpios = <&pio 3 20 GPIO_ACTIVE_HIGH>; /* PD20 */
- retain-state-suspended;
+ max-brightness = <1>;
};
led-1 {
function = LED_FUNCTION_INDICATOR;
color = <LED_COLOR_ID_GREEN>;
gpios = <&pio 3 18 GPIO_ACTIVE_HIGH>; /* PD18 */
- retain-state-suspended;
+ max-brightness = <1>;
};
led-2 {
function = LED_FUNCTION_INDICATOR;
color = <LED_COLOR_ID_RED>;
gpios = <&pio 3 19 GPIO_ACTIVE_HIGH>; /* PD19 */
- retain-state-suspended;
+ max-brightness = <1>;
};
};
@@ -380,6 +380,14 @@
cpu-supply = <&reg_dcdc2>;
};
+&cpu_alert0 {
+ temperature = <60000>;
+};
+
+&cpu_alert1 {
+ temperature = <80000>;
+};
+
&csi {
pinctrl-0 = <&csi_pins>, <&csi_mclk_pin>;
status = "okay";
--
2.30.0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,512 @@
diff --git a/Makefile b/Makefile
index ed6e7ec60eff6..ffcc7eadc44b8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
PATCHLEVEL = 15
-SUBLEVEL = 0
+SUBLEVEL = 1
EXTRAVERSION =
NAME = Trick or Treat
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 962041148482c..6c0f7f4f7d1de 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -377,9 +377,6 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
void __iomem *tmp;
int i, ret;
- WARN_ON(dev->irq[0] == (unsigned int)-1);
- WARN_ON(dev->irq[1] == (unsigned int)-1);
-
ret = request_resource(parent, &dev->res);
if (ret)
goto err_out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 269437b013280..289c7dc053634 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1078,8 +1078,6 @@ struct amdgpu_device {
char product_name[32];
char serial[20];
- struct amdgpu_autodump autodump;
-
atomic_t throttling_logging_enabled;
struct ratelimit_state throttling_logging_rs;
uint32_t ras_hw_enabled;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 463b9c0283f7e..ec30d81586a79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -27,7 +27,6 @@
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/pm_runtime.h>
-#include <linux/poll.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"
@@ -37,85 +36,7 @@
#include "amdgpu_securedisplay.h"
#include "amdgpu_fw_attestation.h"
-int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev)
-{
#if defined(CONFIG_DEBUG_FS)
- unsigned long timeout = 600 * HZ;
- int ret;
-
- wake_up_interruptible(&adev->autodump.gpu_hang);
-
- ret = wait_for_completion_interruptible_timeout(&adev->autodump.dumping, timeout);
- if (ret == 0) {
- pr_err("autodump: timeout, move on to gpu recovery\n");
- return -ETIMEDOUT;
- }
-#endif
- return 0;
-}
-
-#if defined(CONFIG_DEBUG_FS)
-
-static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)
-{
- struct amdgpu_device *adev = inode->i_private;
- int ret;
-
- file->private_data = adev;
-
- ret = down_read_killable(&adev->reset_sem);
- if (ret)
- return ret;
-
- if (adev->autodump.dumping.done) {
- reinit_completion(&adev->autodump.dumping);
- ret = 0;
- } else {
- ret = -EBUSY;
- }
-
- up_read(&adev->reset_sem);
-
- return ret;
-}
-
-static int amdgpu_debugfs_autodump_release(struct inode *inode, struct file *file)
-{
- struct amdgpu_device *adev = file->private_data;
-
- complete_all(&adev->autodump.dumping);
- return 0;
-}
-
-static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_table_struct *poll_table)
-{
- struct amdgpu_device *adev = file->private_data;
-
- poll_wait(file, &adev->autodump.gpu_hang, poll_table);
-
- if (amdgpu_in_reset(adev))
- return POLLIN | POLLRDNORM | POLLWRNORM;
-
- return 0;
-}
-
-static const struct file_operations autodump_debug_fops = {
- .owner = THIS_MODULE,
- .open = amdgpu_debugfs_autodump_open,
- .poll = amdgpu_debugfs_autodump_poll,
- .release = amdgpu_debugfs_autodump_release,
-};
-
-static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev)
-{
- init_completion(&adev->autodump.dumping);
- complete_all(&adev->autodump.dumping);
- init_waitqueue_head(&adev->autodump.gpu_hang);
-
- debugfs_create_file("amdgpu_autodump", 0600,
- adev_to_drm(adev)->primary->debugfs_root,
- adev, &autodump_debug_fops);
-}
/**
* amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
@@ -1588,7 +1509,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
}
amdgpu_ras_debugfs_create_all(adev);
- amdgpu_debugfs_autodump_init(adev);
amdgpu_rap_debugfs_init(adev);
amdgpu_securedisplay_debugfs_init(adev);
amdgpu_fw_attestation_debugfs_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
index 141a8474e24f2..8b641f40fdf66 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h
@@ -26,10 +26,6 @@
/*
* Debugfs
*/
-struct amdgpu_autodump {
- struct completion dumping;
- struct wait_queue_head gpu_hang;
-};
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
int amdgpu_debugfs_init(struct amdgpu_device *adev);
@@ -37,4 +33,3 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev);
void amdgpu_debugfs_fence_init(struct amdgpu_device *adev);
void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
void amdgpu_debugfs_gem_init(struct amdgpu_device *adev);
-int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index af9bdf16eefd4..b8d9004fb1635 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2432,10 +2432,6 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (!adev->gmc.xgmi.pending_reset)
amdgpu_amdkfd_device_init(adev);
- r = amdgpu_amdkfd_resume_iommu(adev);
- if (r)
- goto init_failed;
-
amdgpu_fru_get_product_info(adev);
init_failed:
@@ -4466,10 +4462,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
if (reset_context->reset_req_dev == adev)
job = reset_context->job;
- /* no need to dump if device is not in good state during probe period */
- if (!adev->gmc.xgmi.pending_reset)
- amdgpu_debugfs_wait_dump(adev);
-
if (amdgpu_sriov_vf(adev)) {
/* stop the data exchange thread */
amdgpu_virt_fini_data_exchange(adev);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 4a416231b24c8..a6afacc3b10cd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -924,6 +924,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
svm_migrate_init((struct amdgpu_device *)kfd->kgd);
+ if(kgd2kfd_resume_iommu(kfd))
+ goto device_iommu_error;
+
if (kfd_resume(kfd))
goto kfd_resume_error;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index 8080bba5b7a76..de9ec5ddb6c72 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -247,6 +247,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf,
{
struct amdgpu_dm_connector *connector = file_inode(f)->i_private;
struct dc_link *link = connector->dc_link;
+ struct dc *dc = (struct dc *)link->dc;
struct dc_link_settings prefer_link_settings;
char *wr_buf = NULL;
const uint32_t wr_buf_size = 40;
@@ -313,7 +314,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf,
prefer_link_settings.lane_count = param[0];
prefer_link_settings.link_rate = param[1];
- dp_retrain_link_dp_test(link, &prefer_link_settings, false);
+ dc_link_set_preferred_training_settings(dc, &prefer_link_settings, NULL, link, true);
kfree(wr_buf);
return size;
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index de5f9c86b9a44..cafb0608ffb46 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -2140,10 +2140,6 @@ static void __execlists_unhold(struct i915_request *rq)
if (p->flags & I915_DEPENDENCY_WEAK)
continue;
- /* Propagate any change in error status */
- if (rq->fence.error)
- i915_request_set_error_once(w, rq->fence.error);
-
if (w->engine != rq->engine)
continue;
diff --git a/drivers/media/firewire/firedtv-avc.c b/drivers/media/firewire/firedtv-avc.c
index 2bf9467b917d1..71991f8638e6b 100644
--- a/drivers/media/firewire/firedtv-avc.c
+++ b/drivers/media/firewire/firedtv-avc.c
@@ -1165,7 +1165,11 @@ int avc_ca_pmt(struct firedtv *fdtv, char *msg, int length)
read_pos += program_info_length;
write_pos += program_info_length;
}
- while (read_pos < length) {
+ while (read_pos + 4 < length) {
+ if (write_pos + 4 >= sizeof(c->operand) - 4) {
+ ret = -EINVAL;
+ goto out;
+ }
c->operand[write_pos++] = msg[read_pos++];
c->operand[write_pos++] = msg[read_pos++];
c->operand[write_pos++] = msg[read_pos++];
@@ -1177,13 +1181,17 @@ int avc_ca_pmt(struct firedtv *fdtv, char *msg, int length)
c->operand[write_pos++] = es_info_length >> 8;
c->operand[write_pos++] = es_info_length & 0xff;
if (es_info_length > 0) {
+ if (read_pos >= length) {
+ ret = -EINVAL;
+ goto out;
+ }
pmt_cmd_id = msg[read_pos++];
if (pmt_cmd_id != 1 && pmt_cmd_id != 4)
dev_err(fdtv->device, "invalid pmt_cmd_id %d at stream level\n",
pmt_cmd_id);
- if (es_info_length > sizeof(c->operand) - 4 -
- write_pos) {
+ if (es_info_length > sizeof(c->operand) - 4 - write_pos ||
+ es_info_length > length - read_pos) {
ret = -EINVAL;
goto out;
}
diff --git a/drivers/media/firewire/firedtv-ci.c b/drivers/media/firewire/firedtv-ci.c
index 9363d005e2b61..e0d57e09dab0c 100644
--- a/drivers/media/firewire/firedtv-ci.c
+++ b/drivers/media/firewire/firedtv-ci.c
@@ -134,6 +134,8 @@ static int fdtv_ca_pmt(struct firedtv *fdtv, void *arg)
} else {
data_length = msg->msg[3];
}
+ if (data_length > sizeof(msg->msg) - data_pos)
+ return -EINVAL;
return avc_ca_pmt(fdtv, &msg->msg[data_pos], data_length);
}
diff --git a/drivers/net/ethernet/sfc/ethtool_common.c b/drivers/net/ethernet/sfc/ethtool_common.c
index bf1443539a1a4..bd552c7dffcb1 100644
--- a/drivers/net/ethernet/sfc/ethtool_common.c
+++ b/drivers/net/ethernet/sfc/ethtool_common.c
@@ -563,20 +563,14 @@ int efx_ethtool_get_link_ksettings(struct net_device *net_dev,
{
struct efx_nic *efx = netdev_priv(net_dev);
struct efx_link_state *link_state = &efx->link_state;
- u32 supported;
mutex_lock(&efx->mac_lock);
efx_mcdi_phy_get_link_ksettings(efx, cmd);
mutex_unlock(&efx->mac_lock);
/* Both MACs support pause frames (bidirectional and respond-only) */
- ethtool_convert_link_mode_to_legacy_u32(&supported,
- cmd->link_modes.supported);
-
- supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
-
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
- supported);
+ ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+ ethtool_link_ksettings_add_link_mode(cmd, supported, Asym_Pause);
if (LOOPBACK_INTERNAL(efx)) {
cmd->base.speed = link_state->speed;
diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index ec913ec991f3f..6e91bdb2e08d4 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -604,15 +604,6 @@ static int wcn36xx_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
}
}
}
- /* FIXME: Only enable bmps support when encryption is enabled.
- * For any reasons, when connected to open/no-security BSS,
- * the wcn36xx controller in bmps mode does not forward
- * 'wake-up' beacons despite AP sends DTIM with station AID.
- * It could be due to a firmware issue or to the way driver
- * configure the station.
- */
- if (vif->type == NL80211_IFTYPE_STATION)
- vif_priv->allow_bmps = true;
break;
case DISABLE_KEY:
if (!(IEEE80211_KEY_FLAG_PAIRWISE & key_conf->flags)) {
@@ -913,7 +904,6 @@ static void wcn36xx_bss_info_changed(struct ieee80211_hw *hw,
vif->addr,
bss_conf->aid);
vif_priv->sta_assoc = false;
- vif_priv->allow_bmps = false;
wcn36xx_smd_set_link_st(wcn,
bss_conf->bssid,
vif->addr,
diff --git a/drivers/net/wireless/ath/wcn36xx/pmc.c b/drivers/net/wireless/ath/wcn36xx/pmc.c
index 2d0780fefd477..2936aaf532738 100644
--- a/drivers/net/wireless/ath/wcn36xx/pmc.c
+++ b/drivers/net/wireless/ath/wcn36xx/pmc.c
@@ -23,10 +23,7 @@ int wcn36xx_pmc_enter_bmps_state(struct wcn36xx *wcn,
{
int ret = 0;
struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif);
-
- if (!vif_priv->allow_bmps)
- return -ENOTSUPP;
-
+ /* TODO: Make sure the TX chain clean */
ret = wcn36xx_smd_enter_bmps(wcn, vif);
if (!ret) {
wcn36xx_dbg(WCN36XX_DBG_PMC, "Entered BMPS\n");
diff --git a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
index add6e527e8330..e9560f35e9bcf 100644
--- a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
+++ b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h
@@ -128,7 +128,6 @@ struct wcn36xx_vif {
enum wcn36xx_hal_bss_type bss_type;
/* Power management */
- bool allow_bmps;
enum wcn36xx_power_state pw_state;
u8 bss_index;
diff --git a/drivers/soc/imx/gpcv2.c b/drivers/soc/imx/gpcv2.c
index 34a9ac1f2b9b1..8b7a01773aec2 100644
--- a/drivers/soc/imx/gpcv2.c
+++ b/drivers/soc/imx/gpcv2.c
@@ -244,6 +244,8 @@ static int imx_pgc_power_up(struct generic_pm_domain *genpd)
goto out_regulator_disable;
}
+ reset_control_assert(domain->reset);
+
if (domain->bits.pxx) {
/* request the domain to power up */
regmap_update_bits(domain->regmap, GPC_PU_PGC_SW_PUP_REQ,
@@ -266,8 +268,6 @@ static int imx_pgc_power_up(struct generic_pm_domain *genpd)
GPC_PGC_CTRL_PCR);
}
- reset_control_assert(domain->reset);
-
/* delay for reset to propagate */
udelay(5);
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 7ee6e4cc0d89e..00d35fe1fef0b 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2795,7 +2795,6 @@ int usb_add_hcd(struct usb_hcd *hcd,
{
int retval;
struct usb_device *rhdev;
- struct usb_hcd *shared_hcd;
if (!hcd->skip_phy_initialization && usb_hcd_is_primary_hcd(hcd)) {
hcd->phy_roothub = usb_phy_roothub_alloc(hcd->self.sysdev);
@@ -2956,26 +2955,13 @@ int usb_add_hcd(struct usb_hcd *hcd,
goto err_hcd_driver_start;
}
- /* starting here, usbcore will pay attention to the shared HCD roothub */
- shared_hcd = hcd->shared_hcd;
- if (!usb_hcd_is_primary_hcd(hcd) && shared_hcd && HCD_DEFER_RH_REGISTER(shared_hcd)) {
- retval = register_root_hub(shared_hcd);
- if (retval != 0)
- goto err_register_root_hub;
-
- if (shared_hcd->uses_new_polling && HCD_POLL_RH(shared_hcd))
- usb_hcd_poll_rh_status(shared_hcd);
- }
-
/* starting here, usbcore will pay attention to this root hub */
- if (!HCD_DEFER_RH_REGISTER(hcd)) {
- retval = register_root_hub(hcd);
- if (retval != 0)
- goto err_register_root_hub;
+ retval = register_root_hub(hcd);
+ if (retval != 0)
+ goto err_register_root_hub;
- if (hcd->uses_new_polling && HCD_POLL_RH(hcd))
- usb_hcd_poll_rh_status(hcd);
- }
+ if (hcd->uses_new_polling && HCD_POLL_RH(hcd))
+ usb_hcd_poll_rh_status(hcd);
return retval;
@@ -3013,7 +2999,6 @@ EXPORT_SYMBOL_GPL(usb_add_hcd);
void usb_remove_hcd(struct usb_hcd *hcd)
{
struct usb_device *rhdev = hcd->self.root_hub;
- bool rh_registered;
dev_info(hcd->self.controller, "remove, state %x\n", hcd->state);
@@ -3024,7 +3009,6 @@ void usb_remove_hcd(struct usb_hcd *hcd)
dev_dbg(hcd->self.controller, "roothub graceful disconnect\n");
spin_lock_irq (&hcd_root_hub_lock);
- rh_registered = hcd->rh_registered;
hcd->rh_registered = 0;
spin_unlock_irq (&hcd_root_hub_lock);
@@ -3034,8 +3018,7 @@ void usb_remove_hcd(struct usb_hcd *hcd)
cancel_work_sync(&hcd->died_work);
mutex_lock(&usb_bus_idr_lock);
- if (rh_registered)
- usb_disconnect(&rhdev); /* Sets rhdev to NULL */
+ usb_disconnect(&rhdev); /* Sets rhdev to NULL */
mutex_unlock(&usb_bus_idr_lock);
/*
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 541fe4dcc43a2..902f410874e8e 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -692,7 +692,6 @@ int xhci_run(struct usb_hcd *hcd)
if (ret)
xhci_free_command(xhci, command);
}
- set_bit(HCD_FLAG_DEFER_RH_REGISTER, &hcd->flags);
xhci_dbg_trace(xhci, trace_xhci_dbg_init,
"Finished xhci_run for USB2 roothub");
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 2c1fc9212cf28..548a028f2dabb 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -124,7 +124,6 @@ struct usb_hcd {
#define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */
#define HCD_FLAG_DEAD 6 /* controller has died? */
#define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */
-#define HCD_FLAG_DEFER_RH_REGISTER 8 /* Defer roothub registration */
/* The flags can be tested using these macros; they are likely to
* be slightly faster than test_bit().
@@ -135,7 +134,6 @@ struct usb_hcd {
#define HCD_WAKEUP_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING))
#define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING))
#define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD))
-#define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER))
/*
* Specifies if interfaces are authorized by default
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 8929d9abe8aa8..74e5bd2cc9329 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1887,6 +1887,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
DEVICE_FLG(0x21b4, 0x0081, /* AudioQuest DragonFly */
QUIRK_FLAG_GET_SAMPLE_RATE),
+ DEVICE_FLG(0x2708, 0x0002, /* Audient iD14 */
+ QUIRK_FLAG_IGNORE_CTL_ERROR),
DEVICE_FLG(0x2912, 0x30c8, /* Audioengine D1 */
QUIRK_FLAG_GET_SAMPLE_RATE),
DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */

View File

@ -0,0 +1,12 @@
[Trigger]
Type = File
Operation = Install
Operation = Upgrade
Operation = Remove
Target = usr/lib/modules/%KERNVER%/*
Target = usr/lib/modules/%EXTRAMODULES%/*
[Action]
Description = Updating %PKGBASE% module dependencies...
When = PostTransaction
Exec = /usr/bin/depmod %KERNVER%

View File

@ -0,0 +1,11 @@
[Trigger]
Type = File
Operation = Install
Operation = Upgrade
Target = boot/Image
Target = usr/lib/initcpio/*
[Action]
Description = Updating %PKGBASE% initcpios...
When = PostTransaction
Exec = /usr/bin/mkinitcpio -p %PKGBASE%

View File

@ -0,0 +1,118 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-15.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
DKIM_VALID,DKIM_VALID_AU,FREEMAIL_FORGED_FROMDOMAIN,FREEMAIL_FROM,
HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_PATCH,MAILING_LIST_MULTI,
MENTIONS_GIT_HOSTING,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham
autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 30CB3C433DB
for <linux-kernel@archiver.kernel.org>; Sat, 27 Mar 2021 13:07:07 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id F053961971
for <linux-kernel@archiver.kernel.org>; Sat, 27 Mar 2021 13:07:06 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S230259AbhC0NGA (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 27 Mar 2021 09:06:00 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59650 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S229582AbhC0NFz (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 27 Mar 2021 09:05:55 -0400
Received: from mail-pf1-x435.google.com (mail-pf1-x435.google.com [IPv6:2607:f8b0:4864:20::435])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 7DAA2C0613B1
for <linux-kernel@vger.kernel.org>; Sat, 27 Mar 2021 06:05:55 -0700 (PDT)
Received: by mail-pf1-x435.google.com with SMTP id j25so6773824pfe.2
for <linux-kernel@vger.kernel.org>; Sat, 27 Mar 2021 06:05:55 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=gmail.com; s=20161025;
h=from:to:cc:subject:date:message-id:mime-version
:content-transfer-encoding;
bh=j1vEagJw59z7NUkgTmJto/iZqe4yWAoKFE4cIPL6KSA=;
b=W++BsAIwl+3ucDDq6J9gfd2LQIPwdj2biEakUpnxpy/YsK+3iCoF33X3sftQWOxfP7
IPnjeO9Rde2kHHrwJMlMu2vkeccABUo1mLD6uTVGrEfxwUsN9YJWo9jUzaSY3cyvbGI2
ehpcqqnKgU3Fn5s/X8mIRCamioqbkc4WM23kxCZTaHn+XhPN+SWL6FLKRXDgGS2ivbn1
OER7kHG7/pVx6TS91PpzwDFvN3Rnt6x9672uUa2Evpoapzy9P2POwsDV4zxWpGjlEc44
x99r44072+JXwATmacsPj8yOAksyAgxYYzYtUqJdpS51qpCmDWMBwE12Glc6HXWJKDrZ
V2jw==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:from:to:cc:subject:date:message-id:mime-version
:content-transfer-encoding;
bh=j1vEagJw59z7NUkgTmJto/iZqe4yWAoKFE4cIPL6KSA=;
b=kBUfAu8SPEqgcOJWgnRXoXkAISBVy5hrC8mdP43B3bJdOtUy3ckGem52OQlxU4JiZ9
fRLzgQhtoaEPJwBegty5xhiBi7a9Z8jtP5y9BGPLtHnPY0vj7Q5WoLeJW8VQSdnTfIea
LWI0QFW1FPejnWt1OkpKrg23YuIwnAy4JeS4ppYqwxoJLaA2CK49uIgHZvkO/KUWTZ1u
H4FTpR1COt4JDajdNEO9PXbgAIn7Zhc84m4BrHeFhPPMguGgiORNl8CiVYnEidYMOYB4
xKZysxTMak3CltRje+eBSpYQa9T1mcAv1kMsc7OY2YJYnK7TadU5FcwBnEGYEGtI6pK9
kbyg==
X-Gm-Message-State: AOAM531ZZkAzAjwe4lLioYqlb3vcaxOF8kBIHnAElR+v7R84d5htjzCI
w2CjFLBp7ZYpGnOW93HhwfMMXbvyGk6rb63y
X-Google-Smtp-Source: ABdhPJwtfu5Tfa4UYXfW8whmzPPLMc8XbemaxfI6PdBR3nuuwqL4rugT9FibIaSdi/j6K0J4cIJCqg==
X-Received: by 2002:a63:e906:: with SMTP id i6mr16394983pgh.132.1616850352891;
Sat, 27 Mar 2021 06:05:52 -0700 (PDT)
Received: from johnchen902-arch-ryzen.. (2001-b011-3815-3a1f-9afa-9bff-fe6e-3ce2.dynamic-ip6.hinet.net. [2001:b011:3815:3a1f:9afa:9bff:fe6e:3ce2])
by smtp.gmail.com with ESMTPSA id ot17sm6413787pjb.50.2021.03.27.06.05.51
(version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);
Sat, 27 Mar 2021 06:05:52 -0700 (PDT)
From: John Chen <johnchen902@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: Rohit Pidaparthi <rohitpid@gmail.com>,
RicardoEPRodrigues <ricardo.e.p.rodrigues@gmail.com>,
Jiri Kosina <jikos@kernel.org>,
Benjamin Tissoires <benjamin.tissoires@redhat.com>,
John Chen <johnchen902@gmail.com>
Subject: [PATCH 0/4] HID: add Apple Magic Mouse 2 support
Date: Sat, 27 Mar 2021 21:05:04 +0800
Message-Id: <20210327130508.24849-1-johnchen902@gmail.com>
X-Mailer: git-send-email 2.31.0
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210327130508.24849-1-johnchen902@gmail.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
The HID descriptor of Magic Mouse 2 contains BTN_LEFT, BTN_RIGHT, REL_X,
REL_Y, whether it's charging, whether it's fully charged, and battery
capacity.
$ xxd -p report_descriptor
05010902a101851205091901290215002501950275018102950175068103
05010901a1001601f826ff073601fb46ff046513550d0930093175109502
8106750895028101c00602ff09558555150026ff0075089540b1a2c00600
ff0914a10185900584750195031500250109610585094409468102950581
0175089501150026ff0009658102c000
As hidinput can handle the BTNs and RELs, the Magic Mouse 2 already
functions as a basic mouse. Nevertheless, it should be reasonable to
extend hid-magicmouse to support Magic Mouse 2 as well. Furthermore,
hidinput is patched to handle the battery capacity.
This work is based on Ricardo's, which is in turn based on Rohitpid's.
Their GitHub repositories are linked below:
https://github.com/RicardoEPRodrigues/magicmouse-hid
https://github.com/rohitpid/Linux-Magic-Trackpad-2-Driver
John Chen (4):
HID: magicmouse: add Apple Magic Mouse 2 support
HID: magicmouse: fix 3 button emulation of Mouse 2
HID: magicmouse: fix reconnection of Magic Mouse 2
HID: input: map battery capacity (00850065)
drivers/hid/hid-debug.c | 1 +
drivers/hid/hid-ids.h | 1 +
drivers/hid/hid-input.c | 11 +++
drivers/hid/hid-magicmouse.c | 156 ++++++++++++++++++++++++++++-------
include/linux/hid.h | 3 +
5 files changed, 140 insertions(+), 32 deletions(-)
--
2.31.0

View File

@ -0,0 +1,247 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-15.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
DKIM_VALID,DKIM_VALID_AU,FREEMAIL_FORGED_FROMDOMAIN,FREEMAIL_FROM,
HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH,
MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham
autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 4888EC433C1
for <linux-kernel@archiver.kernel.org>; Sat, 27 Mar 2021 13:07:07 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 0E6E861981
for <linux-kernel@archiver.kernel.org>; Sat, 27 Mar 2021 13:07:07 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S230295AbhC0NGh (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 27 Mar 2021 09:06:37 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59740 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S229582AbhC0NGT (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 27 Mar 2021 09:06:19 -0400
Received: from mail-pg1-x529.google.com (mail-pg1-x529.google.com [IPv6:2607:f8b0:4864:20::529])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 82262C0613B1
for <linux-kernel@vger.kernel.org>; Sat, 27 Mar 2021 06:06:19 -0700 (PDT)
Received: by mail-pg1-x529.google.com with SMTP id v10so6405578pgs.12
for <linux-kernel@vger.kernel.org>; Sat, 27 Mar 2021 06:06:19 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=gmail.com; s=20161025;
h=from:to:cc:subject:date:message-id:in-reply-to:references
:mime-version:content-transfer-encoding;
bh=/43es5lmfTvSMg9V9lh/7OQVghMj1iNxFqwqD88gyCk=;
b=JA8+yZao+x/DmyoiRUpwr0wP9XgaNgDVez40dXm+yEd6Wlgs1dQvO3DkU8n7trJWcL
TCj7NqBp0z4pf3pSHrTxX7rWZX4yRyZJAXo7fqTPqfN2R0PkRIp5gnvcDv+7/BRM4nqx
3pI6ubgKZ+rxYph8XNAuO94/oOjxgItIhOqYGbLPHwa2eoI60mUbrF/ukBsw8OwQ+Vli
0siGyaoTCPP/h+9uuHJqQJ1yw6CCkCAxMwZXD79abtLytL6WkhuvoFJ6exRYGHawcHMs
bel32ifzIlv+7ULbcTI2uVNhxvdrD51tRSNrAZ77n+Tk8RivXMeSqSzPVngWZCs0uk6s
JryA==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to
:references:mime-version:content-transfer-encoding;
bh=/43es5lmfTvSMg9V9lh/7OQVghMj1iNxFqwqD88gyCk=;
b=fAhjI90TZfQpcQBqM4rN69d8uN92OH3j+lhm/dYYlmqdchK6ZZsPD3wt6VW8/ObU+0
BpTic3inOmn0aVasSmAkbNxaVAUJ339klb/WnO9RfaemBLXDCBMgGjVr+ofhpIbfKxiZ
0aBswW4Dc2uY39zmxm7wtJ2sRHHwj/Ltdt7B+NYes7Kzohvfg98YLvm8I5mloimR02U9
HRlPKK2YbMcZ5i2Y8Q3faX8356caUUU7l91utK4EXdrVFCbNftXBEmRej6gXSZudCBga
7w6Rgymaox0hfMZzYLWtJJp2fo3BcKA4+TD6bJ1yrxIdPmK59QMGoyMUIKqTIZIjN2c/
gvpg==
X-Gm-Message-State: AOAM531lA6V8bOmQPsuLmZx3iv59gcixbI4HEH5eqWzOJ/N3DRaX/hb9
NavPhvckezEkR22O7uWWvZAUxOplQlRwSsX5
X-Google-Smtp-Source: ABdhPJyaSIYZWu4pp8j7TnxkxYd0BP77HzgDaIZFIDeoL910Tkv+L4VuoQLEw0GNu+5Zxi80enV/YQ==
X-Received: by 2002:a65:498b:: with SMTP id r11mr16491362pgs.364.1616850378733;
Sat, 27 Mar 2021 06:06:18 -0700 (PDT)
Received: from johnchen902-arch-ryzen.. (2001-b011-3815-3a1f-9afa-9bff-fe6e-3ce2.dynamic-ip6.hinet.net. [2001:b011:3815:3a1f:9afa:9bff:fe6e:3ce2])
by smtp.gmail.com with ESMTPSA id ot17sm6413787pjb.50.2021.03.27.06.06.17
(version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);
Sat, 27 Mar 2021 06:06:18 -0700 (PDT)
From: John Chen <johnchen902@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: Rohit Pidaparthi <rohitpid@gmail.com>,
RicardoEPRodrigues <ricardo.e.p.rodrigues@gmail.com>,
Jiri Kosina <jikos@kernel.org>,
Benjamin Tissoires <benjamin.tissoires@redhat.com>,
John Chen <johnchen902@gmail.com>
Subject: [PATCH 1/4] HID: magicmouse: add Apple Magic Mouse 2 support
Date: Sat, 27 Mar 2021 21:05:05 +0800
Message-Id: <20210327130508.24849-2-johnchen902@gmail.com>
X-Mailer: git-send-email 2.31.0
In-Reply-To: <20210327130508.24849-1-johnchen902@gmail.com>
References: <20210327130508.24849-1-johnchen902@gmail.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210327130508.24849-2-johnchen902@gmail.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Bluetooth device
Vendor 004c (Apple)
Device 0269 (Magic Mouse 2)
Add support for Apple Magic Mouse 2, putting the device in multi-touch
mode.
Co-authored-by: Rohit Pidaparthi <rohitpid@gmail.com>
Co-authored-by: RicardoEPRodrigues <ricardo.e.p.rodrigues@gmail.com>
Signed-off-by: John Chen <johnchen902@gmail.com>
---
drivers/hid/hid-ids.h | 1 +
drivers/hid/hid-magicmouse.c | 53 ++++++++++++++++++++++++++++++++----
2 files changed, 49 insertions(+), 5 deletions(-)
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index e42aaae3138f..fa0edf03570a 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -93,6 +93,7 @@
#define BT_VENDOR_ID_APPLE 0x004c
#define USB_DEVICE_ID_APPLE_MIGHTYMOUSE 0x0304
#define USB_DEVICE_ID_APPLE_MAGICMOUSE 0x030d
+#define USB_DEVICE_ID_APPLE_MAGICMOUSE2 0x0269
#define USB_DEVICE_ID_APPLE_MAGICTRACKPAD 0x030e
#define USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 0x0265
#define USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI 0x020e
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index abd86903875f..7aad6ca56780 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -54,6 +54,7 @@ MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state fie
#define TRACKPAD2_USB_REPORT_ID 0x02
#define TRACKPAD2_BT_REPORT_ID 0x31
#define MOUSE_REPORT_ID 0x29
+#define MOUSE2_REPORT_ID 0x12
#define DOUBLE_REPORT_ID 0xf7
/* These definitions are not precise, but they're close enough. (Bits
* 0x03 seem to indicate the aspect ratio of the touch, bits 0x70 seem
@@ -195,7 +196,8 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda
int id, x, y, size, orientation, touch_major, touch_minor, state, down;
int pressure = 0;
- if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) {
+ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE ||
+ input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) {
id = (tdata[6] << 2 | tdata[5] >> 6) & 0xf;
x = (tdata[1] << 28 | tdata[0] << 20) >> 20;
y = -((tdata[2] << 24 | tdata[1] << 16) >> 20);
@@ -296,7 +298,8 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda
input_report_abs(input, ABS_MT_PRESSURE, pressure);
if (report_undeciphered) {
- if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE)
+ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE ||
+ input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2)
input_event(input, EV_MSC, MSC_RAW, tdata[7]);
else if (input->id.product !=
USB_DEVICE_ID_APPLE_MAGICTRACKPAD2)
@@ -380,6 +383,34 @@ static int magicmouse_raw_event(struct hid_device *hdev,
* ts = data[3] >> 6 | data[4] << 2 | data[5] << 10;
*/
break;
+ case MOUSE2_REPORT_ID:
+ /* Size is either 8 or (14 + 8 * N) */
+ if (size != 8 && (size < 14 || (size - 14) % 8 != 0))
+ return 0;
+ npoints = (size - 14) / 8;
+ if (npoints > 15) {
+ hid_warn(hdev, "invalid size value (%d) for MOUSE2_REPORT_ID\n",
+ size);
+ return 0;
+ }
+ msc->ntouches = 0;
+ for (ii = 0; ii < npoints; ii++)
+ magicmouse_emit_touch(msc, ii, data + ii * 8 + 14);
+
+ /* When emulating three-button mode, it is important
+ * to have the current touch information before
+ * generating a click event.
+ */
+ x = (int)((data[3] << 24) | (data[2] << 16)) >> 16;
+ y = (int)((data[5] << 24) | (data[4] << 16)) >> 16;
+ clicks = data[1];
+
+ /* The following bits provide a device specific timestamp. They
+ * are unused here.
+ *
+ * ts = data[11] >> 6 | data[12] << 2 | data[13] << 10;
+ */
+ break;
case DOUBLE_REPORT_ID:
/* Sometimes the trackpad sends two touch reports in one
* packet.
@@ -392,7 +423,8 @@ static int magicmouse_raw_event(struct hid_device *hdev,
return 0;
}
- if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) {
+ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE ||
+ input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) {
magicmouse_emit_buttons(msc, clicks & 3);
input_report_rel(input, REL_X, x);
input_report_rel(input, REL_Y, y);
@@ -415,7 +447,8 @@ static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hd
__set_bit(EV_KEY, input->evbit);
- if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) {
+ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE ||
+ input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) {
__set_bit(BTN_LEFT, input->keybit);
__set_bit(BTN_RIGHT, input->keybit);
if (emulate_3button)
@@ -480,7 +513,8 @@ static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hd
* the origin at the same position, and just uses the additive
* inverse of the reported Y.
*/
- if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) {
+ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE ||
+ input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) {
input_set_abs_params(input, ABS_MT_ORIENTATION, -31, 32, 1, 0);
input_set_abs_params(input, ABS_MT_POSITION_X,
MOUSE_MIN_X, MOUSE_MAX_X, 4, 0);
@@ -586,6 +620,7 @@ static int magicmouse_probe(struct hid_device *hdev,
{
const u8 *feature;
const u8 feature_mt[] = { 0xD7, 0x01 };
+ const u8 feature_mt_mouse2[] = { 0xF1, 0x02, 0x01 };
const u8 feature_mt_trackpad2_usb[] = { 0x02, 0x01 };
const u8 feature_mt_trackpad2_bt[] = { 0xF1, 0x02, 0x01 };
u8 *buf;
@@ -631,6 +666,9 @@ static int magicmouse_probe(struct hid_device *hdev,
if (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE)
report = hid_register_report(hdev, HID_INPUT_REPORT,
MOUSE_REPORT_ID, 0);
+ else if (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2)
+ report = hid_register_report(hdev, HID_INPUT_REPORT,
+ MOUSE2_REPORT_ID, 0);
else if (id->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) {
if (id->vendor == BT_VENDOR_ID_APPLE)
report = hid_register_report(hdev, HID_INPUT_REPORT,
@@ -660,6 +698,9 @@ static int magicmouse_probe(struct hid_device *hdev,
feature_size = sizeof(feature_mt_trackpad2_usb);
feature = feature_mt_trackpad2_usb;
}
+ } else if (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) {
+ feature_size = sizeof(feature_mt_mouse2);
+ feature = feature_mt_mouse2;
} else {
feature_size = sizeof(feature_mt);
feature = feature_mt;
@@ -696,6 +737,8 @@ static int magicmouse_probe(struct hid_device *hdev,
static const struct hid_device_id magic_mice[] = {
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE,
USB_DEVICE_ID_APPLE_MAGICMOUSE), .driver_data = 0 },
+ { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE,
+ USB_DEVICE_ID_APPLE_MAGICMOUSE2), .driver_data = 0 },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE,
USB_DEVICE_ID_APPLE_MAGICTRACKPAD), .driver_data = 0 },
{ HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE,
--
2.31.0

View File

@ -0,0 +1,134 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-15.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
DKIM_VALID,DKIM_VALID_AU,FREEMAIL_FORGED_FROMDOMAIN,FREEMAIL_FROM,
HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH,
MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham
autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 06C18C433E1
for <linux-kernel@archiver.kernel.org>; Sat, 27 Mar 2021 13:07:08 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id D1CE16193D
for <linux-kernel@archiver.kernel.org>; Sat, 27 Mar 2021 13:07:07 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S230328AbhC0NGi (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 27 Mar 2021 09:06:38 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59770 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S230266AbhC0NG1 (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 27 Mar 2021 09:06:27 -0400
Received: from mail-pl1-x634.google.com (mail-pl1-x634.google.com [IPv6:2607:f8b0:4864:20::634])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5086BC0613B1
for <linux-kernel@vger.kernel.org>; Sat, 27 Mar 2021 06:06:27 -0700 (PDT)
Received: by mail-pl1-x634.google.com with SMTP id h8so2235029plt.7
for <linux-kernel@vger.kernel.org>; Sat, 27 Mar 2021 06:06:27 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=gmail.com; s=20161025;
h=from:to:cc:subject:date:message-id:in-reply-to:references
:mime-version:content-transfer-encoding;
bh=NeWUvZBV3NAy1b0eckELIbBZ7sti/n1sLYnD4r2cjaU=;
b=V7uM0AaI1Vy/mmqpuTVu5F6+98YPDzOa3QS6tRkWeJqhrflMONfCXtOxXVR+CeiPil
OOfaxOtAMeVEW9wE0EU3U/8aNghtzuUvVN+0Tj57+W+4g0ilQOODiDLDu4ZqAo1Q5eDZ
gA+He13KWVwNYaYTNUNParLXG5GYDbblaqABSUDurI1FTjn1US0ZZytlzdZy1GfL9eTj
6AiiVM3A4YdUGUWE7qQQE8jI92o4qKYvaNjn1M+d5ypKCue3NJWeRTSPKLu0QD2qL02+
QPga2RPtmLpztA8/lPGTRpgVNY3C5jdCBZyWgFtvZg5dNoDfe5bQnAmF2J2ka+A7JBSD
VHtw==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to
:references:mime-version:content-transfer-encoding;
bh=NeWUvZBV3NAy1b0eckELIbBZ7sti/n1sLYnD4r2cjaU=;
b=OQek2lJ5JINezfYdN/FzSPFL1N9Hrs+KstU7K4gEHavdffvSAOBebg2MG5VSzkf93H
o1iOiAOoXY7cx7j7Vx5CFZUuJOLilpC6gPTJpZlaP8YtEFfGkPaUPPh5FSTyM463Sir8
n6DupTSrFUI1y44GOBZ2bM2pf9hRN1Yj1oiCT6upmfoHw0/PaKEZt5aOEI8se7HRJp94
td6+SEZok3uxKEglKEqAG8cnj7Pt4tKVQlg+MI1AQDLQ/ytdYJlMPmrqVyNpnsv44wYa
dxBf0TaMvqn9SYDIDcGct3toAVm5DfVUqXm1nkYcYMOdvPrmLoH52NtCyi5cYC+2TR6i
jUpA==
X-Gm-Message-State: AOAM532sXgN0NNpKjilSMBewUXwwXz+MOfd7J5FRI6zAWA5st7gy5LmE
Sw/QHj4cm3zT07LU1kWYSO9puwFV+yK0Hquf
X-Google-Smtp-Source: ABdhPJyDnhcP7BeBHXX2rPqMXwkOQiZdussDPATmYqyQnp7HAsi0OqWSUVIloMNi3QBpMsmjXTtyew==
X-Received: by 2002:a17:903:2285:b029:e6:faf5:eaff with SMTP id b5-20020a1709032285b02900e6faf5eaffmr19574014plh.70.1616850386727;
Sat, 27 Mar 2021 06:06:26 -0700 (PDT)
Received: from johnchen902-arch-ryzen.. (2001-b011-3815-3a1f-9afa-9bff-fe6e-3ce2.dynamic-ip6.hinet.net. [2001:b011:3815:3a1f:9afa:9bff:fe6e:3ce2])
by smtp.gmail.com with ESMTPSA id ot17sm6413787pjb.50.2021.03.27.06.06.25
(version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);
Sat, 27 Mar 2021 06:06:26 -0700 (PDT)
From: John Chen <johnchen902@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: Rohit Pidaparthi <rohitpid@gmail.com>,
RicardoEPRodrigues <ricardo.e.p.rodrigues@gmail.com>,
Jiri Kosina <jikos@kernel.org>,
Benjamin Tissoires <benjamin.tissoires@redhat.com>,
John Chen <johnchen902@gmail.com>
Subject: [PATCH 2/4] HID: magicmouse: fix 3 button emulation of Mouse 2
Date: Sat, 27 Mar 2021 21:05:06 +0800
Message-Id: <20210327130508.24849-3-johnchen902@gmail.com>
X-Mailer: git-send-email 2.31.0
In-Reply-To: <20210327130508.24849-1-johnchen902@gmail.com>
References: <20210327130508.24849-1-johnchen902@gmail.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210327130508.24849-3-johnchen902@gmail.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
It is observed that, with 3 button emulation, when the middle button is
clicked, either the left button or right button is clicked as well. It
is caused by hidinput "correctly" acting on the event, oblivious to the
3 button emulation.
As raw_event has taken care of everything, no further processing is
needed. However, the only way to stop at raw_event is to return an error
(negative) value. Therefore, the processing is stopped at event instead.
Signed-off-by: John Chen <johnchen902@gmail.com>
---
drivers/hid/hid-magicmouse.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index 7aad6ca56780..c646b4cd3783 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -440,6 +440,21 @@ static int magicmouse_raw_event(struct hid_device *hdev,
return 1;
}
+static int magicmouse_event(struct hid_device *hdev, struct hid_field *field,
+ struct hid_usage *usage, __s32 value)
+{
+ struct magicmouse_sc *msc = hid_get_drvdata(hdev);
+ if (msc->input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 &&
+ field->report->id == MOUSE2_REPORT_ID) {
+ // magic_mouse_raw_event has done all the work. Skip hidinput.
+ //
+ // Specifically, hidinput may modify BTN_LEFT and BTN_RIGHT,
+ // breaking emulate_3button.
+ return 1;
+ }
+ return 0;
+}
+
static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hdev)
{
int error;
@@ -754,6 +769,7 @@ static struct hid_driver magicmouse_driver = {
.id_table = magic_mice,
.probe = magicmouse_probe,
.raw_event = magicmouse_raw_event,
+ .event = magicmouse_event,
.input_mapping = magicmouse_input_mapping,
.input_configured = magicmouse_input_configured,
};
--
2.31.0

View File

@ -0,0 +1,265 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-15.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
DKIM_VALID,DKIM_VALID_AU,FREEMAIL_FORGED_FROMDOMAIN,FREEMAIL_FROM,
HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH,
MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham
autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 9A212C433DB
for <linux-kernel@archiver.kernel.org>; Sat, 27 Mar 2021 13:10:34 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 60FCC61981
for <linux-kernel@archiver.kernel.org>; Sat, 27 Mar 2021 13:10:34 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S230394AbhC0NHJ (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 27 Mar 2021 09:07:09 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59810 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S230307AbhC0NGi (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 27 Mar 2021 09:06:38 -0400
Received: from mail-pf1-x432.google.com (mail-pf1-x432.google.com [IPv6:2607:f8b0:4864:20::432])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 1EDFCC0613B1
for <linux-kernel@vger.kernel.org>; Sat, 27 Mar 2021 06:06:38 -0700 (PDT)
Received: by mail-pf1-x432.google.com with SMTP id q5so6741894pfh.10
for <linux-kernel@vger.kernel.org>; Sat, 27 Mar 2021 06:06:38 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=gmail.com; s=20161025;
h=from:to:cc:subject:date:message-id:in-reply-to:references
:mime-version:content-transfer-encoding;
bh=fWEWnDB7IS15Aoqul4RZDergwEtbUe4NAH8lKjv7p/s=;
b=CGLrSHoDnG8b5CL6asLWP1Ym/QFl+wtwIF8PhKlW7RJ5IhavVtdO6Fd7/cY/3GQTDa
wvX9Q1wfBsakVlG9/sM9CuozOsra6Ec9c1B+0beWTAKj/tBjwvsVHtMoCiqOPL/Vbig6
4zkWMb6dwWSzAgmCqPEaYlyJYqBrDLzzXxqGhchwTfcNgNZQGq0xhh7tZsukEPz4XLIC
LNCy6+hPSVdRG1ADbyPpOGFn3fSeFs5KAwl3y1Cn0TvTPxgpckTLcFz5TsTF/w7VLGW1
bn9Gakn+MaATqxahU0lDwyzI1sMK2er7/ddjV9VugYN4PzgL9DHGu/iGzXGFftDoLdaJ
tBIQ==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to
:references:mime-version:content-transfer-encoding;
bh=fWEWnDB7IS15Aoqul4RZDergwEtbUe4NAH8lKjv7p/s=;
b=PQiPlj7RSTzmBU6u/2xzL9qv8jrelC7cJFFiOHjwKfz43PMzm0nEj6PxY5ZFMSjmbs
JEfC8iDjJh39FJdthBrvaZX4yuTv4QmOdmRMWrN77sQYbZOaKOhbNrCx2/LdHzAFjLBY
qTHW0+siiP/ATBf1M0cSP200UZAjBwU8MRapxAlaIUmlrfr5+oM8ZrL2tMhzDYcn5b51
TwXEVVI5Ep0YZxyGYQ04yaMBZxb1hSKev6UhrFpk96Ukg4IY3qBQBRpjWHIWqZY21aUl
EeDLmlWZaqDbp6UQQrAd2p1kIVyrxKD2Cf4aPnk2JcvzR9qGfMwV8cpR9rqwrXBEiyLj
KZFg==
X-Gm-Message-State: AOAM532lFsZyg8BiLek2pS5Ftc0rOopeD1Q9b7d5Lc7gC8pPIjHcnizK
2/grg+4GExN9zVerojORiZgGkTwU1/c2DswO
X-Google-Smtp-Source: ABdhPJwECFbuV2SwesS0pF6L0s23ghF61g6whXAjcLZpxYe6b6OsgENBMa3gmTj9FFMF+68uJYhPPw==
X-Received: by 2002:a63:1d26:: with SMTP id d38mr17032822pgd.385.1616850397389;
Sat, 27 Mar 2021 06:06:37 -0700 (PDT)
Received: from johnchen902-arch-ryzen.. (2001-b011-3815-3a1f-9afa-9bff-fe6e-3ce2.dynamic-ip6.hinet.net. [2001:b011:3815:3a1f:9afa:9bff:fe6e:3ce2])
by smtp.gmail.com with ESMTPSA id ot17sm6413787pjb.50.2021.03.27.06.06.36
(version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);
Sat, 27 Mar 2021 06:06:37 -0700 (PDT)
From: John Chen <johnchen902@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: Rohit Pidaparthi <rohitpid@gmail.com>,
RicardoEPRodrigues <ricardo.e.p.rodrigues@gmail.com>,
Jiri Kosina <jikos@kernel.org>,
Benjamin Tissoires <benjamin.tissoires@redhat.com>,
John Chen <johnchen902@gmail.com>
Subject: [PATCH 3/4] HID: magicmouse: fix reconnection of Magic Mouse 2
Date: Sat, 27 Mar 2021 21:05:07 +0800
Message-Id: <20210327130508.24849-4-johnchen902@gmail.com>
X-Mailer: git-send-email 2.31.0
In-Reply-To: <20210327130508.24849-1-johnchen902@gmail.com>
References: <20210327130508.24849-1-johnchen902@gmail.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210327130508.24849-4-johnchen902@gmail.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
It is observed that the Magic Mouse 2 would not enter multi-touch mode
unless the mouse is connected before loading the module. It seems to be
a quirk specific to Magic Mouse 2.
Retrying after 500ms fixes the problem for me. The delay can't be
reduced much further --- 300ms didn't work for me. Retrying immediately
after receiving an event didn't work either.
Signed-off-by: John Chen <johnchen902@gmail.com>
---
drivers/hid/hid-magicmouse.c | 93 ++++++++++++++++++++++++------------
1 file changed, 63 insertions(+), 30 deletions(-)
diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index c646b4cd3783..69aefef9fe07 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -16,6 +16,7 @@
#include <linux/input/mt.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/workqueue.h>
#include "hid-ids.h"
@@ -128,6 +129,9 @@ struct magicmouse_sc {
u8 size;
} touches[16];
int tracking_ids[16];
+
+ struct hid_device *hdev;
+ struct delayed_work work;
};
static int magicmouse_firm_touch(struct magicmouse_sc *msc)
@@ -629,9 +633,7 @@ static int magicmouse_input_configured(struct hid_device *hdev,
return 0;
}
-
-static int magicmouse_probe(struct hid_device *hdev,
- const struct hid_device_id *id)
+static int magicmouse_enable_multitouch(struct hid_device *hdev)
{
const u8 *feature;
const u8 feature_mt[] = { 0xD7, 0x01 };
@@ -639,10 +641,52 @@ static int magicmouse_probe(struct hid_device *hdev,
const u8 feature_mt_trackpad2_usb[] = { 0x02, 0x01 };
const u8 feature_mt_trackpad2_bt[] = { 0xF1, 0x02, 0x01 };
u8 *buf;
+ int ret;
+ int feature_size;
+
+ if (hdev->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) {
+ if (hdev->vendor == BT_VENDOR_ID_APPLE) {
+ feature_size = sizeof(feature_mt_trackpad2_bt);
+ feature = feature_mt_trackpad2_bt;
+ } else { /* USB_VENDOR_ID_APPLE */
+ feature_size = sizeof(feature_mt_trackpad2_usb);
+ feature = feature_mt_trackpad2_usb;
+ }
+ } else if (hdev->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) {
+ feature_size = sizeof(feature_mt_mouse2);
+ feature = feature_mt_mouse2;
+ } else {
+ feature_size = sizeof(feature_mt);
+ feature = feature_mt;
+ }
+
+ buf = kmemdup(feature, feature_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ ret = hid_hw_raw_request(hdev, buf[0], buf, feature_size,
+ HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
+ kfree(buf);
+ return ret;
+}
+
+static void magicmouse_enable_mt_work(struct work_struct *work)
+{
+ struct magicmouse_sc *msc =
+ container_of(work, struct magicmouse_sc, work.work);
+ int ret;
+
+ ret = magicmouse_enable_multitouch(msc->hdev);
+ if (ret < 0)
+ hid_err(msc->hdev, "unable to request touch data (%d)\n", ret);
+}
+
+static int magicmouse_probe(struct hid_device *hdev,
+ const struct hid_device_id *id)
+{
struct magicmouse_sc *msc;
struct hid_report *report;
int ret;
- int feature_size;
if (id->vendor == USB_VENDOR_ID_APPLE &&
id->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 &&
@@ -656,6 +700,8 @@ static int magicmouse_probe(struct hid_device *hdev,
}
msc->scroll_accel = SCROLL_ACCEL_DEFAULT;
+ msc->hdev = hdev;
+ INIT_DEFERRABLE_WORK(&msc->work, magicmouse_enable_mt_work);
msc->quirks = id->driver_data;
hid_set_drvdata(hdev, msc);
@@ -705,28 +751,6 @@ static int magicmouse_probe(struct hid_device *hdev,
}
report->size = 6;
- if (id->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) {
- if (id->vendor == BT_VENDOR_ID_APPLE) {
- feature_size = sizeof(feature_mt_trackpad2_bt);
- feature = feature_mt_trackpad2_bt;
- } else { /* USB_VENDOR_ID_APPLE */
- feature_size = sizeof(feature_mt_trackpad2_usb);
- feature = feature_mt_trackpad2_usb;
- }
- } else if (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) {
- feature_size = sizeof(feature_mt_mouse2);
- feature = feature_mt_mouse2;
- } else {
- feature_size = sizeof(feature_mt);
- feature = feature_mt;
- }
-
- buf = kmemdup(feature, feature_size, GFP_KERNEL);
- if (!buf) {
- ret = -ENOMEM;
- goto err_stop_hw;
- }
-
/*
* Some devices repond with 'invalid report id' when feature
* report switching it into multitouch mode is sent to it.
@@ -735,13 +759,14 @@ static int magicmouse_probe(struct hid_device *hdev,
* but there seems to be no other way of switching the mode.
* Thus the super-ugly hacky success check below.
*/
- ret = hid_hw_raw_request(hdev, buf[0], buf, feature_size,
- HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
- kfree(buf);
- if (ret != -EIO && ret != feature_size) {
+ ret = magicmouse_enable_multitouch(hdev);
+ if (ret != -EIO && ret < 0) {
hid_err(hdev, "unable to request touch data (%d)\n", ret);
goto err_stop_hw;
}
+ if (ret == -EIO && id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) {
+ schedule_delayed_work(&msc->work, msecs_to_jiffies(500));
+ }
return 0;
err_stop_hw:
@@ -749,6 +774,13 @@ static int magicmouse_probe(struct hid_device *hdev,
return ret;
}
+static void magicmouse_remove(struct hid_device *hdev)
+{
+ struct magicmouse_sc *msc = hid_get_drvdata(hdev);
+ cancel_delayed_work_sync(&msc->work);
+ hid_hw_stop(hdev);
+}
+
static const struct hid_device_id magic_mice[] = {
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE,
USB_DEVICE_ID_APPLE_MAGICMOUSE), .driver_data = 0 },
@@ -768,6 +800,7 @@ static struct hid_driver magicmouse_driver = {
.name = "magicmouse",
.id_table = magic_mice,
.probe = magicmouse_probe,
+ .remove = magicmouse_remove,
.raw_event = magicmouse_raw_event,
.event = magicmouse_event,
.input_mapping = magicmouse_input_mapping,
--
2.31.0

View File

@ -0,0 +1,155 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-15.8 required=3.0 tests=BAYES_00,DKIM_SIGNED,
DKIM_VALID,DKIM_VALID_AU,FREEMAIL_FORGED_FROMDOMAIN,FREEMAIL_FROM,
HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH,
MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham
autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 9BE24C433E0
for <linux-kernel@archiver.kernel.org>; Sat, 27 Mar 2021 13:10:34 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 70E6A61993
for <linux-kernel@archiver.kernel.org>; Sat, 27 Mar 2021 13:10:34 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S230334AbhC0NHL (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 27 Mar 2021 09:07:11 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59832 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S230347AbhC0NGo (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 27 Mar 2021 09:06:44 -0400
Received: from mail-pg1-x536.google.com (mail-pg1-x536.google.com [IPv6:2607:f8b0:4864:20::536])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 6AAD5C0613B1
for <linux-kernel@vger.kernel.org>; Sat, 27 Mar 2021 06:06:44 -0700 (PDT)
Received: by mail-pg1-x536.google.com with SMTP id 32so6451842pgm.1
for <linux-kernel@vger.kernel.org>; Sat, 27 Mar 2021 06:06:44 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=gmail.com; s=20161025;
h=from:to:cc:subject:date:message-id:in-reply-to:references
:mime-version:content-transfer-encoding;
bh=5GW0J0I07iFc3HTzTSsHG/cnT3ft+pF2eI68TUZquXs=;
b=jJACN/frvED8BOFNtDXFShzg5zpUEJjtXdexEk/8uljNrUnW9QBLA5orX/2hdcZdS4
9HL492GecBx3KY9Y5P/B3fttZmlHkwWbuUktmVlmaIOZv1jAPTyYz5zJYh0O0ncw9/rk
aPvRb4s1NZHByZ4XoCWbWOd98BvgHHQ/m4Zf1zmP5lYjBVMb2r6qSejuJ5ywQOCBKo9x
Q8SL42BOWrlNlsWmeP+oDEYWSDARHpKlRqQ63Y4LdvV2uS9IF6+bmHotGSRlHSLuFN1b
/PcfuSQynv38/EgH8MeE16VKzAvLXGu/KgcOLLCsSLgPFLqKAk3uQVc5QRLw00niguHp
MMUg==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to
:references:mime-version:content-transfer-encoding;
bh=5GW0J0I07iFc3HTzTSsHG/cnT3ft+pF2eI68TUZquXs=;
b=AueOy/X73fYr+YShlS2LMp2gWnsgDUIFV7eKHphj6Q1dBTHtj/dxdarFAonoqAtMRD
RCpUwakkFtecMcJPGG/2FRZdRmbJg/ksRvalLQhQCN8XYGB4T/o3zmaOUUooitcGC3gj
aeCCryhkv9OzDUYTwPsAzTjrDTkTB3Oh8IvWhfEIcb9x8k+J+OLaIrGozUxhtJkepSRc
Uswy+MJkcl8KfXiawODwnjX7JmWwm2SdpRbsBKjJjs+rad+ECgYEFyt8aqYbTabFtIuj
oaHtcYHS6FLYTFcRbhCcVECQo2OUzvJjPNe5GyHqUxlvmdFI/Wn68/dt5DKSl5CHRjrp
qdPA==
X-Gm-Message-State: AOAM530Dp411JuLoADUDvvtPhkgbdVcoa5u6Mm4STd67xIRk49ZPXT33
uWg1/mAtTfIfNFL2t5NEvBtvsCfrYoBgGzJJ
X-Google-Smtp-Source: ABdhPJw4EeRec5E5FY/fifXp32gxWuRl/ThAsWSiEflIS3aALG3LbhZVwkDmQhbhsG3gD8jRMsQIlw==
X-Received: by 2002:a62:17c4:0:b029:1f5:7cfe:ebc4 with SMTP id 187-20020a6217c40000b02901f57cfeebc4mr17080244pfx.5.1616850403793;
Sat, 27 Mar 2021 06:06:43 -0700 (PDT)
Received: from johnchen902-arch-ryzen.. (2001-b011-3815-3a1f-9afa-9bff-fe6e-3ce2.dynamic-ip6.hinet.net. [2001:b011:3815:3a1f:9afa:9bff:fe6e:3ce2])
by smtp.gmail.com with ESMTPSA id ot17sm6413787pjb.50.2021.03.27.06.06.42
(version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);
Sat, 27 Mar 2021 06:06:43 -0700 (PDT)
From: John Chen <johnchen902@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: Rohit Pidaparthi <rohitpid@gmail.com>,
RicardoEPRodrigues <ricardo.e.p.rodrigues@gmail.com>,
Jiri Kosina <jikos@kernel.org>,
Benjamin Tissoires <benjamin.tissoires@redhat.com>,
John Chen <johnchen902@gmail.com>
Subject: [PATCH 4/4] HID: input: map battery capacity (00850065)
Date: Sat, 27 Mar 2021 21:05:08 +0800
Message-Id: <20210327130508.24849-5-johnchen902@gmail.com>
X-Mailer: git-send-email 2.31.0
In-Reply-To: <20210327130508.24849-1-johnchen902@gmail.com>
References: <20210327130508.24849-1-johnchen902@gmail.com>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210327130508.24849-5-johnchen902@gmail.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
This is the capacity in percentage, relative to design capacity.
Specifically, it is present in Apple Magic Mouse 2.
In contrast, usage 00850064 is also the capacity in percentage, but is
relative to full capacity. It is not mapped here because I don't have
such a device.
Signed-off-by: John Chen <johnchen902@gmail.com>
---
drivers/hid/hid-debug.c | 1 +
drivers/hid/hid-input.c | 11 +++++++++++
include/linux/hid.h | 3 +++
3 files changed, 15 insertions(+)
diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index d7eaf9100370..59f8d716d78f 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -417,6 +417,7 @@ static const struct hid_usage_entry hid_usage_table[] = {
{ 0x85, 0x44, "Charging" },
{ 0x85, 0x45, "Discharging" },
{ 0x85, 0x4b, "NeedReplacement" },
+ { 0x85, 0x65, "AbsoluteStateOfCharge" },
{ 0x85, 0x66, "RemainingCapacity" },
{ 0x85, 0x68, "RunTimeToEmpty" },
{ 0x85, 0x6a, "AverageTimeToFull" },
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 236bccd37760..5dea3669a927 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -1074,6 +1074,17 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
}
goto unknown;
+ case HID_UP_BATTERY:
+ switch (usage->hid) {
+ case HID_BAT_ABSOLUTESTATEOFCHARGE:
+ hidinput_setup_battery(device, HID_INPUT_REPORT, field);
+ usage->type = EV_PWR;
+ device->battery_min = 0;
+ device->battery_max = 100;
+ return;
+ }
+ goto unknown;
+
case HID_UP_HPVENDOR: /* Reported on a Dutch layout HP5308 */
set_bit(EV_REP, input->evbit);
switch (usage->hid & HID_USAGE) {
diff --git a/include/linux/hid.h b/include/linux/hid.h
index ef702b3f56e3..b40e1abbe11d 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -153,6 +153,7 @@ struct hid_item {
#define HID_UP_CONSUMER 0x000c0000
#define HID_UP_DIGITIZER 0x000d0000
#define HID_UP_PID 0x000f0000
+#define HID_UP_BATTERY 0x00850000
#define HID_UP_HPVENDOR 0xff7f0000
#define HID_UP_HPVENDOR2 0xff010000
#define HID_UP_MSVENDOR 0xff000000
@@ -297,6 +298,8 @@ struct hid_item {
#define HID_DG_TOOLSERIALNUMBER 0x000d005b
#define HID_DG_LATENCYMODE 0x000d0060
+#define HID_BAT_ABSOLUTESTATEOFCHARGE 0x00850065
+
#define HID_VD_ASUS_CUSTOM_MEDIA_KEYS 0xff310076
/*
* HID report types --- Ouch! HID spec says 1 2 3!
--
2.31.0

View File

@ -0,0 +1,330 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-21.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT,
USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 3BECEC433E0
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id E48DB64F1D
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:05 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S233163AbhCMH6d (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 13 Mar 2021 02:58:33 -0500
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58938 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S230309AbhCMH55 (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 13 Mar 2021 02:57:57 -0500
Received: from mail-qk1-x74a.google.com (mail-qk1-x74a.google.com [IPv6:2607:f8b0:4864:20::74a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 25364C061574
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:57:57 -0800 (PST)
Received: by mail-qk1-x74a.google.com with SMTP id k188so1766042qkb.5
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:57:57 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:message-id:mime-version:subject:from:to:cc;
bh=Dl/3fLOB0H+oajTIzqKaDQePlH9N08uWOrKgpspO4TI=;
b=ZDlp8kO1cOzH9TKK391ns60MA5XH6wAt4WlC5cRspVrndQuLOdzpe4WuBER+H/7iF2
P/jJN5bw/W10rtSgEJl+3nFM9KliKjzDKLX1Wjo+FdVZj7lWam1qWgkQTlezZ+NtB7MK
cT+C7m++Ac2yj63uufwG9IIyPjtCqwGGHd6caaZjsFdwrZIYl6mprawhmN0ajnA+KxLu
3msx/zJkbVaZ75VF4EavCd4hAKjuHACTjU5DSIC+hq9i3Y5TuQGinRu50cx5wXXQqKu+
TLyLtiLkTbZaVeLhF0uQooG8E4w+JXFmnfMxWOPsekXQyWZHebj5hpUPJ1nW39iQnMBt
NShw==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:message-id:mime-version:subject:from:to:cc;
bh=Dl/3fLOB0H+oajTIzqKaDQePlH9N08uWOrKgpspO4TI=;
b=XOeOFYm39AC6u/tzgFN40WA+JyClAjJZL1vfB4B2WAz2bh8R5boQeyY2Px52/PXMqe
PCZUSDqqa3qe2AL+XMFxSKay7L4rvvcPP294PgvjMTHIci5V4Nvhb2gooGAFYMoxkgvH
lEixBlTS6nGyJ8IubphUQVdIAQN9EaHViPwha6EQb3TvAyPjae5NDLVjv32BjQLi8CGw
OTubWcbqjEu/b5lo0MSHi/e6RCI3rcUJRFagT567WMEKCRXl9L9lKS2Y/hxoG2vx6f7E
NTzYk8hh52IHO/hBULiYGwss1WApIAFZmg6gkNZJQhw3Z7ZYCxHz7oMXAJCzFeOBikcZ
lJnw==
X-Gm-Message-State: AOAM5315xRkAW2HlZY5TGBhlW7nW/go+xoCYXD97M7G+xWGL5D5tgqcK
MXUwE4z8bQg+QCpnSwxROcufEldhmTU=
X-Google-Smtp-Source: ABdhPJyxefk+CsNOhJRg0zohX7wmgO41UdqyprhNKHQCCmk9ImMeIO+UNC1eONE3N7hnVkFPu9qRD/MnUEM=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:f931:d3e4:faa0:4f74])
(user=yuzhao job=sendgmr) by 2002:a0c:fc06:: with SMTP id z6mr1801957qvo.25.1615622276103;
Fri, 12 Mar 2021 23:57:56 -0800 (PST)
Date: Sat, 13 Mar 2021 00:57:33 -0700
Message-Id: <20210313075747.3781593-1-yuzhao@google.com>
Mime-Version: 1.0
X-Mailer: git-send-email 2.31.0.rc2.261.g7f71774620-goog
Subject: [PATCH v1 00/14] Multigenerational LRU
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alex.shi@linux.alibaba.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>, Michal Hocko <mhocko@suse.com>,
Roman Gushchin <guro@fb.com>, Vlastimil Babka <vbabka@suse.cz>,
Wei Yang <richard.weiyang@linux.alibaba.com>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>,
linux-kernel@vger.kernel.org, page-reclaim@google.com,
Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210313075747.3781593-1-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
TLDR
====
The current page reclaim is too expensive in terms of CPU usage and
often makes poor choices about what to evict. We would like to offer
a performant, versatile and straightforward augmentation.
Repo
====
git fetch https://linux-mm.googlesource.com/page-reclaim refs/changes/01/1101/1
Gerrit https://linux-mm-review.googlesource.com/c/page-reclaim/+/1101
Background
==========
DRAM is a major factor in total cost of ownership, and improving
memory overcommit brings a high return on investment. Over the past
decade of research and experimentation in memory overcommit, we
observed a distinct trend across millions of servers and clients: the
size of page cache has been decreasing because of the growing
popularity of cloud storage. Nowadays anon pages account for more than
90% of our memory consumption and page cache contains mostly
executable pages.
Problems
========
Notion of the active/inactive
-----------------------------
For servers equipped with hundreds of gigabytes of memory, the
granularity of the active/inactive is too coarse to be useful for job
scheduling. And false active/inactive rates are relatively high. In
addition, scans of largely varying numbers of pages are unpredictable
because inactive_is_low() is based on magic numbers.
For phones and laptops, the eviction is biased toward file pages
because the selection has to resort to heuristics as direct
comparisons between anon and file types are infeasible. On Android and
Chrome OS, executable pages are frequently evicted despite the fact
that there are many less recently used anon pages. This causes "janks"
(slow UI rendering) and negatively impacts user experience.
For systems with multiple nodes and/or memcgs, it is impossible to
compare lruvecs based on the notion of the active/inactive.
Incremental scans via the rmap
------------------------------
Each incremental scan picks up where the last scan left off and
stops after it has found a handful of unreferenced pages. For most of
the systems running cloud workloads, incremental scans lose the
advantage under sustained memory pressure due to high ratios of the
number of scanned pages to the number of reclaimed pages. In our case,
the average ratio of pgscan to pgsteal is about 7.
On top of that, the rmap has poor memory locality due to its complex
data structures. The combined effects typically result in a high
amount of CPU usage in the reclaim path. For example, with zram, a
typical kswapd profile on v5.11 looks like:
31.03% page_vma_mapped_walk
25.59% lzo1x_1_do_compress
4.63% do_raw_spin_lock
3.89% vma_interval_tree_iter_next
3.33% vma_interval_tree_subtree_search
And with real swap, it looks like:
45.16% page_vma_mapped_walk
7.61% do_raw_spin_lock
5.69% vma_interval_tree_iter_next
4.91% vma_interval_tree_subtree_search
3.71% page_referenced_one
Solutions
=========
Notion of generation numbers
----------------------------
The notion of generation numbers introduces a quantitative approach to
memory overcommit. A larger number of pages can be spread out across
configurable generations, and thus they have relatively low false
active/inactive rates. Each generation includes all pages that have
been referenced since the last generation.
Given an lruvec, scans and the selections between anon and file types
are all based on generation numbers, which are simple and yet
effective. For different lruvecs, comparisons are still possible based
on birth times of generations.
Differential scans via page tables
----------------------------------
Each differential scan discovers all pages that have been referenced
since the last scan. Specifically, it walks the mm_struct list
associated with an lruvec to scan page tables of processes that have
been scheduled since the last scan. The cost of each differential scan
is roughly proportional to the number of referenced pages it
discovers. Unless address spaces are extremely sparse, page tables
usually have better memory locality than the rmap. The end result is
generally a significant reduction in CPU usage, for most of the
systems running cloud workloads.
On Chrome OS, our real-world benchmark that browses popular websites
in multiple tabs demonstrates 51% less CPU usage from kswapd and 52%
(full) less PSI on v5.11. And kswapd profile looks like:
49.36% lzo1x_1_do_compress
4.54% page_vma_mapped_walk
4.45% memset_erms
3.47% walk_pte_range
2.88% zram_bvec_rw
In addition, direct reclaim latency is reduced by 22% at 99th
percentile and the number of refaults is reduced by 7%. These metrics are
important to phones and laptops as they are correlated to user
experience.
Workflow
========
Evictable pages are divided into multiple generations for each lruvec.
The youngest generation number is stored in lruvec->evictable.max_seq
for both anon and file types as they are aged on an equal footing. The
oldest generation numbers are stored in lruvec->evictable.min_seq[2]
separately for anon and file types as clean file pages can be evicted
regardless of may_swap or may_writepage. Generation numbers are
truncated into ilog2(MAX_NR_GENS)+1 bits in order to fit into
page->flags. The sliding window technique is used to prevent truncated
generation numbers from overlapping. Each truncated generation number
is an index to
lruvec->evictable.lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES].
Evictable pages are added to the per-zone lists indexed by max_seq or
min_seq[2] (modulo MAX_NR_GENS), depending on whether they are being
faulted in or read ahead. The workflow comprises two conceptually
independent functions: the aging and the eviction.
Aging
-----
The aging produces young generations. Given an lruvec, the aging scans
page tables for referenced pages of this lruvec. Upon finding one, the
aging updates its generation number to max_seq. After each round of
scan, the aging increments max_seq. The aging maintains either a
system-wide mm_struct list or per-memcg mm_struct lists and tracks
whether an mm_struct is being used on any CPUs or has been used since
the last scan. Multiple threads can concurrently work on the same
mm_struct list, and each of them will be given a different mm_struct
belonging to a process that has been scheduled since the last scan.
Eviction
--------
The eviction consumes old generations. Given an lruvec, the eviction
scans the pages on the per-zone lists indexed by either of min_seq[2].
It selects a type according to the values of min_seq[2] and
swappiness. During a scan, the eviction either sorts or isolates a
page, depending on whether the aging has updated its generation
number. When it finds all the per-zone lists are empty, the eviction
increments min_seq[2] indexed by this selected type. The eviction
triggers the aging when both of min_seq[2] reach max_seq-1, assuming
both anon and file types are reclaimable.
Use cases
=========
On Android, our most advanced simulation that generates memory
pressure from realistic user behavior shows 18% fewer low-memory
kills, which in turn reduces cold starts by 16%.
On Borg, a similar approach enables us to identify jobs that
underutilize their memory and downsize them considerably without
compromising any of our service level indicators.
On Chrome OS, our field telemetry reports 96% fewer low-memory tab
discards and 59% fewer OOM kills from fully-utilized devices and no UX
regressions from underutilized devices.
For other use cases, including working set estimation, proactive reclaim,
far memory tiering and NUMA-aware job scheduling, please refer to the
documentation included in this series and the following references.
References
==========
1. Long-term SLOs for reclaimed cloud computing resources
https://research.google/pubs/pub43017/
2. Profiling a warehouse-scale computer
https://research.google/pubs/pub44271/
3. Evaluation of NUMA-Aware Scheduling in Warehouse-Scale Clusters
https://research.google/pubs/pub48329/
4. Software-defined far memory in warehouse-scale computers
https://research.google/pubs/pub48551/
5. Borg: the Next Generation
https://research.google/pubs/pub49065/
Yu Zhao (14):
include/linux/memcontrol.h: do not warn in page_memcg_rcu() if
!CONFIG_MEMCG
include/linux/nodemask.h: define next_memory_node() if !CONFIG_NUMA
include/linux/huge_mm.h: define is_huge_zero_pmd() if
!CONFIG_TRANSPARENT_HUGEPAGE
include/linux/cgroup.h: export cgroup_mutex
mm/swap.c: export activate_page()
mm, x86: support the access bit on non-leaf PMD entries
mm/pagewalk.c: add pud_entry_post() for post-order traversals
mm/vmscan.c: refactor shrink_node()
mm: multigenerational lru: mm_struct list
mm: multigenerational lru: core
mm: multigenerational lru: page activation
mm: multigenerational lru: user space interface
mm: multigenerational lru: Kconfig
mm: multigenerational lru: documentation
Documentation/vm/index.rst | 1 +
Documentation/vm/multigen_lru.rst | 210 +++
arch/Kconfig | 8 +
arch/x86/Kconfig | 1 +
arch/x86/include/asm/pgtable.h | 2 +-
arch/x86/mm/pgtable.c | 5 +-
fs/exec.c | 2 +
fs/proc/task_mmu.c | 3 +-
include/linux/cgroup.h | 15 +-
include/linux/huge_mm.h | 5 +
include/linux/memcontrol.h | 5 +-
include/linux/mm.h | 1 +
include/linux/mm_inline.h | 246 ++++
include/linux/mm_types.h | 135 ++
include/linux/mmzone.h | 62 +-
include/linux/nodemask.h | 1 +
include/linux/page-flags-layout.h | 20 +-
include/linux/pagewalk.h | 4 +
include/linux/pgtable.h | 4 +-
include/linux/swap.h | 5 +-
kernel/events/uprobes.c | 2 +-
kernel/exit.c | 1 +
kernel/fork.c | 10 +
kernel/kthread.c | 1 +
kernel/sched/core.c | 2 +
mm/Kconfig | 29 +
mm/huge_memory.c | 5 +-
mm/khugepaged.c | 2 +-
mm/memcontrol.c | 28 +
mm/memory.c | 14 +-
mm/migrate.c | 2 +-
mm/mm_init.c | 13 +-
mm/mmzone.c | 2 +
mm/pagewalk.c | 5 +
mm/rmap.c | 6 +
mm/swap.c | 58 +-
mm/swapfile.c | 6 +-
mm/userfaultfd.c | 2 +-
mm/vmscan.c | 2091 +++++++++++++++++++++++++++--
39 files changed, 2870 insertions(+), 144 deletions(-)
create mode 100644 Documentation/vm/multigen_lru.rst
--
2.31.0.rc2.261.g7f71774620-goog

View File

@ -0,0 +1,129 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 42B16C433DB
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 11CED64ECE
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S233237AbhCMH6e (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 13 Mar 2021 02:58:34 -0500
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58944 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S230349AbhCMH56 (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 13 Mar 2021 02:57:58 -0500
Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 7B7EAC061574
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:57:58 -0800 (PST)
Received: by mail-yb1-xb49.google.com with SMTP id d8so32058455ybs.11
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:57:58 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=CiMQJrfmhcT/Xw28mTP3VlU5SRPV8bRsC232LNJk7tU=;
b=U+bvs2P4aWZrWqfRwrXFunM/l5sWGKqRdiGQFJBSXwSH+vfw4kB3WjkPPQpoUgHwwx
+4KITrOtke32as1JFmSOW/QJ8GYL6J2CyqtNZysfNDnr4dUu1eafFf0OU/BN2PlR6TZw
u/bOTirXcAreUn8QrcDvxRKbQwugJdk2JWl2TqDc7KAmb0AodFb/pAgQnWip2QOqWta3
5ohqe66l196K6u9PNyDcJqEzz4CuJBMkGEAupVYjzX/HNuFZ1kLz2lz2FxSR38TqyX8I
aWFk3lMppRkLaWpnC4nC3SQhZcJq9YOxrujTA4BSnsEwscy7qJzgt8w9xEMiwzmAoN1g
Bm1Q==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=CiMQJrfmhcT/Xw28mTP3VlU5SRPV8bRsC232LNJk7tU=;
b=Qi2PdmMPthJWQMXSQ05Fb8upxFWwOHNAFsy8LixbQ0QGPKWVlwBzQDYwPCpG05WFP8
cYIfPhq3nv9++78Y1Pw7Q8oiGEv7KJ0j/sHIaRZGlWZdHWwciPpKdT9M4JVXYhn4NxhF
6YrPBHSTOTN7v9fuCOcPqKSOKTBYMF/eETj9XoeqtvetJVZ1i3Dqxu4TawWOlymnTTkh
9IQRnTz2ffdTJdBCH6iTA0UfrEDWDnESubVvzuRfmLvCt3b427gwHWXaK/i7F/60+65O
8gzoqIJ9gqwTfdGB3vB6HXtbmWosQ39Zy7gnpTpf0CB7afg1Gnx/4Y26GMLjtLkzeviQ
6K7w==
X-Gm-Message-State: AOAM531kmbzmJE5p9rtDGXbRHYBSsGjPyFJxoBPqsQzAq6DHfRzY/is2
Lgnmsa+bse/YLa5M+1JDlGqGCZxMntQ=
X-Google-Smtp-Source: ABdhPJxUTV3MKesWpiHjP1OfC2lc1y93+U3j9zDFMK1igY41oF3Fyu2enmEiwR5c1z6fz0Ykw1sgyEJETNk=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:f931:d3e4:faa0:4f74])
(user=yuzhao job=sendgmr) by 2002:a25:dfd1:: with SMTP id w200mr24182984ybg.362.1615622277472;
Fri, 12 Mar 2021 23:57:57 -0800 (PST)
Date: Sat, 13 Mar 2021 00:57:34 -0700
In-Reply-To: <20210313075747.3781593-1-yuzhao@google.com>
Message-Id: <20210313075747.3781593-2-yuzhao@google.com>
Mime-Version: 1.0
References: <20210313075747.3781593-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.0.rc2.261.g7f71774620-goog
Subject: [PATCH v1 01/14] include/linux/memcontrol.h: do not warn in
page_memcg_rcu() if !CONFIG_MEMCG
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alex.shi@linux.alibaba.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>, Michal Hocko <mhocko@suse.com>,
Roman Gushchin <guro@fb.com>, Vlastimil Babka <vbabka@suse.cz>,
Wei Yang <richard.weiyang@linux.alibaba.com>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>,
linux-kernel@vger.kernel.org, page-reclaim@google.com,
Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210313075747.3781593-2-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
We want to make sure the rcu lock is held while using
page_memcg_rcu(). But having a WARN_ON_ONCE() in page_memcg_rcu() when
!CONFIG_MEMCG is superfluous because of the following legit use case:
memcg = lock_page_memcg(page1)
(rcu_read_lock() if CONFIG_MEMCG=y)
do something to page1
if (page_memcg_rcu(page2) == memcg)
do something to page2 too as it cannot be migrated away from the
memcg either.
unlock_page_memcg(page1)
(rcu_read_unlock() if CONFIG_MEMCG=y)
This patch removes the WARN_ON_ONCE() from page_memcg_rcu() for the
!CONFIG_MEMCG case.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
include/linux/memcontrol.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e6dc793d587d..f325aeb4b4e8 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1079,7 +1079,6 @@ static inline struct mem_cgroup *page_memcg(struct page *page)
static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
{
- WARN_ON_ONCE(!rcu_read_lock_held());
return NULL;
}
--
2.31.0.rc2.261.g7f71774620-goog

View File

@ -0,0 +1,113 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=ham autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 7CAADC433E9
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 4C9E564ECE
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S233343AbhCMH6f (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 13 Mar 2021 02:58:35 -0500
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58950 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S231723AbhCMH57 (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 13 Mar 2021 02:57:59 -0500
Received: from mail-qk1-x74a.google.com (mail-qk1-x74a.google.com [IPv6:2607:f8b0:4864:20::74a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B79D0C061574
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:57:59 -0800 (PST)
Received: by mail-qk1-x74a.google.com with SMTP id k68so19900263qke.2
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:57:59 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=8JNiZUfBDRjXaPYBpQeMTXnhFbhlF+xtdVruxqSh3cA=;
b=IxvaJGLX6pUI5R+Y+pYVqc8/0gQYUuErDfxr3dMFZudBUHTyTTMgRdE0XBcCau7R3l
WVURZXlPOHDzjeCRAmjp2GkNmw3M99Sx/iwvc+iSD8ohg8gx7Cj3TzTxPdzUCsghnFFL
Hv+lIk4SezNfKgceCzGd/c+Rf6ueoDDPEzD62ZXkdDxk/uLmQc4GjBU0Knksz2+dLsVo
b2U0CgK0WmdW2qIHy4OyEo2nBB4jmzDFCPxxlIobZYlIAsooUXen6yoe28K/2f1TgtI5
p1/lftklJT4PvJibzbIlGo6vGha2wAL2lU0ks0AxI4l1lf/1Bf/PVsGVaXXW7+F/fJFj
Y3HQ==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=8JNiZUfBDRjXaPYBpQeMTXnhFbhlF+xtdVruxqSh3cA=;
b=gAD0lTpWpuRmUv2Dlufjuw7ZklBUzo8DSa0cxSlaWPJunVm1RpqFGuQ0dz/Q5SDxC8
e2XkyRSyCbrJ2qPrJUA+p5trg9qCQq4i4twAOZW4+6JOmPcDhnjMZ5aZGSzfoiMIRsoT
4EV2Q02mcFvf2IuUgWli5WndTXndhzHNpFWCsogvzS+JWqWb6aY+e+5tJWSfWH3kCiDw
t8uRvv61lDKzNQeIpa/ZbY8MFF4olHkRwvO5FMf2xUfghMxRoosJSB3DdLIiwC8NEbpr
g7dGskt+2tbLFnNCykjqc81I4A/sSyeBg95oUKs9PNAIBgIoAQlvqOizUcVccM9r97dk
dPYA==
X-Gm-Message-State: AOAM533j3NWtnjtXQODMvkwfUeiEAcpKyeqz3jm23oO6viFDFeKf35BJ
zE9lWSBpsopJHLabXkGrebl2ktdd2BY=
X-Google-Smtp-Source: ABdhPJwORNu5jWRG63mEkIrwerIFX+r6WsDRo4k6jBRlD+35Q3Ytikr16dVGNSyDCuR7br75GADeWLQhEY0=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:f931:d3e4:faa0:4f74])
(user=yuzhao job=sendgmr) by 2002:ad4:5ce8:: with SMTP id iv8mr1757086qvb.16.1615622278881;
Fri, 12 Mar 2021 23:57:58 -0800 (PST)
Date: Sat, 13 Mar 2021 00:57:35 -0700
In-Reply-To: <20210313075747.3781593-1-yuzhao@google.com>
Message-Id: <20210313075747.3781593-3-yuzhao@google.com>
Mime-Version: 1.0
References: <20210313075747.3781593-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.0.rc2.261.g7f71774620-goog
Subject: [PATCH v1 02/14] include/linux/nodemask.h: define next_memory_node()
if !CONFIG_NUMA
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alex.shi@linux.alibaba.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>, Michal Hocko <mhocko@suse.com>,
Roman Gushchin <guro@fb.com>, Vlastimil Babka <vbabka@suse.cz>,
Wei Yang <richard.weiyang@linux.alibaba.com>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>,
linux-kernel@vger.kernel.org, page-reclaim@google.com,
Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210313075747.3781593-3-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Currently next_memory_node only exists when CONFIG_NUMA=y. This patch
defines the macro for the !CONFIG_NUMA case.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
include/linux/nodemask.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index ac398e143c9a..89fe4e3592f9 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -486,6 +486,7 @@ static inline int num_node_state(enum node_states state)
#define first_online_node 0
#define first_memory_node 0
#define next_online_node(nid) (MAX_NUMNODES)
+#define next_memory_node(nid) (MAX_NUMNODES)
#define nr_node_ids 1U
#define nr_online_nodes 1U
--
2.31.0.rc2.261.g7f71774620-goog

View File

@ -0,0 +1,118 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 97984C433E6
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 62DCA64F1E
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S233389AbhCMH6g (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 13 Mar 2021 02:58:36 -0500
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58956 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S232023AbhCMH6B (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 13 Mar 2021 02:58:01 -0500
Received: from mail-qk1-x74a.google.com (mail-qk1-x74a.google.com [IPv6:2607:f8b0:4864:20::74a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 055BEC061574
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:01 -0800 (PST)
Received: by mail-qk1-x74a.google.com with SMTP id u5so19869532qkj.10
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:00 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=BUGHWaLOgbBXMks9MxevCFPE+HJdHu8WtN3Ad5BKa2E=;
b=EmtSmNZcxIc0Qq98PvQLlh/Je74+I9pGiId+AoSzt2WN66X/7gqva2AkM1Z6ZrqWlC
qMCo9fu+KgWIl13K9lL0hfZkSMTr33It30mFN/3/xwUcpWXiUy2ttup7BflThw89akrm
ipCdNg7GP9J1lKGO0+Ae8TZbHboXPO6EU2DcK5O1kt2ZE3NOFthikv0X6/opyUBdeUKR
Q3HhM/pgOA+vQ4UMR7hzNtEDZcmVDtrPgTwq9zDoYDV/KHqaCvxRPvrkCfwY3MbDN9Yc
Dm0TaDfpoFKrj/syFqJ/83hwIk0G3OPLs6DbY/I2HkHJ6cbjakxVLFDyXybDitO9MyGA
/tow==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=BUGHWaLOgbBXMks9MxevCFPE+HJdHu8WtN3Ad5BKa2E=;
b=tk+Szu/MRdyl1iY0zpUHO4VtD8YBAHs5CFbud9sFWZl8OIpU011Pd59sKmQkChiEw7
YEzG58xR4zAwc8CM8Vd7HuqKhhVUDIqumg0Ntx1KyfQ2QNYcbVSv8oAHuMLxAAembJau
D5EsuQjvXjKjroV/zEhusebtXIHZULBN1x9MfQwyikGwihqLhzDwXeUHx2D7JFo7i40J
3hvB0UCXdGNVF257C0gUQWS/r0tKshrTyX1i7tyAutY7viCISRQ6FP65DlTjV3PAmmsw
VHxDK47EkcJAoat+x24kFd6i44dgww1DGsDCpvBZu8P/V1m2f4keblf2wZMFsVwDhZRU
0b/g==
X-Gm-Message-State: AOAM5314IBqaZ2bDNgLAPxQ3EGGAGDitVyFeUtt5HNFWNSHljmLCuPyl
iPCluBY4dSP0ON55Ckf7BwK1bpQBiw8=
X-Google-Smtp-Source: ABdhPJzJJ1SnzBtj3bVJq+tnpS8mvob3Iwgd7McsgFl/pDIRa/R9w7/nQuY0bxlcdhV2mXDJvzzuC77PvVA=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:f931:d3e4:faa0:4f74])
(user=yuzhao job=sendgmr) by 2002:a05:6214:4b3:: with SMTP id
w19mr1750690qvz.26.1615622280155; Fri, 12 Mar 2021 23:58:00 -0800 (PST)
Date: Sat, 13 Mar 2021 00:57:36 -0700
In-Reply-To: <20210313075747.3781593-1-yuzhao@google.com>
Message-Id: <20210313075747.3781593-4-yuzhao@google.com>
Mime-Version: 1.0
References: <20210313075747.3781593-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.0.rc2.261.g7f71774620-goog
Subject: [PATCH v1 03/14] include/linux/huge_mm.h: define is_huge_zero_pmd()
if !CONFIG_TRANSPARENT_HUGEPAGE
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alex.shi@linux.alibaba.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>, Michal Hocko <mhocko@suse.com>,
Roman Gushchin <guro@fb.com>, Vlastimil Babka <vbabka@suse.cz>,
Wei Yang <richard.weiyang@linux.alibaba.com>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>,
linux-kernel@vger.kernel.org, page-reclaim@google.com,
Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210313075747.3781593-4-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Currently is_huge_zero_pmd() only exists when
CONFIG_TRANSPARENT_HUGEPAGE=y. This patch defines the function for the
!CONFIG_TRANSPARENT_HUGEPAGE case.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
include/linux/huge_mm.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index ba973efcd369..0ba7b3f9029c 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -443,6 +443,11 @@ static inline bool is_huge_zero_page(struct page *page)
return false;
}
+static inline bool is_huge_zero_pmd(pmd_t pmd)
+{
+ return false;
+}
+
static inline bool is_huge_zero_pud(pud_t pud)
{
return false;
--
2.31.0.rc2.261.g7f71774620-goog

View File

@ -0,0 +1,140 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id A8DC7C43381
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 7FA3564F1D
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S233409AbhCMH6i (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 13 Mar 2021 02:58:38 -0500
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58964 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S232230AbhCMH6C (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 13 Mar 2021 02:58:02 -0500
Received: from mail-qk1-x74a.google.com (mail-qk1-x74a.google.com [IPv6:2607:f8b0:4864:20::74a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 562C3C061574
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:02 -0800 (PST)
Received: by mail-qk1-x74a.google.com with SMTP id d137so4579263qkb.18
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:02 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=aLRTkKE+4395hmMwkgIvoFPbmsRrSNB3cb7TyZ2ydVo=;
b=RbyWeK4rUqhSF6l4+WOn4bz/6l7Kc8FPXxW3gI+7y8uPJAZ1QX8cf/I1Awt1gV/SbI
bjnyW7mGxY1NMOZzzbS/+Pu/wZOk8PcdLyHQjU8FYS7MY3rlxWHPLiUeDkvVnQXqR/vU
VjFNQgX24G3KsIyOvy72WKVvMUMe73K7lMeGcaq5JwzYtlJpwJmAq7im1mo7v1rCTUgA
0mo4ifiQHDGxfCFwMiqlcmL9XCp3IrCMWE8XU4ROv4uXfZw0LtfhovB0FFVhgU4OaGPo
9xkxh+e9HnpJUWha5GVEemFT2phEp+qmiZ0b0RvdNPBXFoxHBjwLSKOp/alc26idQnA2
6ehw==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=aLRTkKE+4395hmMwkgIvoFPbmsRrSNB3cb7TyZ2ydVo=;
b=sPWOPGHoVohb7G6EOihIswCUjaDOMsg/DvRB2ugPnziQk8PcjMml4kYe8yFsYiBamJ
ZBRsKYaIxBbegmcuF2aq6FE8IRzx6eYh8i5L6RQ83/jPcS1VVViz30AEgGo2OR0qlRtK
6e9I4lEcrR67MLWdkHooamn5SOvnTfgJcr7FGERX+0O/FzSxT56KcHTaEjHYnS68pxQM
cryChrhdy5jpPx9+EiGLdZI95GTYYHE3/TXMlABP1Dv4YEWI93zhR/ePrlm0SjEioKWR
PW1K3Blnn4t6EIlzyEcAxmVz7702MA3b1x1hM3iPT6B+pwdovapsNRL+JH3s67twnKUc
qBWQ==
X-Gm-Message-State: AOAM532W7wUq4pmKjtVvHSPslHZDT9pB3jR1xJmNsJD9ZqrBMl8E+xpH
v9xzx/Rcs8diDMvgzLYOU4hfhWFObf4=
X-Google-Smtp-Source: ABdhPJwU+Y5hioYI52CJyazw+FjQUFeKf1QbQJTSQIAKTuuIxfYVSs2ErBNlMhmNRx4/c6B7zv8sBwMBzxE=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:f931:d3e4:faa0:4f74])
(user=yuzhao job=sendgmr) by 2002:ad4:5c87:: with SMTP id o7mr1743197qvh.31.1615622281483;
Fri, 12 Mar 2021 23:58:01 -0800 (PST)
Date: Sat, 13 Mar 2021 00:57:37 -0700
In-Reply-To: <20210313075747.3781593-1-yuzhao@google.com>
Message-Id: <20210313075747.3781593-5-yuzhao@google.com>
Mime-Version: 1.0
References: <20210313075747.3781593-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.0.rc2.261.g7f71774620-goog
Subject: [PATCH v1 04/14] include/linux/cgroup.h: export cgroup_mutex
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alex.shi@linux.alibaba.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>, Michal Hocko <mhocko@suse.com>,
Roman Gushchin <guro@fb.com>, Vlastimil Babka <vbabka@suse.cz>,
Wei Yang <richard.weiyang@linux.alibaba.com>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>,
linux-kernel@vger.kernel.org, page-reclaim@google.com,
Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210313075747.3781593-5-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Export cgroup_mutex so it can be used to synchronize with memcg
allocations.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
include/linux/cgroup.h | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 4f2f79de083e..bd5744360cfa 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -432,6 +432,18 @@ static inline void cgroup_put(struct cgroup *cgrp)
css_put(&cgrp->self);
}
+extern struct mutex cgroup_mutex;
+
+static inline void cgroup_lock(void)
+{
+ mutex_lock(&cgroup_mutex);
+}
+
+static inline void cgroup_unlock(void)
+{
+ mutex_unlock(&cgroup_mutex);
+}
+
/**
* task_css_set_check - obtain a task's css_set with extra access conditions
* @task: the task to obtain css_set for
@@ -446,7 +458,6 @@ static inline void cgroup_put(struct cgroup *cgrp)
* as locks used during the cgroup_subsys::attach() methods.
*/
#ifdef CONFIG_PROVE_RCU
-extern struct mutex cgroup_mutex;
extern spinlock_t css_set_lock;
#define task_css_set_check(task, __c) \
rcu_dereference_check((task)->cgroups, \
@@ -704,6 +715,8 @@ struct cgroup;
static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
static inline void css_get(struct cgroup_subsys_state *css) {}
static inline void css_put(struct cgroup_subsys_state *css) {}
+static inline void cgroup_lock(void) {}
+static inline void cgroup_unlock(void) {}
static inline int cgroup_attach_task_all(struct task_struct *from,
struct task_struct *t) { return 0; }
static inline int cgroupstats_build(struct cgroupstats *stats,
--
2.31.0.rc2.261.g7f71774620-goog

View File

@ -0,0 +1,178 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id C5A9BC4332B
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 90B8C64F1F
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S233424AbhCMH6i (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 13 Mar 2021 02:58:38 -0500
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58970 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S232431AbhCMH6D (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 13 Mar 2021 02:58:03 -0500
Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A425FC061574
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:03 -0800 (PST)
Received: by mail-yb1-xb4a.google.com with SMTP id 194so31802025ybl.5
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:03 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=773qLo/LiVz4n2L5CNuvuPAbdhL5vI+dQXWkGfq22YM=;
b=s62F923cqY3HHxlJ4hYL4HxcRTUA1o1Vmr0HgcffuxiRKFFBC1czWP98NMUIxWHBf1
ZTyWeNYix1pSuyOzyeK0NFpxLIWOmX10rPMvqWp8DuHg1yJhrNIGNko3fZT0atoX3aT9
tvbuoR86gyhnQZ8eh7p7K/l32hNMDiL/9yg2skyWxrtzqXc2LkdkDBaiklyidpzGD9Xo
glkEqmmHlh+PbMf1URYMZzEcs1zoLYSWmku5OQ0gpaw9yflCHjp7u8qrrfyCRliYK3I/
Tc+BFkGgrB7u8ficVc0QKkIHZrZFoWgOnbQ0s8MxrT5IfVlLG0WbP3MEYNuJZFSmG8zL
SKVQ==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=773qLo/LiVz4n2L5CNuvuPAbdhL5vI+dQXWkGfq22YM=;
b=JutL0wSOtSKnXJpfp0Rn/HxX7HIFKRMSUwsEChk8vd4csU2mQte1wA+/+UuChaDIbU
LAbDmBYxJhnK4nlISGat+zJubGDWfCTB+UZ0ZMedNwvo4kQ6wCMcrBVBswWbPfpCjDwr
A4KWgVBEKj1hggrx43gbsIj1+nseOCIQxhyAyqUZPXMWyh5DbkzFwS/Ofm/k9MlAtqkm
kxvprFWjiL/B2UVHRa6QH0Pd81vDVjQVhL1VANnhSRBRqVhMl9CKh6LDeEXIQ/j/SUW1
sU5WIzhVZuh5Ce8LmXpg49O0w7XWFKmLxS65P39JtYklewPiPLasFUZmGGQIW0YkgSAb
ikpA==
X-Gm-Message-State: AOAM531sNbnzQRY0viG10kk0YQk3CPKaMfG7csYITYFCtPUjz1sM9Whr
xig3F1mAxIB/3uXuYRA3nVADy29ViV4=
X-Google-Smtp-Source: ABdhPJyM3uGtugEuR+BtqyW7Pcf+QxhuRuUSNVC/LK4e6LJubLVomvaz1At1w7Vl14tsop0cWxsmMMW22Kg=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:f931:d3e4:faa0:4f74])
(user=yuzhao job=sendgmr) by 2002:a25:d84b:: with SMTP id p72mr22707445ybg.272.1615622282832;
Fri, 12 Mar 2021 23:58:02 -0800 (PST)
Date: Sat, 13 Mar 2021 00:57:38 -0700
In-Reply-To: <20210313075747.3781593-1-yuzhao@google.com>
Message-Id: <20210313075747.3781593-6-yuzhao@google.com>
Mime-Version: 1.0
References: <20210313075747.3781593-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.0.rc2.261.g7f71774620-goog
Subject: [PATCH v1 05/14] mm/swap.c: export activate_page()
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alex.shi@linux.alibaba.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>, Michal Hocko <mhocko@suse.com>,
Roman Gushchin <guro@fb.com>, Vlastimil Babka <vbabka@suse.cz>,
Wei Yang <richard.weiyang@linux.alibaba.com>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>,
linux-kernel@vger.kernel.org, page-reclaim@google.com,
Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210313075747.3781593-6-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Export activate_page(), which is a merger between the existing
activate_page() and __lru_cache_activate_page(), so it can be used to
activate pages that are already on lru or queued in lru_pvecs.lru_add.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
include/linux/swap.h | 1 +
mm/swap.c | 28 +++++++++++++++-------------
2 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4cc6ec3bf0ab..de2bbbf181ba 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -344,6 +344,7 @@ extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_cpu_zone(struct zone *zone);
extern void lru_add_drain_all(void);
extern void rotate_reclaimable_page(struct page *page);
+extern void activate_page(struct page *page);
extern void deactivate_file_page(struct page *page);
extern void deactivate_page(struct page *page);
extern void mark_page_lazyfree(struct page *page);
diff --git a/mm/swap.c b/mm/swap.c
index 31b844d4ed94..f20ed56ebbbf 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -334,7 +334,7 @@ static bool need_activate_page_drain(int cpu)
return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0;
}
-static void activate_page(struct page *page)
+static void activate_page_on_lru(struct page *page)
{
page = compound_head(page);
if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
@@ -354,7 +354,7 @@ static inline void activate_page_drain(int cpu)
{
}
-static void activate_page(struct page *page)
+static void activate_page_on_lru(struct page *page)
{
struct lruvec *lruvec;
@@ -368,11 +368,22 @@ static void activate_page(struct page *page)
}
#endif
-static void __lru_cache_activate_page(struct page *page)
+/*
+ * If the page is on the LRU, queue it for activation via
+ * lru_pvecs.activate_page. Otherwise, assume the page is on a
+ * pagevec, mark it active and it'll be moved to the active
+ * LRU on the next drain.
+ */
+void activate_page(struct page *page)
{
struct pagevec *pvec;
int i;
+ if (PageLRU(page)) {
+ activate_page_on_lru(page);
+ return;
+ }
+
local_lock(&lru_pvecs.lock);
pvec = this_cpu_ptr(&lru_pvecs.lru_add);
@@ -421,16 +432,7 @@ void mark_page_accessed(struct page *page)
* evictable page accessed has no effect.
*/
} else if (!PageActive(page)) {
- /*
- * If the page is on the LRU, queue it for activation via
- * lru_pvecs.activate_page. Otherwise, assume the page is on a
- * pagevec, mark it active and it'll be moved to the active
- * LRU on the next drain.
- */
- if (PageLRU(page))
- activate_page(page);
- else
- __lru_cache_activate_page(page);
+ activate_page(page);
ClearPageReferenced(page);
workingset_activation(page);
}
--
2.31.0.rc2.261.g7f71774620-goog

View File

@ -0,0 +1,202 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id CFB78C43331
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id A37AF64F1E
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S233446AbhCMH6j (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 13 Mar 2021 02:58:39 -0500
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58976 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S232627AbhCMH6F (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 13 Mar 2021 02:58:05 -0500
Received: from mail-qt1-x84a.google.com (mail-qt1-x84a.google.com [IPv6:2607:f8b0:4864:20::84a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id EF963C061574
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:04 -0800 (PST)
Received: by mail-qt1-x84a.google.com with SMTP id a16so3377560qtw.1
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:04 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=9FOrNXENvEBq+xjcXdvWYV76Tj/52PeZfpdHZpBdeTw=;
b=CZNFSLn1Vr+7g91u2WdSPY2RASOoMeGVnEu8XS9ogwps7Gq+7F5umE3fWsyowJpBWD
/BpSgazEV0uTx/142ccxmLjj6Tc5kR7KGsb79Ptj4azaGNuJBT032A7MXAqita6Xkryl
6IanFkwVS4tC+SsZAZtk+kuQzdp1pO5Pnx+cXwdQzEVQmCkWjuUnjKzoEPGf4IlnkcFb
QOa5YU3bEwcfmAFIwkc2tWsdZ2h8rKKycxzT/zHBKI265GiGZGNofgaHIdU33DduoJGc
Q3y72dvencUMJG5nccrm3sqMC+9s98wb5AiyP7gwQH8cNu/oUBPDF/k1zMnv1QbL0y4q
Wkmw==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=9FOrNXENvEBq+xjcXdvWYV76Tj/52PeZfpdHZpBdeTw=;
b=dwfL2qqtOLyHknmJLTbNquIIE99M3co4RRcjM8ZHW7E3b22r0qS7/uSdIuLxhwPvQA
Y3LL2jZN2S3rNEY31iqcQo0XPmDjtQwb13jd9vmVGL+LLS5tszC4uUapyiV0oPCH5uwL
smIUxO+PPvwuZT1NgqDpJ05pyAL6HNc5tjDAhFLsgZFN/eS2938P25wiN4+HDX1vPlLc
PXD85IpbqplkrTukFj4waHLj2xp8v/FPA9XzbBiYpaKEr1bL5w6oyKCxl6mnhZr0A9h6
m3QW2oQAPQpKG6YqSR4gB8S9wvQ5RcQOj/4jcVgdKH2lIq48/Tc9tYuJNF3HuISISSMR
K2Aw==
X-Gm-Message-State: AOAM530QAe48ahNQ/TdZi8OsNzl8PRShs8X4B4mC6ejSQSSJgG9mU0dS
C8+Z0ZLOLiEDu/n5izcyhgbt0bjgPuM=
X-Google-Smtp-Source: ABdhPJx3v9fo9fGIto8kHzW5LTGjVA6UTKRtTeuo4NwnDLlQop9mIBVZK8pHZLGtADbIHe5kSQ6HeS7hVrU=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:f931:d3e4:faa0:4f74])
(user=yuzhao job=sendgmr) by 2002:a0c:cb0c:: with SMTP id o12mr15467752qvk.54.1615622284101;
Fri, 12 Mar 2021 23:58:04 -0800 (PST)
Date: Sat, 13 Mar 2021 00:57:39 -0700
In-Reply-To: <20210313075747.3781593-1-yuzhao@google.com>
Message-Id: <20210313075747.3781593-7-yuzhao@google.com>
Mime-Version: 1.0
References: <20210313075747.3781593-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.0.rc2.261.g7f71774620-goog
Subject: [PATCH v1 06/14] mm, x86: support the access bit on non-leaf PMD entries
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alex.shi@linux.alibaba.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>, Michal Hocko <mhocko@suse.com>,
Roman Gushchin <guro@fb.com>, Vlastimil Babka <vbabka@suse.cz>,
Wei Yang <richard.weiyang@linux.alibaba.com>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>,
linux-kernel@vger.kernel.org, page-reclaim@google.com,
Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210313075747.3781593-7-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Some architectures support the accessed bit on non-leaf PMD entries
(parents) in addition to leaf PTE entries (children) where pages are
mapped, e.g., x86_64 sets the accessed bit on a parent when using it
as part of linear-address translation [1]. Page table walkers that are
interested in the accessed bit on children can take advantage of this:
they do not need to search the children when the accessed bit is not
set on a parent, given that they have previously cleared the accessed
bit on this parent in addition to its children.
[1]: Intel 64 and IA-32 Architectures Software Developer's Manual
Volume 3 (October 2019), section 4.8
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
arch/Kconfig | 8 ++++++++
arch/x86/Kconfig | 1 +
arch/x86/include/asm/pgtable.h | 2 +-
arch/x86/mm/pgtable.c | 5 ++++-
include/linux/pgtable.h | 4 ++--
5 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 2bb30673d8e6..137446d17732 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -783,6 +783,14 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE
config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
bool
+config HAVE_ARCH_PARENT_PMD_YOUNG
+ bool
+ help
+ Architectures that select this are able to set the accessed bit on
+ non-leaf PMD entries in addition to leaf PTE entries where pages are
+ mapped. For them, page table walkers that clear the accessed bit may
+ stop at non-leaf PMD entries when they do not see the accessed bit.
+
config HAVE_ARCH_HUGE_VMAP
bool
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2792879d398e..b5972eb82337 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -163,6 +163,7 @@ config X86
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
+ select HAVE_ARCH_PARENT_PMD_YOUNG if X86_64
select HAVE_ARCH_USERFAULTFD_WP if X86_64 && USERFAULTFD
select HAVE_ARCH_VMAP_STACK if X86_64
select HAVE_ARCH_WITHIN_STACK_FRAMES
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a02c67291cfc..a6b5cfe1fc5a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -846,7 +846,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
static inline int pmd_bad(pmd_t pmd)
{
- return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
+ return ((pmd_flags(pmd) | _PAGE_ACCESSED) & ~_PAGE_USER) != _KERNPG_TABLE;
}
static inline unsigned long pages_to_mb(unsigned long npg)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index f6a9e2e36642..1c27e6f43f80 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
return ret;
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG)
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
@@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
return ret;
}
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pudp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pud_t *pudp)
{
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 5e772392a379..08dd9b8c055a 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -193,7 +193,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
#endif
#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG)
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address,
pmd_t *pmdp)
@@ -214,7 +214,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
BUILD_BUG();
return 0;
}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG */
#endif
#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
--
2.31.0.rc2.261.g7f71774620-goog

View File

@ -0,0 +1,144 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id E3438C43332
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id C990264F1D
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S233489AbhCMH6l (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 13 Mar 2021 02:58:41 -0500
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58984 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S232702AbhCMH6G (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 13 Mar 2021 02:58:06 -0500
Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 3F2FEC061574
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:06 -0800 (PST)
Received: by mail-yb1-xb49.google.com with SMTP id j4so31727869ybt.23
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:06 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=sM28HXat3Ro78N+fdELj5tA2TTORYcmSJRB7y0xn668=;
b=TkMSfULPVVqqEs8nJoS183e8mdw8yepLBTDn2eNINUqggav0COpVRzISc3zd89NvN9
G1/R3rFQNbXB4Mc0QhWaGTD1Fq53asLndQ1E2i8CSZ95KHVj8MUNkswp9uk2yDDtfErk
Sbw+4/WN0WzNqVfegG6JQMHiPP9iXEqGFYVgSZLK8flNYafDsz7FgT4K+/4AxxZLFwj6
mhv5FnKYJxQDSuEX1b3S54OYObhuyWJnGpPGLfPwfJ/quLWGTNO+ZIAeb5KMAmBdnEKV
rF1QTgCoPPMsWsShNKe8BsfaSKCMVGiSbi90ZFx8J0HHcn4GpbXrkYUuLSCcm0F4KYI7
fQQw==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=sM28HXat3Ro78N+fdELj5tA2TTORYcmSJRB7y0xn668=;
b=RiLWz/T/zcexV2Bsbw7YiZb7BINvMGTgl79XdA6B/OHzniNtpaj8nGzPvt6kerrgat
ur0TSQPmxqyehHvbKNQ7mbs/PTvniAuwd45+Ub0Vv+9NgoF9uTsNfAK7Vd4jQHOWOvhm
m1E6fZ25eW7KaMDTCBmRrCvIXIblvOe8PuRRsZqiT991bZ5mnSFynpVrpRLetwouPyGk
pSnxWX8BcM/e6hZIrC3KPP2PeXLt4ehMvZ9h9fIeH0DezONgDMKpjc0TdLhdm0Z+EAPP
gLoBDsqcG45NgWLNP2KcFrcDXtjwD+f9JAiMZvrNFb7RI7F78RYvzjAuegAab8YW/u+o
Nkzg==
X-Gm-Message-State: AOAM530brY8KWy4SQHMkv1r6LKzek0Cq4xuuj7kcDTW0m3zhQz3OLAJt
GTHrB+1mfPD6eAMjZFoSYPQ7AwnDTeo=
X-Google-Smtp-Source: ABdhPJzZYSt4fmXD7VdJ6T+CnbKC2/TjZT+i4G4gEva1Omiq/qzC/4JrVYLGPq2Q22anUOeAEZWsql+skQ8=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:f931:d3e4:faa0:4f74])
(user=yuzhao job=sendgmr) by 2002:a25:2308:: with SMTP id j8mr24384456ybj.474.1615622285438;
Fri, 12 Mar 2021 23:58:05 -0800 (PST)
Date: Sat, 13 Mar 2021 00:57:40 -0700
In-Reply-To: <20210313075747.3781593-1-yuzhao@google.com>
Message-Id: <20210313075747.3781593-8-yuzhao@google.com>
Mime-Version: 1.0
References: <20210313075747.3781593-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.0.rc2.261.g7f71774620-goog
Subject: [PATCH v1 07/14] mm/pagewalk.c: add pud_entry_post() for post-order traversals
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alex.shi@linux.alibaba.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>, Michal Hocko <mhocko@suse.com>,
Roman Gushchin <guro@fb.com>, Vlastimil Babka <vbabka@suse.cz>,
Wei Yang <richard.weiyang@linux.alibaba.com>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>,
linux-kernel@vger.kernel.org, page-reclaim@google.com,
Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210313075747.3781593-8-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Add a new callback pud_entry_post() to struct mm_walk_ops so that page
table walkers can visit the non-leaf PMD entries of a PUD entry after
they have visited the leaf PTE entries. This allows page table
walkers that clear the accessed bit to take advantage of the previous
patch, similar to how walk_pte_range() works for the PTE entries of
a PMD entry: they only need to take the PTL once to search all the child
entries of a parent entry.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
include/linux/pagewalk.h | 4 ++++
mm/pagewalk.c | 5 +++++
2 files changed, 9 insertions(+)
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index b1cb6b753abb..2b68ae9d27d3 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -11,6 +11,8 @@ struct mm_walk;
* @pgd_entry: if set, called for each non-empty PGD (top-level) entry
* @p4d_entry: if set, called for each non-empty P4D entry
* @pud_entry: if set, called for each non-empty PUD entry
+ * @pud_entry_post: if set, called for each non-empty PUD entry after
+ * pmd_entry is called, for post-order traversal.
* @pmd_entry: if set, called for each non-empty PMD entry
* this handler is required to be able to handle
* pmd_trans_huge() pmds. They may simply choose to
@@ -41,6 +43,8 @@ struct mm_walk_ops {
unsigned long next, struct mm_walk *walk);
int (*pud_entry)(pud_t *pud, unsigned long addr,
unsigned long next, struct mm_walk *walk);
+ int (*pud_entry_post)(pud_t *pud, unsigned long addr,
+ unsigned long next, struct mm_walk *walk);
int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
unsigned long next, struct mm_walk *walk);
int (*pte_entry)(pte_t *pte, unsigned long addr,
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index e81640d9f177..8ed1533f7eda 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -160,6 +160,11 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
err = walk_pmd_range(pud, addr, next, walk);
if (err)
break;
+
+ if (ops->pud_entry_post)
+ err = ops->pud_entry_post(pud, addr, next, walk);
+ if (err)
+ break;
} while (pud++, addr = next, addr != end);
return err;
--
2.31.0.rc2.261.g7f71774620-goog

View File

@ -0,0 +1,311 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id CBC4DC4332D
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id B8A3D64ECE
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S233468AbhCMH6l (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 13 Mar 2021 02:58:41 -0500
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58990 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S232705AbhCMH6I (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 13 Mar 2021 02:58:08 -0500
Received: from mail-qk1-x749.google.com (mail-qk1-x749.google.com [IPv6:2607:f8b0:4864:20::749])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A5BF1C061761
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:07 -0800 (PST)
Received: by mail-qk1-x749.google.com with SMTP id i188so19867209qkd.7
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:07 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=Y7HI6Iu8cEpN4/cKvfcV1g+GhNxN+RuODBCey3P9ub4=;
b=Yxxt5mdzkudc9JV5Q09FDlj0cvp1kmfCXxaVjGD5K0eCTz/So1vsV5U0rgmXDgA+oo
I0A6T+8UZJmQiHjXDXECTBckUL0jnXMRgRm7XVaG53YB34iWjqO40F4CKKEh9+/OoVRc
G0AmzhscUbBJ/kam3+ejsn6XT8HmEmL1E1JV5ccYQd+ZntwFpRDTuwloc8JJwbzlOPz6
/TmQCN3DlJawauHo+hK8esqlLBqlUo0Hc3ctZf6lI6jfTJ6xP19kgHg147e6ddt/0Gru
4TRJ0LLQctTAhahAckLyUPLY8X0ofJQRXl5IFVx653ZZtEjcg5D9kIYZI+E7lddJGHTM
WCSQ==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=Y7HI6Iu8cEpN4/cKvfcV1g+GhNxN+RuODBCey3P9ub4=;
b=IhPlACmRhDBTPdmCHr0Oo1gV9hWUVpXkJQAh9lb+wytYYH7q3Kq7e+gKEZ0WvXAxP5
HCkzZEXo0KeDSrSNfpNxh8YUrLoNxRyc67U6jB0GE7MI8jH4hpyANf1w/gGK8VfBJVP6
HEd0MgT957tjxC4w5s2bFihlDvVujIJLQPYS+KO24VxvtbXLbjOayAHo8fH37VLxQ9U2
nOkP5ADzrpG+7bfsI2JutdLL5J6KjOX/ix5YTdFiiS1XlDgF+UDcwUkwnju04+gULHCK
SWuoOrT0DOfqlcQ4ome3RlXCB1UU/RCIPUUA06Y1a7h09m5zX6C1pqv0hnFWEk1Ehj8t
sE+g==
X-Gm-Message-State: AOAM533FjF5wfsmOCevkWqM2Zy0aRGz/n1+9ldB7hstZ21MlGbAcnU9S
iwofL/8D5KT2K6tTLoKvMx4do3+86Ss=
X-Google-Smtp-Source: ABdhPJwaHhrMfJqAPFj0pJIH6XbHZzHvvKGA6xcQa4GVhmvHaXMnKKa3+FEHfHSk+8gEmFOMqKskjMN6bjo=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:f931:d3e4:faa0:4f74])
(user=yuzhao job=sendgmr) by 2002:ad4:50c7:: with SMTP id e7mr1747988qvq.58.1615622286760;
Fri, 12 Mar 2021 23:58:06 -0800 (PST)
Date: Sat, 13 Mar 2021 00:57:41 -0700
In-Reply-To: <20210313075747.3781593-1-yuzhao@google.com>
Message-Id: <20210313075747.3781593-9-yuzhao@google.com>
Mime-Version: 1.0
References: <20210313075747.3781593-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.0.rc2.261.g7f71774620-goog
Subject: [PATCH v1 08/14] mm/vmscan.c: refactor shrink_node()
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alex.shi@linux.alibaba.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>, Michal Hocko <mhocko@suse.com>,
Roman Gushchin <guro@fb.com>, Vlastimil Babka <vbabka@suse.cz>,
Wei Yang <richard.weiyang@linux.alibaba.com>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>,
linux-kernel@vger.kernel.org, page-reclaim@google.com,
Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210313075747.3781593-9-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Heuristics in shrink_node() are rather independent and can be
refactored into a separate function to improve readability.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
mm/vmscan.c | 186 +++++++++++++++++++++++++++-------------------------
1 file changed, 98 insertions(+), 88 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 562e87cbd7a1..1a24d2e0a4cb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2224,6 +2224,103 @@ enum scan_balance {
SCAN_FILE,
};
+static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
+{
+ unsigned long file;
+ struct lruvec *target_lruvec;
+
+ target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
+
+ /*
+ * Determine the scan balance between anon and file LRUs.
+ */
+ spin_lock_irq(&target_lruvec->lru_lock);
+ sc->anon_cost = target_lruvec->anon_cost;
+ sc->file_cost = target_lruvec->file_cost;
+ spin_unlock_irq(&target_lruvec->lru_lock);
+
+ /*
+ * Target desirable inactive:active list ratios for the anon
+ * and file LRU lists.
+ */
+ if (!sc->force_deactivate) {
+ unsigned long refaults;
+
+ refaults = lruvec_page_state(target_lruvec,
+ WORKINGSET_ACTIVATE_ANON);
+ if (refaults != target_lruvec->refaults[0] ||
+ inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
+ sc->may_deactivate |= DEACTIVATE_ANON;
+ else
+ sc->may_deactivate &= ~DEACTIVATE_ANON;
+
+ /*
+ * When refaults are being observed, it means a new
+ * workingset is being established. Deactivate to get
+ * rid of any stale active pages quickly.
+ */
+ refaults = lruvec_page_state(target_lruvec,
+ WORKINGSET_ACTIVATE_FILE);
+ if (refaults != target_lruvec->refaults[1] ||
+ inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
+ sc->may_deactivate |= DEACTIVATE_FILE;
+ else
+ sc->may_deactivate &= ~DEACTIVATE_FILE;
+ } else
+ sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
+
+ /*
+ * If we have plenty of inactive file pages that aren't
+ * thrashing, try to reclaim those first before touching
+ * anonymous pages.
+ */
+ file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
+ if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
+ sc->cache_trim_mode = 1;
+ else
+ sc->cache_trim_mode = 0;
+
+ /*
+ * Prevent the reclaimer from falling into the cache trap: as
+ * cache pages start out inactive, every cache fault will tip
+ * the scan balance towards the file LRU. And as the file LRU
+ * shrinks, so does the window for rotation from references.
+ * This means we have a runaway feedback loop where a tiny
+ * thrashing file LRU becomes infinitely more attractive than
+ * anon pages. Try to detect this based on file LRU size.
+ */
+ if (!cgroup_reclaim(sc)) {
+ unsigned long total_high_wmark = 0;
+ unsigned long free, anon;
+ int z;
+
+ free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
+ file = node_page_state(pgdat, NR_ACTIVE_FILE) +
+ node_page_state(pgdat, NR_INACTIVE_FILE);
+
+ for (z = 0; z < MAX_NR_ZONES; z++) {
+ struct zone *zone = &pgdat->node_zones[z];
+
+ if (!managed_zone(zone))
+ continue;
+
+ total_high_wmark += high_wmark_pages(zone);
+ }
+
+ /*
+ * Consider anon: if that's low too, this isn't a
+ * runaway file reclaim problem, but rather just
+ * extreme pressure. Reclaim as per usual then.
+ */
+ anon = node_page_state(pgdat, NR_INACTIVE_ANON);
+
+ sc->file_is_tiny =
+ file + free <= total_high_wmark &&
+ !(sc->may_deactivate & DEACTIVATE_ANON) &&
+ anon >> sc->priority;
+ }
+}
+
/*
* Determine how aggressively the anon and file LRU lists should be
* scanned. The relative value of each set of LRU lists is determined
@@ -2669,7 +2766,6 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
unsigned long nr_reclaimed, nr_scanned;
struct lruvec *target_lruvec;
bool reclaimable = false;
- unsigned long file;
target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
@@ -2679,93 +2775,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
nr_reclaimed = sc->nr_reclaimed;
nr_scanned = sc->nr_scanned;
- /*
- * Determine the scan balance between anon and file LRUs.
- */
- spin_lock_irq(&target_lruvec->lru_lock);
- sc->anon_cost = target_lruvec->anon_cost;
- sc->file_cost = target_lruvec->file_cost;
- spin_unlock_irq(&target_lruvec->lru_lock);
-
- /*
- * Target desirable inactive:active list ratios for the anon
- * and file LRU lists.
- */
- if (!sc->force_deactivate) {
- unsigned long refaults;
-
- refaults = lruvec_page_state(target_lruvec,
- WORKINGSET_ACTIVATE_ANON);
- if (refaults != target_lruvec->refaults[0] ||
- inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
- sc->may_deactivate |= DEACTIVATE_ANON;
- else
- sc->may_deactivate &= ~DEACTIVATE_ANON;
-
- /*
- * When refaults are being observed, it means a new
- * workingset is being established. Deactivate to get
- * rid of any stale active pages quickly.
- */
- refaults = lruvec_page_state(target_lruvec,
- WORKINGSET_ACTIVATE_FILE);
- if (refaults != target_lruvec->refaults[1] ||
- inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
- sc->may_deactivate |= DEACTIVATE_FILE;
- else
- sc->may_deactivate &= ~DEACTIVATE_FILE;
- } else
- sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
-
- /*
- * If we have plenty of inactive file pages that aren't
- * thrashing, try to reclaim those first before touching
- * anonymous pages.
- */
- file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
- if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
- sc->cache_trim_mode = 1;
- else
- sc->cache_trim_mode = 0;
-
- /*
- * Prevent the reclaimer from falling into the cache trap: as
- * cache pages start out inactive, every cache fault will tip
- * the scan balance towards the file LRU. And as the file LRU
- * shrinks, so does the window for rotation from references.
- * This means we have a runaway feedback loop where a tiny
- * thrashing file LRU becomes infinitely more attractive than
- * anon pages. Try to detect this based on file LRU size.
- */
- if (!cgroup_reclaim(sc)) {
- unsigned long total_high_wmark = 0;
- unsigned long free, anon;
- int z;
-
- free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
- file = node_page_state(pgdat, NR_ACTIVE_FILE) +
- node_page_state(pgdat, NR_INACTIVE_FILE);
-
- for (z = 0; z < MAX_NR_ZONES; z++) {
- struct zone *zone = &pgdat->node_zones[z];
- if (!managed_zone(zone))
- continue;
-
- total_high_wmark += high_wmark_pages(zone);
- }
-
- /*
- * Consider anon: if that's low too, this isn't a
- * runaway file reclaim problem, but rather just
- * extreme pressure. Reclaim as per usual then.
- */
- anon = node_page_state(pgdat, NR_INACTIVE_ANON);
-
- sc->file_is_tiny =
- file + free <= total_high_wmark &&
- !(sc->may_deactivate & DEACTIVATE_ANON) &&
- anon >> sc->priority;
- }
+ prepare_scan_count(pgdat, sc);
shrink_node_memcgs(pgdat, sc);
--
2.31.0.rc2.261.g7f71774620-goog

View File

@ -0,0 +1,749 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 0360FC4332E
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:07 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id E377164F1E
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:06 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S233512AbhCMH6m (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 13 Mar 2021 02:58:42 -0500
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58996 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S232709AbhCMH6J (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 13 Mar 2021 02:58:09 -0500
Received: from mail-qk1-x749.google.com (mail-qk1-x749.google.com [IPv6:2607:f8b0:4864:20::749])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id E0386C061574
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:08 -0800 (PST)
Received: by mail-qk1-x749.google.com with SMTP id h134so19965073qke.1
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:08 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=7aNjMZwXzkZhXFvRmRWQJ8BQ+8lusUpXBFHauhX2ubg=;
b=rXTENBp2Eom7kHIkgQlwaM0zAjOFi5gmzyQ9fUwZQJp4tjb12IZlxofeTMBMfAGa/r
L4Ghc3L00KAFuV2LaP7uyJH7AsU4qMsIGq1k1CkPhOMdO7EV4BDTbgd4vEf68FTu94xi
vXrlWZYOskUlSCRkRuZtatqD65DIQyZkjMKdk2hKBnk6QJmcpPB6RWsgv88u3qVxEBZx
iiEhsjInkvzH6qtUYApIn6cqLI7Fd+8G1HrkDMmx13q4PXkdeunv1Az0GMeKsUNzGYYs
e4N6HA5c9v+Un/TrJ4yGhAbzwYgJSTU4Xrzr5/9QWPKNcFUkXgh0KosPnfdwtEviCEBv
6pmg==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=7aNjMZwXzkZhXFvRmRWQJ8BQ+8lusUpXBFHauhX2ubg=;
b=YWegDePJI84B+MDTMEsT3ncwRQ6xqfLVLoxaVcjlpDCwizTWwtdyaGlleP04JZ5N+5
AkfxqK0DQUXAyBBm7v7dckpiAm/jUldpE5n9Uh4ZmUf6oJt35HCt8C2aWgCxnV6YH3xL
86xAS1GV/vZb1DaCjG3Fa6mqH6EJPd/c9xZPVGxYnPYoLGKZDyI2j04nHrPyKye2NZMZ
HQohID0ijQkEqVBAuj4H9CyNS/XwXkM24UKVFyYk6hKAmmMp/Na4vDYy5LN2rRariQhj
uPMou6WiKPb6Ph6mcd35an6LnCUHgzKHE2Tu+AnxXcrqTu9ijLJ5E6/FY2cCcrIEXK7S
VoHQ==
X-Gm-Message-State: AOAM530An7ZbSyg9gdVA41zNIb/3lnpDD6ALzchYWG8q+sQY4pqkltfs
r5qU0vNqoKeYC+YKblti/xtfb5zboKM=
X-Google-Smtp-Source: ABdhPJxXAZ3oX16iFANwEv0b7ybctnsmZAK24tAjZNdUXRU2Fm50sCFyGpIqOrn3ll+aJz5mcnB7+tMAtOQ=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:f931:d3e4:faa0:4f74])
(user=yuzhao job=sendgmr) by 2002:a0c:b8a3:: with SMTP id y35mr15828724qvf.23.1615622288067;
Fri, 12 Mar 2021 23:58:08 -0800 (PST)
Date: Sat, 13 Mar 2021 00:57:42 -0700
In-Reply-To: <20210313075747.3781593-1-yuzhao@google.com>
Message-Id: <20210313075747.3781593-10-yuzhao@google.com>
Mime-Version: 1.0
References: <20210313075747.3781593-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.0.rc2.261.g7f71774620-goog
Subject: [PATCH v1 09/14] mm: multigenerational lru: mm_struct list
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alex.shi@linux.alibaba.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>, Michal Hocko <mhocko@suse.com>,
Roman Gushchin <guro@fb.com>, Vlastimil Babka <vbabka@suse.cz>,
Wei Yang <richard.weiyang@linux.alibaba.com>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>,
linux-kernel@vger.kernel.org, page-reclaim@google.com,
Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210313075747.3781593-10-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Add infrastructure that maintains either a system-wide mm_struct
list or per-memcg mm_struct lists. Multiple threads can concurrently
work on the same mm_struct list, and each of them will be given a
different mm_struct. Those who finish early can optionally wait on the
rest after the iterator has reached the end of the list.
This infrastructure also tracks whether an mm_struct is being used on
any CPU or has been used since the last time a worker looked at it.
In other words, workers will not be given an mm_struct that belongs to
a process that has been sleeping since the last walk.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
fs/exec.c | 2 +
include/linux/memcontrol.h | 4 +
include/linux/mm_types.h | 135 +++++++++++++++++++
include/linux/mmzone.h | 2 -
kernel/exit.c | 1 +
kernel/fork.c | 10 ++
kernel/kthread.c | 1 +
kernel/sched/core.c | 2 +
mm/memcontrol.c | 28 ++++
mm/vmscan.c | 263 +++++++++++++++++++++++++++++++++++++
10 files changed, 446 insertions(+), 2 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c
index 18594f11c31f..c691d4d7720c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1008,6 +1008,7 @@ static int exec_mmap(struct mm_struct *mm)
active_mm = tsk->active_mm;
tsk->active_mm = mm;
tsk->mm = mm;
+ lru_gen_add_mm(mm);
/*
* This prevents preemption while active_mm is being loaded and
* it and mm are being updated, which could cause problems for
@@ -1018,6 +1019,7 @@ static int exec_mmap(struct mm_struct *mm)
if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
local_irq_enable();
activate_mm(active_mm, mm);
+ lru_gen_switch_mm(active_mm, mm);
if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
local_irq_enable();
tsk->mm->vmacache_seqnum = 0;
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f325aeb4b4e8..591557c5b7e2 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -335,6 +335,10 @@ struct mem_cgroup {
struct deferred_split deferred_split_queue;
#endif
+#ifdef CONFIG_LRU_GEN
+ struct lru_gen_mm_list *mm_list;
+#endif
+
struct mem_cgroup_per_node *nodeinfo[0];
/* WARNING: nodeinfo must be the last member here */
};
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0974ad501a47..b8a038a016f2 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -15,6 +15,8 @@
#include <linux/page-flags-layout.h>
#include <linux/workqueue.h>
#include <linux/seqlock.h>
+#include <linux/nodemask.h>
+#include <linux/mmdebug.h>
#include <asm/mmu.h>
@@ -382,6 +384,8 @@ struct core_state {
struct completion startup;
};
+#define ANON_AND_FILE 2
+
struct kioctx_table;
struct mm_struct {
struct {
@@ -560,6 +564,22 @@ struct mm_struct {
#ifdef CONFIG_IOMMU_SUPPORT
u32 pasid;
+#endif
+#ifdef CONFIG_LRU_GEN
+ struct {
+ /* node of a global or per-memcg mm list */
+ struct list_head list;
+#ifdef CONFIG_MEMCG
+ /* points to memcg of the owner task above */
+ struct mem_cgroup *memcg;
+#endif
+ /* indicates this mm has been used since last walk */
+ nodemask_t nodes[ANON_AND_FILE];
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ /* number of cpus that are using this mm */
+ atomic_t nr_cpus;
+#endif
+ } lru_gen;
#endif
} __randomize_layout;
@@ -587,6 +607,121 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
return (struct cpumask *)&mm->cpu_bitmap;
}
+#ifdef CONFIG_LRU_GEN
+
+struct lru_gen_mm_list {
+ /* head of a global or per-memcg mm list */
+ struct list_head head;
+ /* protects the list */
+ spinlock_t lock;
+ struct {
+ /* set to max_seq after each round of walk */
+ unsigned long cur_seq;
+ /* next mm on the list to walk */
+ struct list_head *iter;
+ /* to wait for last worker to finish */
+ struct wait_queue_head wait;
+ /* number of concurrent workers */
+ int nr_workers;
+ } nodes[0];
+};
+
+void lru_gen_init_mm(struct mm_struct *mm);
+void lru_gen_add_mm(struct mm_struct *mm);
+void lru_gen_del_mm(struct mm_struct *mm);
+#ifdef CONFIG_MEMCG
+int lru_gen_alloc_mm_list(struct mem_cgroup *memcg);
+void lru_gen_free_mm_list(struct mem_cgroup *memcg);
+void lru_gen_migrate_mm(struct mm_struct *mm);
+#endif
+
+/*
+ * Track usage so mms that haven't been used since last walk can be skipped.
+ *
+ * This function introduces a theoretical overhead for each mm switch, but it
+ * hasn't been measurable.
+ */
+static inline void lru_gen_switch_mm(struct mm_struct *old, struct mm_struct *new)
+{
+ int file;
+
+ /* exclude init_mm, efi_mm, etc. */
+ if (!core_kernel_data((unsigned long)old)) {
+ VM_BUG_ON(old == &init_mm);
+
+ for (file = 0; file < ANON_AND_FILE; file++)
+ nodes_setall(old->lru_gen.nodes[file]);
+
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ atomic_dec(&old->lru_gen.nr_cpus);
+ VM_BUG_ON_MM(atomic_read(&old->lru_gen.nr_cpus) < 0, old);
+#endif
+ } else
+ VM_BUG_ON_MM(READ_ONCE(old->lru_gen.list.prev) ||
+ READ_ONCE(old->lru_gen.list.next), old);
+
+ if (!core_kernel_data((unsigned long)new)) {
+ VM_BUG_ON(new == &init_mm);
+
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ atomic_inc(&new->lru_gen.nr_cpus);
+ VM_BUG_ON_MM(atomic_read(&new->lru_gen.nr_cpus) < 0, new);
+#endif
+ } else
+ VM_BUG_ON_MM(READ_ONCE(new->lru_gen.list.prev) ||
+ READ_ONCE(new->lru_gen.list.next), new);
+}
+
+/* Returns whether the mm is being used on any cpus. */
+static inline bool lru_gen_mm_is_active(struct mm_struct *mm)
+{
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ return !cpumask_empty(mm_cpumask(mm));
+#else
+ return atomic_read(&mm->lru_gen.nr_cpus);
+#endif
+}
+
+#else /* CONFIG_LRU_GEN */
+
+static inline void lru_gen_init_mm(struct mm_struct *mm)
+{
+}
+
+static inline void lru_gen_add_mm(struct mm_struct *mm)
+{
+}
+
+static inline void lru_gen_del_mm(struct mm_struct *mm)
+{
+}
+
+#ifdef CONFIG_MEMCG
+static inline int lru_gen_alloc_mm_list(struct mem_cgroup *memcg)
+{
+ return 0;
+}
+
+static inline void lru_gen_free_mm_list(struct mem_cgroup *memcg)
+{
+}
+
+static inline void lru_gen_migrate_mm(struct mm_struct *mm)
+{
+}
+#endif
+
+static inline void lru_gen_switch_mm(struct mm_struct *old, struct mm_struct *new)
+{
+}
+
+static inline bool lru_gen_mm_is_active(struct mm_struct *mm)
+{
+ return false;
+}
+
+#endif /* CONFIG_LRU_GEN */
+
struct mmu_gather;
extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 47946cec7584..a99a1050565a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -285,8 +285,6 @@ static inline bool is_active_lru(enum lru_list lru)
return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
}
-#define ANON_AND_FILE 2
-
enum lruvec_flags {
LRUVEC_CONGESTED, /* lruvec has many dirty pages
* backed by a congested BDI
diff --git a/kernel/exit.c b/kernel/exit.c
index 04029e35e69a..e4292717ce37 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -422,6 +422,7 @@ void mm_update_next_owner(struct mm_struct *mm)
goto retry;
}
WRITE_ONCE(mm->owner, c);
+ lru_gen_migrate_mm(mm);
task_unlock(c);
put_task_struct(c);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index d3171e8e88e5..e261b797955d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -665,6 +665,7 @@ static void check_mm(struct mm_struct *mm)
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
#endif
+ VM_BUG_ON_MM(lru_gen_mm_is_active(mm), mm);
}
#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
@@ -1047,6 +1048,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
goto fail_nocontext;
mm->user_ns = get_user_ns(user_ns);
+ lru_gen_init_mm(mm);
return mm;
fail_nocontext:
@@ -1089,6 +1091,7 @@ static inline void __mmput(struct mm_struct *mm)
}
if (mm->binfmt)
module_put(mm->binfmt->module);
+ lru_gen_del_mm(mm);
mmdrop(mm);
}
@@ -2513,6 +2516,13 @@ pid_t kernel_clone(struct kernel_clone_args *args)
get_task_struct(p);
}
+ if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) {
+ /* lock p to synchronize with memcg migration */
+ task_lock(p);
+ lru_gen_add_mm(p->mm);
+ task_unlock(p);
+ }
+
wake_up_new_task(p);
/* forking complete and child started to run, tell ptracer */
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 1578973c5740..8da7767bb06a 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1303,6 +1303,7 @@ void kthread_use_mm(struct mm_struct *mm)
tsk->mm = mm;
membarrier_update_current_mm(mm);
switch_mm_irqs_off(active_mm, mm, tsk);
+ lru_gen_switch_mm(active_mm, mm);
local_irq_enable();
task_unlock(tsk);
#ifdef finish_arch_post_lock_switch
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ca2bb629595f..56274a14ce09 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4308,6 +4308,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
* finish_task_switch()'s mmdrop().
*/
switch_mm_irqs_off(prev->active_mm, next->mm, next);
+ lru_gen_switch_mm(prev->active_mm, next->mm);
if (!prev->mm) { // from kernel
/* will mmdrop() in finish_task_switch(). */
@@ -7599,6 +7600,7 @@ void idle_task_exit(void)
if (mm != &init_mm) {
switch_mm(mm, &init_mm, current);
+ lru_gen_switch_mm(mm, &init_mm);
finish_arch_post_lock_switch();
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 845eec01ef9d..5836780fe138 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5209,6 +5209,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
free_mem_cgroup_per_node_info(memcg, node);
free_percpu(memcg->vmstats_percpu);
free_percpu(memcg->vmstats_local);
+ lru_gen_free_mm_list(memcg);
kfree(memcg);
}
@@ -5261,6 +5262,9 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (alloc_mem_cgroup_per_node_info(memcg, node))
goto fail;
+ if (lru_gen_alloc_mm_list(memcg))
+ goto fail;
+
if (memcg_wb_domain_init(memcg, GFP_KERNEL))
goto fail;
@@ -6165,6 +6169,29 @@ static void mem_cgroup_move_task(void)
}
#endif
+#ifdef CONFIG_LRU_GEN
+static void mem_cgroup_attach(struct cgroup_taskset *tset)
+{
+ struct cgroup_subsys_state *css;
+ struct task_struct *task = NULL;
+
+ cgroup_taskset_for_each_leader(task, css, tset)
+ ;
+
+ if (!task)
+ return;
+
+ task_lock(task);
+ if (task->mm && task->mm->owner == task)
+ lru_gen_migrate_mm(task->mm);
+ task_unlock(task);
+}
+#else
+static void mem_cgroup_attach(struct cgroup_taskset *tset)
+{
+}
+#endif
+
static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
{
if (value == PAGE_COUNTER_MAX)
@@ -6505,6 +6532,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
.css_free = mem_cgroup_css_free,
.css_reset = mem_cgroup_css_reset,
.can_attach = mem_cgroup_can_attach,
+ .attach = mem_cgroup_attach,
.cancel_attach = mem_cgroup_cancel_attach,
.post_attach = mem_cgroup_move_task,
.dfl_cftypes = memory_files,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1a24d2e0a4cb..f7657ab0d4b7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4314,3 +4314,266 @@ void check_move_unevictable_pages(struct pagevec *pvec)
}
}
EXPORT_SYMBOL_GPL(check_move_unevictable_pages);
+
+#ifdef CONFIG_LRU_GEN
+
+/******************************************************************************
+ * global and per-memcg mm list
+ ******************************************************************************/
+
+/*
+ * After pages are faulted in, they become the youngest generation. They must
+ * go through the aging process twice before they can be evicted. After the
+ * first scan, the accessed bits set during the initial faults are cleared and
+ * the pages become the second youngest generation. The second scan makes sure
+ * they haven't been used since the first.
+ */
+#define MIN_NR_GENS 2
+
+static struct lru_gen_mm_list *global_mm_list;
+
+static struct lru_gen_mm_list *alloc_mm_list(void)
+{
+ int nid;
+ struct lru_gen_mm_list *mm_list;
+
+ mm_list = kzalloc(struct_size(mm_list, nodes, nr_node_ids), GFP_KERNEL);
+ if (!mm_list)
+ return NULL;
+
+ INIT_LIST_HEAD(&mm_list->head);
+ spin_lock_init(&mm_list->lock);
+
+ for_each_node(nid) {
+ mm_list->nodes[nid].cur_seq = MIN_NR_GENS - 1;
+ mm_list->nodes[nid].iter = &mm_list->head;
+ init_waitqueue_head(&mm_list->nodes[nid].wait);
+ }
+
+ return mm_list;
+}
+
+static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg)
+{
+#ifdef CONFIG_MEMCG
+ if (!mem_cgroup_disabled())
+ return memcg ? memcg->mm_list : root_mem_cgroup->mm_list;
+#endif
+ VM_BUG_ON(memcg);
+
+ return global_mm_list;
+}
+
+void lru_gen_init_mm(struct mm_struct *mm)
+{
+ int file;
+
+ INIT_LIST_HEAD(&mm->lru_gen.list);
+#ifdef CONFIG_MEMCG
+ mm->lru_gen.memcg = NULL;
+#endif
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ atomic_set(&mm->lru_gen.nr_cpus, 0);
+#endif
+ for (file = 0; file < ANON_AND_FILE; file++)
+ nodes_clear(mm->lru_gen.nodes[file]);
+}
+
+void lru_gen_add_mm(struct mm_struct *mm)
+{
+ struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
+ struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
+
+ VM_BUG_ON_MM(!list_empty(&mm->lru_gen.list), mm);
+#ifdef CONFIG_MEMCG
+ VM_BUG_ON_MM(mm->lru_gen.memcg, mm);
+ WRITE_ONCE(mm->lru_gen.memcg, memcg);
+#endif
+ spin_lock(&mm_list->lock);
+ list_add_tail(&mm->lru_gen.list, &mm_list->head);
+ spin_unlock(&mm_list->lock);
+}
+
+void lru_gen_del_mm(struct mm_struct *mm)
+{
+ int nid;
+#ifdef CONFIG_MEMCG
+ struct lru_gen_mm_list *mm_list = get_mm_list(mm->lru_gen.memcg);
+#else
+ struct lru_gen_mm_list *mm_list = get_mm_list(NULL);
+#endif
+
+ spin_lock(&mm_list->lock);
+
+ for_each_node(nid) {
+ if (mm_list->nodes[nid].iter != &mm->lru_gen.list)
+ continue;
+
+ mm_list->nodes[nid].iter = mm_list->nodes[nid].iter->next;
+ if (mm_list->nodes[nid].iter == &mm_list->head)
+ WRITE_ONCE(mm_list->nodes[nid].cur_seq,
+ mm_list->nodes[nid].cur_seq + 1);
+ }
+
+ list_del_init(&mm->lru_gen.list);
+
+ spin_unlock(&mm_list->lock);
+
+#ifdef CONFIG_MEMCG
+ mem_cgroup_put(mm->lru_gen.memcg);
+ WRITE_ONCE(mm->lru_gen.memcg, NULL);
+#endif
+}
+
+#ifdef CONFIG_MEMCG
+int lru_gen_alloc_mm_list(struct mem_cgroup *memcg)
+{
+ if (mem_cgroup_disabled())
+ return 0;
+
+ memcg->mm_list = alloc_mm_list();
+
+ return memcg->mm_list ? 0 : -ENOMEM;
+}
+
+void lru_gen_free_mm_list(struct mem_cgroup *memcg)
+{
+ kfree(memcg->mm_list);
+ memcg->mm_list = NULL;
+}
+
+void lru_gen_migrate_mm(struct mm_struct *mm)
+{
+ struct mem_cgroup *memcg;
+
+ lockdep_assert_held(&mm->owner->alloc_lock);
+
+ if (mem_cgroup_disabled())
+ return;
+
+ rcu_read_lock();
+ memcg = mem_cgroup_from_task(mm->owner);
+ rcu_read_unlock();
+ if (memcg == mm->lru_gen.memcg)
+ return;
+
+ VM_BUG_ON_MM(!mm->lru_gen.memcg, mm);
+ VM_BUG_ON_MM(list_empty(&mm->lru_gen.list), mm);
+
+ lru_gen_del_mm(mm);
+ lru_gen_add_mm(mm);
+}
+
+static bool mm_has_migrated(struct mm_struct *mm, struct mem_cgroup *memcg)
+{
+ return READ_ONCE(mm->lru_gen.memcg) != memcg;
+}
+#else
+static bool mm_has_migrated(struct mm_struct *mm, struct mem_cgroup *memcg)
+{
+ return false;
+}
+#endif
+
+static bool should_skip_mm(struct mm_struct *mm, int nid, int swappiness)
+{
+ int file;
+ unsigned long size = 0;
+
+ if (mm_is_oom_victim(mm))
+ return true;
+
+ for (file = !swappiness; file < ANON_AND_FILE; file++) {
+ if (lru_gen_mm_is_active(mm) || node_isset(nid, mm->lru_gen.nodes[file]))
+ size += file ? get_mm_counter(mm, MM_FILEPAGES) :
+ get_mm_counter(mm, MM_ANONPAGES) +
+ get_mm_counter(mm, MM_SHMEMPAGES);
+ }
+
+ if (size < SWAP_CLUSTER_MAX)
+ return true;
+
+ return !mmget_not_zero(mm);
+}
+
+/* To support multiple workers that concurrently walk mm list. */
+static bool get_next_mm(struct lruvec *lruvec, unsigned long next_seq,
+ int swappiness, struct mm_struct **iter)
+{
+ bool last = true;
+ struct mm_struct *mm = NULL;
+ int nid = lruvec_pgdat(lruvec)->node_id;
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
+
+ if (*iter)
+ mmput_async(*iter);
+ else if (next_seq <= READ_ONCE(mm_list->nodes[nid].cur_seq))
+ return false;
+
+ spin_lock(&mm_list->lock);
+
+ VM_BUG_ON(next_seq > mm_list->nodes[nid].cur_seq + 1);
+ VM_BUG_ON(*iter && next_seq < mm_list->nodes[nid].cur_seq);
+ VM_BUG_ON(*iter && !mm_list->nodes[nid].nr_workers);
+
+ if (next_seq <= mm_list->nodes[nid].cur_seq) {
+ last = *iter;
+ goto done;
+ }
+
+ if (mm_list->nodes[nid].iter == &mm_list->head) {
+ VM_BUG_ON(*iter || mm_list->nodes[nid].nr_workers);
+ mm_list->nodes[nid].iter = mm_list->nodes[nid].iter->next;
+ }
+
+ while (!mm && mm_list->nodes[nid].iter != &mm_list->head) {
+ mm = list_entry(mm_list->nodes[nid].iter, struct mm_struct, lru_gen.list);
+ mm_list->nodes[nid].iter = mm_list->nodes[nid].iter->next;
+ if (should_skip_mm(mm, nid, swappiness))
+ mm = NULL;
+ }
+
+ if (mm_list->nodes[nid].iter == &mm_list->head)
+ WRITE_ONCE(mm_list->nodes[nid].cur_seq,
+ mm_list->nodes[nid].cur_seq + 1);
+done:
+ if (*iter && !mm)
+ mm_list->nodes[nid].nr_workers--;
+ if (!*iter && mm)
+ mm_list->nodes[nid].nr_workers++;
+
+ last = last && !mm_list->nodes[nid].nr_workers &&
+ mm_list->nodes[nid].iter == &mm_list->head;
+
+ spin_unlock(&mm_list->lock);
+
+ *iter = mm;
+
+ return last;
+}
+
+/******************************************************************************
+ * initialization
+ ******************************************************************************/
+
+static int __init init_lru_gen(void)
+{
+ if (mem_cgroup_disabled()) {
+ global_mm_list = alloc_mm_list();
+ if (!global_mm_list) {
+ pr_err("lru_gen: failed to allocate global mm list\n");
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+};
+/*
+ * We want to run as early as possible because some debug code, e.g.,
+ * dma_resv_lockdep(), calls mm_alloc() and mmput(). We only depend on mm_kobj,
+ * which is initialized one stage earlier by postcore_initcall().
+ */
+arch_initcall(init_lru_gen);
+
+#endif /* CONFIG_LRU_GEN */
--
2.31.0.rc2.261.g7f71774620-goog

View File

@ -0,0 +1,612 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 898B5C432C3
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:07 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 6BE1864ECE
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:07 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S233554AbhCMH6p (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 13 Mar 2021 02:58:45 -0500
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59010 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S232974AbhCMH6L (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 13 Mar 2021 02:58:11 -0500
Received: from mail-qt1-x84a.google.com (mail-qt1-x84a.google.com [IPv6:2607:f8b0:4864:20::84a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id AED49C061574
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:11 -0800 (PST)
Received: by mail-qt1-x84a.google.com with SMTP id b21so19336130qtr.8
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:11 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=xqbmxYHaO3cMzT5bj/KOIzl+A4iyODs6ga8GzAKSrkE=;
b=aCMPh4sfMlM3e6I+5or6zJlbHk6bJJ366JgVLE0jRyU2R9ZqTAdl/pzm6yajVT6ycB
R3xLH3mN1h4Agtzp3ZKjLdBAoR0i0R0R6lzXQljBVIbOuqUZFY+sw3o8WmPkGc03xc2m
dr44s/QwksFpUKxra33PNDauwhk0aM45ZbRd238UtPStswmrshOSmIlfZiRKTC4jSk1B
IT5xjTbNazZfQOZvoiY7Q3k+uwbArdxbxTXjz8ad9h7O5bQfF9wGt4VgjerJbWQKhTRU
zgbLNXljWtIdgOpRYmaNN+huMS6V5KkWhB+tZ5LkJGhbn+JmFhmrfjMn35Xnhql3Eveu
q92A==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=xqbmxYHaO3cMzT5bj/KOIzl+A4iyODs6ga8GzAKSrkE=;
b=ELwP/ih5rcnRV8ilH4wHoagohM2fYPTmpUPvhQxKwiuGY8hsFrPo23R7PzwR30oiWI
VctwtgGSRFQFOyfcWEPNP4SMqwwIk1eu8glqg6CQWKa87rY721qEk0JRsgjewFSqdoTI
pKd+k+VBiLUxlIqwPjnBAqVby7cJUv4e/5BOpOxLQJJh+spTjfp7T679SBSky3ro/JqC
EID5nP4xORL+8LLUSfsRJn1CTWks+GWRIh0JOE/gxlIb9DhVYxCv8PRv2CA2pjIrDXjk
3nxSdtaSAsInb0PAzpAG4+NVKF1suSWoJNb8d7fOob5HfXhA0/1aBLU8JsYpZdHS3oQL
ddhw==
X-Gm-Message-State: AOAM5319lFA9q7zjycOKH1TFLtjK6rHjUIfzRtUZRSmbTbvpAGZvBnqr
cx6R08ZkL0PPGp5qZ/enqEaNYC+EDX8=
X-Google-Smtp-Source: ABdhPJx1/5VDYsBrZetaVUsn197WpRCAdLjUCneommYhd37knsedDkkTX9reu4Vy54B+q98cUeZMGTzp9xY=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:f931:d3e4:faa0:4f74])
(user=yuzhao job=sendgmr) by 2002:ad4:4581:: with SMTP id x1mr1753140qvu.9.1615622290851;
Fri, 12 Mar 2021 23:58:10 -0800 (PST)
Date: Sat, 13 Mar 2021 00:57:44 -0700
In-Reply-To: <20210313075747.3781593-1-yuzhao@google.com>
Message-Id: <20210313075747.3781593-12-yuzhao@google.com>
Mime-Version: 1.0
References: <20210313075747.3781593-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.0.rc2.261.g7f71774620-goog
Subject: [PATCH v1 11/14] mm: multigenerational lru: page activation
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alex.shi@linux.alibaba.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>, Michal Hocko <mhocko@suse.com>,
Roman Gushchin <guro@fb.com>, Vlastimil Babka <vbabka@suse.cz>,
Wei Yang <richard.weiyang@linux.alibaba.com>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>,
linux-kernel@vger.kernel.org, page-reclaim@google.com,
Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210313075747.3781593-12-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
In the page fault path, we want to add pages to the per-zone lists
indexed by max_seq as they cannot be evicted without going through
the aging first. For anon pages, we rename
lru_cache_add_inactive_or_unevictable() to lru_cache_add_page_vma()
and add a new parameter, which is set to true in the page fault path,
to indicate whether they should be added to the per-zone lists indexed
by max_seq. For page/swap cache, since we cannot differentiate the
page fault path from the read ahead path at the time we call
lru_cache_add() in add_to_page_cache_lru() and
__read_swap_cache_async(), we have to add a new function
lru_gen_activate_page(), which is essentially activate_page(), to move
pages to the per-zone lists indexed by max_seq at a later time.
Hopefully we would find pages we want to activate in lru_pvecs.lru_add
and simply set PageActive() on them without having to actually move
them.
In the reclaim path, pages mapped around a referenced PTE may also
have been referenced due to spatial locality. We add a new function
lru_gen_scan_around() to scan the vicinity of such a PTE.
In addition, we add a new function page_is_active() to tell whether a
page is active. We cannot use PageActive() because it is only set on
active pages while they are not on multigenerational lru. It is
cleared while pages are on multigenerational lru, in order to spare
the aging the trouble of clearing it when an active generation becomes
inactive. Internally, page_is_active() compares the generation number
of a page with max_seq and max_seq-1, which are active generations and
protected from the eviction. Other generations, which may or may not
exist, are inactive.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
fs/proc/task_mmu.c | 3 ++-
include/linux/mm_inline.h | 52 ++++++++++++++++++++++++++++++++++++++
include/linux/mmzone.h | 6 +++++
include/linux/swap.h | 4 +--
kernel/events/uprobes.c | 2 +-
mm/huge_memory.c | 2 +-
mm/khugepaged.c | 2 +-
mm/memory.c | 14 +++++++----
mm/migrate.c | 2 +-
mm/rmap.c | 6 +++++
mm/swap.c | 26 +++++++++++--------
mm/swapfile.c | 2 +-
mm/userfaultfd.c | 2 +-
mm/vmscan.c | 53 ++++++++++++++++++++++++++++++++++++++-
14 files changed, 150 insertions(+), 26 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3cec6fbef725..7cd173710e76 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -19,6 +19,7 @@
#include <linux/shmem_fs.h>
#include <linux/uaccess.h>
#include <linux/pkeys.h>
+#include <linux/mm_inline.h>
#include <asm/elf.h>
#include <asm/tlb.h>
@@ -1720,7 +1721,7 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
if (PageSwapCache(page))
md->swapcache += nr_pages;
- if (PageActive(page) || PageUnevictable(page))
+ if (PageUnevictable(page) || page_is_active(compound_head(page), NULL))
md->active += nr_pages;
if (PageWriteback(page))
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 2d306cab36bc..a1a382418fc4 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -116,6 +116,49 @@ static inline int page_lru_gen(struct page *page)
return ((READ_ONCE(page->flags) & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
}
+/*
+ * Report whether a page is active. This works regardless of whether
+ * multigenerational lru is enabled.
+ *
+ * A page without a generation number in page->flags is judged by
+ * PageActive(). Otherwise its generation is checked with lru_gen_is_active()
+ * against @lruvec, whose lru_lock the caller must hold, or, when @lruvec is
+ * NULL, against the lruvec looked up from the page's memcg and node under RCU.
+ */
+static inline bool page_is_active(struct page *page, struct lruvec *lruvec)
+{
+	bool ret;
+	int gen = page_lru_gen(page);
+
+	VM_BUG_ON_PAGE(PageTail(page), page);
+
+	/* no generation number stored: fall back to the page flag */
+	if (gen < 0)
+		return PageActive(page);
+
+	if (!lruvec) {
+		struct mem_cgroup *memcg;
+
+		/* no lruvec supplied: find it from the page itself */
+		rcu_read_lock();
+
+		memcg = page_memcg_rcu(page);
+		ret = lru_gen_is_active(mem_cgroup_lruvec(memcg, page_pgdat(page)), gen);
+
+		rcu_read_unlock();
+
+		return ret;
+	}
+
+	VM_BUG_ON_PAGE(PageUnevictable(page), page);
+	VM_BUG_ON_PAGE(PageActive(page), page);
+	lockdep_assert_held(&lruvec->lru_lock);
+
+	return lru_gen_is_active(lruvec, gen);
+}
+
+/*
+ * Activate a page cache or swap cache page once it has been mapped. Nothing
+ * is done when multigenerational lru is disabled, when PageActive() is already
+ * set, or when the vma is VM_LOCKED, VM_SPECIAL or VM_HUGETLB.
+ */
+static inline void lru_gen_activate_page(struct page *page, struct vm_area_struct *vma)
+{
+	bool wanted = lru_gen_enabled() && !PageActive(page);
+
+	if (wanted && !(vma->vm_flags & (VM_LOCKED | VM_SPECIAL | VM_HUGETLB)))
+		activate_page(page);
+}
+
/* Update multigenerational lru sizes in addition to active/inactive lru sizes. */
static inline void lru_gen_update_size(struct page *page, struct lruvec *lruvec,
int old_gen, int new_gen)
@@ -252,6 +295,15 @@ static inline bool lru_gen_enabled(void)
return false;
}
+/*
+ * Fallback variant: the page is active iff PageActive() is set; @lruvec is
+ * unused. NOTE(review): presumably the !CONFIG_LRU_GEN build, since it sits
+ * next to the lru_gen_enabled() stub that returns false -- confirm.
+ */
+static inline bool page_is_active(struct page *page, struct lruvec *lruvec)
+{
+ return PageActive(page);
+}
+
+/*
+ * Fallback variant: does nothing. NOTE(review): presumably the !CONFIG_LRU_GEN
+ * build, so callers need no #ifdef -- confirm.
+ */
+static inline void lru_gen_activate_page(struct page *page, struct vm_area_struct *vma)
+{
+}
+
static inline bool page_set_lru_gen(struct page *page, struct lruvec *lruvec, bool front)
{
return false;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 173083bb846e..99156602cd06 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -292,6 +292,7 @@ enum lruvec_flags {
};
struct lruvec;
+struct page_vma_mapped_walk;
#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
@@ -328,6 +329,7 @@ struct lru_gen {
void lru_gen_init_lruvec(struct lruvec *lruvec);
void lru_gen_set_state(bool enable, bool main, bool swap);
+void lru_gen_scan_around(struct page_vma_mapped_walk *pvmw);
#else /* CONFIG_LRU_GEN */
@@ -339,6 +341,10 @@ static inline void lru_gen_set_state(bool enable, bool main, bool swap)
{
}
+/* No-op stub for builds without CONFIG_LRU_GEN. */
+static inline void lru_gen_scan_around(struct page_vma_mapped_walk *pvmw)
+{
+}
+
#endif /* CONFIG_LRU_GEN */
struct lruvec {
diff --git a/include/linux/swap.h b/include/linux/swap.h
index de2bbbf181ba..0e7532c7db22 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -350,8 +350,8 @@ extern void deactivate_page(struct page *page);
extern void mark_page_lazyfree(struct page *page);
extern void swap_setup(void);
-extern void lru_cache_add_inactive_or_unevictable(struct page *page,
- struct vm_area_struct *vma);
+extern void lru_cache_add_page_vma(struct page *page, struct vm_area_struct *vma,
+ bool faulting);
/* linux/mm/vmscan.c */
extern unsigned long zone_reclaimable_pages(struct zone *zone);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6addc9780319..4e93e5602723 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -184,7 +184,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
if (new_page) {
get_page(new_page);
page_add_new_anon_rmap(new_page, vma, addr, false);
- lru_cache_add_inactive_or_unevictable(new_page, vma);
+ lru_cache_add_page_vma(new_page, vma, false);
} else
/* no new page, just dec_mm_counter for old_page */
dec_mm_counter(mm, MM_ANONPAGES);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index be9bf681313c..62e14da5264e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -637,7 +637,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
entry = mk_huge_pmd(page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
page_add_new_anon_rmap(page, vma, haddr, true);
- lru_cache_add_inactive_or_unevictable(page, vma);
+ lru_cache_add_page_vma(page, vma, true);
pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index a7d6cb912b05..08a43910f232 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1199,7 +1199,7 @@ static void collapse_huge_page(struct mm_struct *mm,
spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd));
page_add_new_anon_rmap(new_page, vma, address, true);
- lru_cache_add_inactive_or_unevictable(new_page, vma);
+ lru_cache_add_page_vma(new_page, vma, true);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, address, pmd, _pmd);
update_mmu_cache_pmd(vma, address, pmd);
diff --git a/mm/memory.c b/mm/memory.c
index c8e357627318..7188607bddb9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -73,6 +73,7 @@
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/vmalloc.h>
+#include <linux/mm_inline.h>
#include <trace/events/kmem.h>
@@ -845,7 +846,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
copy_user_highpage(new_page, page, addr, src_vma);
__SetPageUptodate(new_page);
page_add_new_anon_rmap(new_page, dst_vma, addr, false);
- lru_cache_add_inactive_or_unevictable(new_page, dst_vma);
+ lru_cache_add_page_vma(new_page, dst_vma, false);
rss[mm_counter(new_page)]++;
/* All done, just insert the new page copy in the child */
@@ -2913,7 +2914,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
*/
ptep_clear_flush_notify(vma, vmf->address, vmf->pte);
page_add_new_anon_rmap(new_page, vma, vmf->address, false);
- lru_cache_add_inactive_or_unevictable(new_page, vma);
+ lru_cache_add_page_vma(new_page, vma, true);
/*
* We call the notify macro here because, when using secondary
* mmu page tables (such as kvm shadow page tables), we want the
@@ -3436,9 +3437,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
/* ksm created a completely new copy */
if (unlikely(page != swapcache && swapcache)) {
page_add_new_anon_rmap(page, vma, vmf->address, false);
- lru_cache_add_inactive_or_unevictable(page, vma);
+ lru_cache_add_page_vma(page, vma, true);
} else {
do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
+ lru_gen_activate_page(page, vma);
}
swap_free(entry);
@@ -3582,7 +3584,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, vmf->address, false);
- lru_cache_add_inactive_or_unevictable(page, vma);
+ lru_cache_add_page_vma(page, vma, true);
setpte:
set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
@@ -3707,6 +3709,7 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR);
page_add_file_rmap(page, true);
+ lru_gen_activate_page(page, vma);
/*
* deposit and withdraw with pmd lock held
*/
@@ -3750,10 +3753,11 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
if (write && !(vma->vm_flags & VM_SHARED)) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, addr, false);
- lru_cache_add_inactive_or_unevictable(page, vma);
+ lru_cache_add_page_vma(page, vma, true);
} else {
inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
page_add_file_rmap(page, false);
+ lru_gen_activate_page(page, vma);
}
set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 62b81d5257aa..1064b03cac33 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -3004,7 +3004,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
inc_mm_counter(mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, addr, false);
if (!is_zone_device_page(page))
- lru_cache_add_inactive_or_unevictable(page, vma);
+ lru_cache_add_page_vma(page, vma, false);
get_page(page);
if (flush) {
diff --git a/mm/rmap.c b/mm/rmap.c
index b0fc27e77d6d..a44f9ee74ee1 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -72,6 +72,7 @@
#include <linux/page_idle.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>
+#include <linux/mm_inline.h>
#include <asm/tlbflush.h>
@@ -792,6 +793,11 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
}
if (pvmw.pte) {
+ /* multigenerational lru exploits spatial locality */
+ if (lru_gen_enabled() && pte_young(*pvmw.pte)) {
+ lru_gen_scan_around(&pvmw);
+ referenced++;
+ }
if (ptep_clear_flush_young_notify(vma, address,
pvmw.pte)) {
/*
diff --git a/mm/swap.c b/mm/swap.c
index bd10efe00684..7aa85004b490 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -310,7 +310,7 @@ void lru_note_cost_page(struct page *page)
static void __activate_page(struct page *page, struct lruvec *lruvec)
{
- if (!PageActive(page) && !PageUnevictable(page)) {
+ if (!PageUnevictable(page) && !page_is_active(page, lruvec)) {
int nr_pages = thp_nr_pages(page);
del_page_from_lru_list(page, lruvec);
@@ -341,7 +341,7 @@ static bool need_activate_page_drain(int cpu)
static void activate_page_on_lru(struct page *page)
{
page = compound_head(page);
- if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+ if (PageLRU(page) && !PageUnevictable(page) && !page_is_active(page, NULL)) {
struct pagevec *pvec;
local_lock(&lru_pvecs.lock);
@@ -435,7 +435,7 @@ void mark_page_accessed(struct page *page)
* this list is never rotated or maintained, so marking an
* evictable page accessed has no effect.
*/
- } else if (!PageActive(page)) {
+ } else if (!page_is_active(page, NULL)) {
activate_page(page);
ClearPageReferenced(page);
workingset_activation(page);
@@ -471,15 +471,14 @@ void lru_cache_add(struct page *page)
EXPORT_SYMBOL(lru_cache_add);
/**
- * lru_cache_add_inactive_or_unevictable
+ * lru_cache_add_page_vma
* @page: the page to be added to LRU
* @vma: vma in which page is mapped for determining reclaimability
*
- * Place @page on the inactive or unevictable LRU list, depending on its
- * evictability.
+ * Place @page on an LRU list, depending on its evictability.
*/
-void lru_cache_add_inactive_or_unevictable(struct page *page,
- struct vm_area_struct *vma)
+void lru_cache_add_page_vma(struct page *page, struct vm_area_struct *vma,
+ bool faulting)
{
bool unevictable;
@@ -496,6 +495,11 @@ void lru_cache_add_inactive_or_unevictable(struct page *page,
__mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
}
+
+ /* multigenerational lru uses PageActive() to track page faults */
+ if (lru_gen_enabled() && !unevictable && faulting)
+ SetPageActive(page);
+
lru_cache_add(page);
}
@@ -522,7 +526,7 @@ void lru_cache_add_inactive_or_unevictable(struct page *page,
*/
static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
{
- bool active = PageActive(page);
+ bool active = page_is_active(page, lruvec);
int nr_pages = thp_nr_pages(page);
if (PageUnevictable(page))
@@ -562,7 +566,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec)
{
- if (PageActive(page) && !PageUnevictable(page)) {
+ if (!PageUnevictable(page) && page_is_active(page, lruvec)) {
int nr_pages = thp_nr_pages(page);
del_page_from_lru_list(page, lruvec);
@@ -676,7 +680,7 @@ void deactivate_file_page(struct page *page)
*/
void deactivate_page(struct page *page)
{
- if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
+ if (PageLRU(page) && !PageUnevictable(page) && page_is_active(page, NULL)) {
struct pagevec *pvec;
local_lock(&lru_pvecs.lock);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index fe03cfeaa08f..c0956b3bde03 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1936,7 +1936,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
page_add_anon_rmap(page, vma, addr, false);
} else { /* ksm created a completely new copy */
page_add_new_anon_rmap(page, vma, addr, false);
- lru_cache_add_inactive_or_unevictable(page, vma);
+ lru_cache_add_page_vma(page, vma, false);
}
swap_free(entry);
out:
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 9a3d451402d7..e1d4cd3103b8 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -123,7 +123,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
inc_mm_counter(dst_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
- lru_cache_add_inactive_or_unevictable(page, dst_vma);
+ lru_cache_add_page_vma(page, dst_vma, true);
set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fd49a9a5d7f5..ce868d89dc53 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1876,7 +1876,7 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
add_page_to_lru_list(page, lruvec);
nr_pages = thp_nr_pages(page);
nr_moved += nr_pages;
- if (PageActive(page))
+ if (page_is_active(page, lruvec))
workingset_age_nonresident(lruvec, nr_pages);
}
@@ -4688,6 +4688,57 @@ static int page_update_lru_gen(struct page *page, int new_gen)
return old_gen;
}
+/*
+ * Scan the PTEs surrounding pvmw->pte and move the pages mapped by young ones
+ * to the generation that corresponds to max_seq.
+ *
+ * The scanned range is the PMD-sized region containing pvmw->address, clipped
+ * to the boundaries of pvmw->vma. Only pages on the same node and in the same
+ * memcg as pvmw->page are updated. The caller must hold pvmw->ptl.
+ */
+void lru_gen_scan_around(struct page_vma_mapped_walk *pvmw)
+{
+ pte_t *pte;
+ unsigned long start, end;
+ int old_gen, new_gen;
+ unsigned long flags;
+ struct lruvec *lruvec;
+ struct mem_cgroup *memcg;
+ struct pglist_data *pgdat = page_pgdat(pvmw->page);
+
+ lockdep_assert_held(pvmw->ptl);
+ VM_BUG_ON_VMA(pvmw->address < pvmw->vma->vm_start, pvmw->vma);
+
+ start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start);
+ end = pmd_addr_end(pvmw->address, pvmw->vma->vm_end);
+ /* step back from pvmw->pte to the PTE that maps "start" */
+ pte = pvmw->pte - ((pvmw->address - start) >> PAGE_SHIFT);
+
+ /* the lruvec lock is taken inside the page memcg lock and released first */
+ memcg = lock_page_memcg(pvmw->page);
+ lruvec = lock_page_lruvec_irqsave(pvmw->page, &flags);
+
+ /* pages found young are moved to the generation of max_seq */
+ new_gen = lru_gen_from_seq(lruvec->evictable.max_seq);
+
+ for (; start != end; pte++, start += PAGE_SIZE) {
+ struct page *page;
+ unsigned long pfn = pte_pfn(*pte);
+
+ /* skip PTEs that are not present, not young, or map the zero page */
+ if (!pte_present(*pte) || !pte_young(*pte) || is_zero_pfn(pfn))
+ continue;
+
+ /* skip pfns outside the pfn range of the node of pvmw->page */
+ if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
+ continue;
+
+ page = compound_head(pte_page(*pte));
+ if (page_to_nid(page) != pgdat->node_id)
+ continue;
+ /* the lruvec locked above only covers pages of this memcg */
+ if (page_memcg_rcu(page) != memcg)
+ continue;
+ /*
+ * We may be holding many locks. So try to finish as fast as
+ * possible and leave the accessed and the dirty bits to page
+ * table walk.
+ */
+ old_gen = page_update_lru_gen(page, new_gen);
+ /*
+ * Adjust the size counters only if the generation changed.
+ * NOTE(review): old_gen < 0 presumably means the page carried
+ * no generation number -- confirm in page_update_lru_gen().
+ */
+ if (old_gen >= 0 && old_gen != new_gen)
+ lru_gen_update_size(page, lruvec, old_gen, new_gen);
+ }
+
+ unlock_page_lruvec_irqrestore(lruvec, flags);
+ unlock_page_memcg(pvmw->page);
+}
+
struct mm_walk_args {
struct mem_cgroup *memcg;
unsigned long max_seq;
--
2.31.0.rc2.261.g7f71774620-goog

View File

@ -0,0 +1,492 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 29E96C4321A
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:07 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 1649264ECE
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:07 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S233577AbhCMH6q (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 13 Mar 2021 02:58:46 -0500
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59016 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S232992AbhCMH6N (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 13 Mar 2021 02:58:13 -0500
Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id F2926C061574
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:12 -0800 (PST)
Received: by mail-yb1-xb49.google.com with SMTP id b127so31851193ybc.13
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:12 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=HpM4Xhq08m+hN0lK+sEgEqrsIbbrPIT86c74gNI/u9w=;
b=iZooLRgVq0QKt7uS3ecgWUo9C9cGTHyr/2U5sxKCS6QXN0IgrJO3PAgtZX95WUfwcs
ZSAQR6gNrWG+wP3LmXgu+DSkMxuKFw0cNRjKklelEg68uDJJWokPALnGLPA4nMy8dmmL
gNmywFSsTBT8s8Opdpx1NmeoX3AoH+b/8wRfOXpwgaGDC+vZ82D7uiEVDdLrnbyopU8f
IpPsdEWG/PKjNmqr9aaxr+DYpbpcOwr48yDoUFRDhKXq2kRMLS3q9Cr2+5Aa3cOZI9gL
a6NMw3O/GtXpPy+iDG3kH66GA3tICHTkX1Zm9PHqXzTR/dFawFQyNro80/QMStsyVm0P
hUPA==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=HpM4Xhq08m+hN0lK+sEgEqrsIbbrPIT86c74gNI/u9w=;
b=enhVyxFe4tZkU5M5CCcAvOq7XPOtrI0KR9T9RiANQFtlMzTvtrxKdJw8h9WJfPozxv
DODj66oaquifIDjLTzxgHvuElCPtya1VXVDoZr/k/dHc0n6u5v10BW2bAYXIVIBtZgMb
WwMcgh3rGt4wsE7QWUeFmRne+9VW55GHi1MtsKV2+fvK2y7UyMEoP6hy3iQbBcTxeLH0
F684asdtadbsGGkyGN1KvT5S9l71CJFhEI6Zz6ag4NgojHVz2zN0kv4/qzQCkWhPUHNs
Pqv8OmYBrbTHERb8vRmdpXAn8C+qWgQ5PHrVk8OOP3YB1/IBCCwYKqMe/Q7FeRocip4d
Q5vw==
X-Gm-Message-State: AOAM532t/+vC4rNBlDPUITq8/UGTKPQERiyG/Yu/AZbe1MTxG9Izfw1X
ohDrQJQrlikp3FZQGyfgd8ZO5sD99kg=
X-Google-Smtp-Source: ABdhPJyqRrRSS1bJd41QeWsraiQMIBjlJledYF/KyYHH/bW9K+768sYy2pOiqW2DerYXRlOndOy2egSD7FE=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:f931:d3e4:faa0:4f74])
(user=yuzhao job=sendgmr) by 2002:a25:1184:: with SMTP id 126mr22571060ybr.430.1615622292183;
Fri, 12 Mar 2021 23:58:12 -0800 (PST)
Date: Sat, 13 Mar 2021 00:57:45 -0700
In-Reply-To: <20210313075747.3781593-1-yuzhao@google.com>
Message-Id: <20210313075747.3781593-13-yuzhao@google.com>
Mime-Version: 1.0
References: <20210313075747.3781593-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.0.rc2.261.g7f71774620-goog
Subject: [PATCH v1 12/14] mm: multigenerational lru: user space interface
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alex.shi@linux.alibaba.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>, Michal Hocko <mhocko@suse.com>,
Roman Gushchin <guro@fb.com>, Vlastimil Babka <vbabka@suse.cz>,
Wei Yang <richard.weiyang@linux.alibaba.com>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>,
linux-kernel@vger.kernel.org, page-reclaim@google.com,
Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210313075747.3781593-13-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Add a sysfs file /sys/kernel/mm/lru_gen/enabled so user space can
enable and disable multigenerational lru at runtime.
Add a sysfs file /sys/kernel/mm/lru_gen/spread so user space can
spread pages out across multiple generations. More generations make
the background aging more aggressive.
Add a debugfs file /sys/kernel/debug/lru_gen so user space can monitor
multigenerational lru and trigger the aging and the eviction. This
file has the following output:
memcg memcg_id memcg_path
node node_id
min_gen birth_time anon_size file_size
...
max_gen birth_time anon_size file_size
Given a memcg and a node, "min_gen" is the oldest generation (number)
and "max_gen" is the youngest. Birth time is in milliseconds. Anon and
file sizes are in pages.
Write "+ memcg_id node_id gen [swappiness]" to this file to account
referenced pages to generation "max_gen" and create next generation
"max_gen"+1. "gen" must be equal to "max_gen" in order to avoid races.
A swap file and a non-zero swappiness value are required to scan anon
pages. If swapping is not desired, set vm.swappiness to 0 and
overwrite it with a non-zero "swappiness".
Write "- memcg_id node_id gen [swappiness] [nr_to_reclaim]" to this
file to evict generations less than or equal to "gen". "gen" must be
less than "max_gen"-1 as "max_gen" and "max_gen"-1 are active
generations and therefore protected from the eviction. "nr_to_reclaim"
can be used to limit the number of pages to be evicted.
Multiple command lines are supported, and so is concatenation with
delimiters "," and ";".
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
mm/vmscan.c | 334 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 334 insertions(+)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ce868d89dc53..b59b556e9587 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -51,6 +51,7 @@
#include <linux/psi.h>
#include <linux/pagewalk.h>
#include <linux/memory.h>
+#include <linux/debugfs.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -5833,6 +5834,334 @@ lru_gen_online_mem(struct notifier_block *self, unsigned long action, void *arg)
return NOTIFY_DONE;
}
+/******************************************************************************
+ * sysfs interface
+ ******************************************************************************/
+
+static ssize_t show_lru_gen_spread(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", READ_ONCE(lru_gen_spread));
+}
+
+static ssize_t store_lru_gen_spread(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ int spread;
+
+ if (kstrtoint(buf, 10, &spread) || spread >= MAX_NR_GENS)
+ return -EINVAL;
+
+ WRITE_ONCE(lru_gen_spread, spread);
+
+ return len;
+}
+
+static struct kobj_attribute lru_gen_spread_attr = __ATTR(
+ spread, 0644,
+ show_lru_gen_spread, store_lru_gen_spread
+);
+
+static ssize_t show_lru_gen_enabled(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%ld\n", lru_gen_enabled());
+}
+
+static ssize_t store_lru_gen_enabled(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ int enable;
+
+ if (kstrtoint(buf, 10, &enable))
+ return -EINVAL;
+
+ lru_gen_set_state(enable, true, false);
+
+ return len;
+}
+
+static struct kobj_attribute lru_gen_enabled_attr = __ATTR(
+ enabled, 0644, show_lru_gen_enabled, store_lru_gen_enabled
+);
+
+static struct attribute *lru_gen_attrs[] = {
+ &lru_gen_spread_attr.attr,
+ &lru_gen_enabled_attr.attr,
+ NULL
+};
+
+static struct attribute_group lru_gen_attr_group = {
+ .name = "lru_gen",
+ .attrs = lru_gen_attrs,
+};
+
+/******************************************************************************
+ * debugfs interface
+ ******************************************************************************/
+
+static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos)
+{
+ struct mem_cgroup *memcg;
+ loff_t nr_to_skip = *pos;
+
+ m->private = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (!m->private)
+ return ERR_PTR(-ENOMEM);
+
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ do {
+ int nid;
+
+ for_each_node_state(nid, N_MEMORY) {
+ if (!nr_to_skip--)
+ return mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+ }
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+
+ return NULL;
+}
+
+static void lru_gen_seq_stop(struct seq_file *m, void *v)
+{
+ if (!IS_ERR_OR_NULL(v))
+ mem_cgroup_iter_break(NULL, lruvec_memcg(v));
+
+ kfree(m->private);
+ m->private = NULL;
+}
+
+static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ int nid = lruvec_pgdat(v)->node_id;
+ struct mem_cgroup *memcg = lruvec_memcg(v);
+
+ ++*pos;
+
+ nid = next_memory_node(nid);
+ if (nid == MAX_NUMNODES) {
+ memcg = mem_cgroup_iter(NULL, memcg, NULL);
+ if (!memcg)
+ return NULL;
+
+ nid = first_memory_node;
+ }
+
+ return mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+}
+
+static int lru_gen_seq_show(struct seq_file *m, void *v)
+{
+ unsigned long seq;
+ struct lruvec *lruvec = v;
+ int nid = lruvec_pgdat(lruvec)->node_id;
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ DEFINE_MAX_SEQ(lruvec);
+ DEFINE_MIN_SEQ(lruvec);
+
+ if (nid == first_memory_node) {
+#ifdef CONFIG_MEMCG
+ if (memcg)
+ cgroup_path(memcg->css.cgroup, m->private, PATH_MAX);
+#endif
+ seq_printf(m, "memcg %5hu %s\n",
+ mem_cgroup_id(memcg), (char *)m->private);
+ }
+
+ seq_printf(m, " node %4d\n", nid);
+
+ for (seq = min(min_seq[0], min_seq[1]); seq <= max_seq; seq++) {
+ int gen, file, zone;
+ unsigned int msecs;
+ long sizes[ANON_AND_FILE] = {};
+
+ gen = lru_gen_from_seq(seq);
+
+ msecs = jiffies_to_msecs(jiffies - READ_ONCE(
+ lruvec->evictable.timestamps[gen]));
+
+ for_each_type_zone(file, zone)
+ sizes[file] += READ_ONCE(
+ lruvec->evictable.sizes[gen][file][zone]);
+
+ sizes[0] = max(sizes[0], 0L);
+ sizes[1] = max(sizes[1], 0L);
+
+ seq_printf(m, "%11lu %9u %9lu %9lu\n",
+ seq, msecs, sizes[0], sizes[1]);
+ }
+
+ return 0;
+}
+
+static const struct seq_operations lru_gen_seq_ops = {
+ .start = lru_gen_seq_start,
+ .stop = lru_gen_seq_stop,
+ .next = lru_gen_seq_next,
+ .show = lru_gen_seq_show,
+};
+
+static int lru_gen_debugfs_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &lru_gen_seq_ops);
+}
+
+static int advance_max_seq(struct lruvec *lruvec, unsigned long seq, int swappiness)
+{
+ struct scan_control sc = {
+ .target_mem_cgroup = lruvec_memcg(lruvec),
+ };
+ DEFINE_MAX_SEQ(lruvec);
+
+ if (seq == max_seq)
+ walk_mm_list(lruvec, max_seq, &sc, swappiness);
+
+ return seq > max_seq ? -EINVAL : 0;
+}
+
+static int advance_min_seq(struct lruvec *lruvec, unsigned long seq, int swappiness,
+ unsigned long nr_to_reclaim)
+{
+ struct blk_plug plug;
+ int err = -EINTR;
+ long nr_to_scan = LONG_MAX;
+ struct scan_control sc = {
+ .nr_to_reclaim = nr_to_reclaim,
+ .target_mem_cgroup = lruvec_memcg(lruvec),
+ .may_writepage = 1,
+ .may_unmap = 1,
+ .may_swap = 1,
+ .reclaim_idx = MAX_NR_ZONES - 1,
+ .gfp_mask = GFP_KERNEL,
+ };
+ DEFINE_MAX_SEQ(lruvec);
+
+ if (seq >= max_seq - 1)
+ return -EINVAL;
+
+ blk_start_plug(&plug);
+
+ while (!signal_pending(current)) {
+ DEFINE_MIN_SEQ(lruvec);
+
+ if (seq < min(min_seq[!swappiness], min_seq[swappiness < 200]) ||
+ !evict_lru_gen_pages(lruvec, &sc, swappiness, &nr_to_scan)) {
+ err = 0;
+ break;
+ }
+
+ cond_resched();
+ }
+
+ blk_finish_plug(&plug);
+
+ return err;
+}
+
+static int advance_seq(char cmd, int memcg_id, int nid, unsigned long seq,
+ int swappiness, unsigned long nr_to_reclaim)
+{
+ struct lruvec *lruvec;
+ int err = -EINVAL;
+ struct mem_cgroup *memcg = NULL;
+
+ if (!mem_cgroup_disabled()) {
+ rcu_read_lock();
+ memcg = mem_cgroup_from_id(memcg_id);
+#ifdef CONFIG_MEMCG
+ if (memcg && !css_tryget(&memcg->css))
+ memcg = NULL;
+#endif
+ rcu_read_unlock();
+
+ if (!memcg)
+ goto done;
+ }
+ if (memcg_id != mem_cgroup_id(memcg))
+ goto done;
+
+ if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY))
+ goto done;
+
+ lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+
+ if (swappiness == -1)
+ swappiness = get_swappiness(lruvec);
+ else if (swappiness > 200U)
+ goto done;
+
+ switch (cmd) {
+ case '+':
+ err = advance_max_seq(lruvec, seq, swappiness);
+ break;
+ case '-':
+ err = advance_min_seq(lruvec, seq, swappiness, nr_to_reclaim);
+ break;
+ }
+done:
+ mem_cgroup_put(memcg);
+
+ return err;
+}
+
+static ssize_t lru_gen_debugfs_write(struct file *file, const char __user *src,
+ size_t len, loff_t *pos)
+{
+ void *buf;
+ char *cur, *next;
+ int err = 0;
+
+ buf = kvmalloc(len + 1, GFP_USER);
+ if (!buf)
+ return -ENOMEM;
+
+ if (copy_from_user(buf, src, len)) {
+ kvfree(buf);
+ return -EFAULT;
+ }
+
+ next = buf;
+ next[len] = '\0';
+
+ while ((cur = strsep(&next, ",;\n"))) {
+ int n;
+ int end;
+ char cmd;
+ int memcg_id;
+ int nid;
+ unsigned long seq;
+ int swappiness = -1;
+ unsigned long nr_to_reclaim = -1;
+
+ cur = skip_spaces(cur);
+ if (!*cur)
+ continue;
+
+ n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid,
+ &seq, &end, &swappiness, &end, &nr_to_reclaim, &end);
+ if (n < 4 || cur[end]) {
+ err = -EINVAL;
+ break;
+ }
+
+ err = advance_seq(cmd, memcg_id, nid, seq, swappiness, nr_to_reclaim);
+ if (err)
+ break;
+ }
+
+ kvfree(buf);
+
+ return err ? : len;
+}
+
+static const struct file_operations lru_gen_debugfs_ops = {
+ .open = lru_gen_debugfs_open,
+ .read = seq_read,
+ .write = lru_gen_debugfs_write,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
/******************************************************************************
* initialization
******************************************************************************/
@@ -5873,6 +6202,11 @@ static int __init init_lru_gen(void)
if (hotplug_memory_notifier(lru_gen_online_mem, 0))
pr_err("lru_gen: failed to subscribe hotplug notifications\n");
+ if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
+ pr_err("lru_gen: failed to create sysfs group\n");
+
+ debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_debugfs_ops);
+
return 0;
};
/*
--
2.31.0.rc2.261.g7f71774620-goog

View File

@ -0,0 +1,137 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 9E997C43619
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:07 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 8CDE364F1F
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:07 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S233595AbhCMH6r (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 13 Mar 2021 02:58:47 -0500
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59028 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S232999AbhCMH6P (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 13 Mar 2021 02:58:15 -0500
Received: from mail-qk1-x74a.google.com (mail-qk1-x74a.google.com [IPv6:2607:f8b0:4864:20::74a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 7BED3C061574
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:14 -0800 (PST)
Received: by mail-qk1-x74a.google.com with SMTP id c1so19954167qke.8
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:14 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=uE7IkUDVFpC8s1yHDZEBmC5mjUj6EJwbkUDWk669ASY=;
b=uXhx05PdjRvuFkUcjNWapddDUcA7Q5UwyJE/THSSQbBgSLQkmn7ajsUcb7ZmUjhauR
/jgBj7/Odh6Ngd12GetPXsZawVQtDY3F/Xog0R3yIye6citJOlL5TJ+2wwf2gcYmueuQ
WYDJiJoxC2qvezUikRORLJGGQFEDfwGtR7lOiuTnagaswHyGlY8OAHiBnbM2NFrUlS9F
wuwz3bRewUhC6hOmirv0YN+eR4e/S0TxFsdjMMf1mOQtK0M77IA18i0YjomUSsNcZo01
GPjHMp59Yr0/XsqaDXOK5S+CA6611MojOlFbWTfafAQpXFIKwJXgkzUeIs9A5goZIWHY
WCzQ==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=uE7IkUDVFpC8s1yHDZEBmC5mjUj6EJwbkUDWk669ASY=;
b=bLNHbmZfUarHE/zKcwTOEXRDaaA3M3Wb9opsS8eMo1jlzRy67SWU0QGKWyLhaQIUuj
jYh9cCeQdHC6qIfno1lltA2+v9qIhbWTkQq8IDWpy+DqlnHKeJ9+XXBQRQaso3znd4ai
nPcd4O7/NtulCEnPJS2gjxYdwdJpTRn7zQmPaq7GzeKJRU9mvdhcR8zfovaRnZIIgwNw
Ek53R7AbW1KCHAQtw9b0DwJz5OUi6Uqj50tx1chZcH3FUwhFwZUE2dqTAAbpx8adie8Y
0NBJ8v2hnoSoCSy2gxlP1J+gN0mEx5Qn0C5zIt7OKkWf4Sh01kTSKIDwmcQsD0ghz6hu
7qiQ==
X-Gm-Message-State: AOAM5339O1/TQ9+FOQ9m7N+jX6OVojHsWyNTehwokZ5ewkZ6BLvWDrEp
OjNCxrW3VzSi1/zx5w0Sch6pYeFtRCc=
X-Google-Smtp-Source: ABdhPJz7oVSCBhnoApkNm1wW85JMl4kSXploQGL6Mdvavu7deAd9Mg85kMswIg+jCkfg3Z/h90N99XOQmFk=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:f931:d3e4:faa0:4f74])
(user=yuzhao job=sendgmr) by 2002:a05:6214:1909:: with SMTP id
er9mr1749542qvb.5.1615622293640; Fri, 12 Mar 2021 23:58:13 -0800 (PST)
Date: Sat, 13 Mar 2021 00:57:46 -0700
In-Reply-To: <20210313075747.3781593-1-yuzhao@google.com>
Message-Id: <20210313075747.3781593-14-yuzhao@google.com>
Mime-Version: 1.0
References: <20210313075747.3781593-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.0.rc2.261.g7f71774620-goog
Subject: [PATCH v1 13/14] mm: multigenerational lru: Kconfig
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alex.shi@linux.alibaba.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>, Michal Hocko <mhocko@suse.com>,
Roman Gushchin <guro@fb.com>, Vlastimil Babka <vbabka@suse.cz>,
Wei Yang <richard.weiyang@linux.alibaba.com>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>,
linux-kernel@vger.kernel.org, page-reclaim@google.com,
Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210313075747.3781593-14-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Add configuration options for multigenerational lru.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
mm/Kconfig | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/mm/Kconfig b/mm/Kconfig
index 24c045b24b95..3a5bcc2d7a45 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -872,4 +872,33 @@ config MAPPING_DIRTY_HELPERS
config KMAP_LOCAL
bool
+config LRU_GEN
+ bool "Multigenerational LRU"
+ depends on MMU
+ help
+ High performance multigenerational LRU to heavily overcommit workloads
+ that are not IO bound. See Documentation/vm/multigen_lru.rst for
+ details.
+
+ Warning: do not enable this option unless you plan to use it because
+ it introduces a small per-process memory overhead.
+
+config NR_LRU_GENS
+ int "Max number of generations"
+ depends on LRU_GEN
+ range 4 63
+ default 7
+ help
+ This will use ilog2(N)+1 spare bits from page flags.
+
+ Warning: do not use numbers larger than necessary because each
+ generation introduces a small per-node and per-memcg memory overhead.
+
+config LRU_GEN_ENABLED
+ bool "Turn on by default"
+ depends on LRU_GEN
+ help
+ The default value of /sys/kernel/mm/lru_gen/enabled is 0. This option
+ changes it to 1.
+
endmenu
--
2.31.0.rc2.261.g7f71774620-goog

View File

@ -0,0 +1,329 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 99A73C4360C
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:07 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 7C9EA64F1E
for <linux-kernel@archiver.kernel.org>; Sat, 13 Mar 2021 07:59:07 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S233629AbhCMH6s (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Sat, 13 Mar 2021 02:58:48 -0500
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59030 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S233011AbhCMH6Q (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Sat, 13 Mar 2021 02:58:16 -0500
Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D3F77C061574
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:15 -0800 (PST)
Received: by mail-yb1-xb49.google.com with SMTP id y7so31766185ybh.20
for <linux-kernel@vger.kernel.org>; Fri, 12 Mar 2021 23:58:15 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=b33M625vmeFm8iJFKYIy9IbS+yyXzJHrz2YlprWAE88=;
b=qyBWCu6iSCz/+GOTBSyjEGx0UNh3wx8I4EpB+DGhW3FtbTsYmoVsgkJK7K9lMib92D
8UESs064HgmPaCcFC9wummpEDT04EZB57UgnWkSzwsmT8q8yKbsLNsdnaqxDho13rxSL
l1lhvY8XggaGyQS76caURzCZzmuuIb31yoMyJa36cSNEQIIGzS/Qm0HS9FQ4Sslqjhio
7G+7M9RsfMDtCuFijNWCkO0VasJ5hLLwIPnUW2My7qRxlwAQoGToYUEA5ipkn9Ckz+I1
ZZZL32LyYugyxj8DFHGhkOK2vtm0J8rqvkbb7eJOL7RwHttQzqGHotvqWMTx+tw95ZXr
O2hQ==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=b33M625vmeFm8iJFKYIy9IbS+yyXzJHrz2YlprWAE88=;
b=c/gz9vPAEJyp/EJn3y5EFyXbzSo7i/3uPaPFgvAuDmTSD+ba6y1WjRuclOCOCm5zrN
rzU2v5yfzmo6p+fXYM+C5uFH8SqC+cMK+bYnZUuNl3OwvgwL3kofqlcrnKaQQMqRRSey
lrW47VbiLF0IySg6AM605BkxEjKxVIkdnWAS+bqXtQVym74gxHHOHwX5tGuCn/7Bs/c4
cekamb+2vIeO+6/P0YD1ZdLO9LS1OwgxSor5cocBSXXW7J7bBLCGKAyU++NrNnahjJs1
8ckqrHiqFC1mEWFcs+VBQ9/NCeVfcAfMGRtsMsljI5kll/myniTnSrETf49UyjrcTk87
zwYQ==
X-Gm-Message-State: AOAM531AI2qRYUeaR85JEsd20wl4qCSV4g5Qpav0tPw9JXRAxcdYk9S4
AlH8Rls2BT9Ub2LGipv5Jfv5X2rI3ho=
X-Google-Smtp-Source: ABdhPJwyYS+6AjG1nAXtEYBKUiLdJ5mAmkUQWOq9ngRJz7So3XTjiRhhO4QmZvWvkKbzXQ7oleb6ep4AFYs=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:f931:d3e4:faa0:4f74])
(user=yuzhao job=sendgmr) by 2002:a25:8003:: with SMTP id m3mr24313264ybk.452.1615622295044;
Fri, 12 Mar 2021 23:58:15 -0800 (PST)
Date: Sat, 13 Mar 2021 00:57:47 -0700
In-Reply-To: <20210313075747.3781593-1-yuzhao@google.com>
Message-Id: <20210313075747.3781593-15-yuzhao@google.com>
Mime-Version: 1.0
References: <20210313075747.3781593-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.0.rc2.261.g7f71774620-goog
Subject: [PATCH v1 14/14] mm: multigenerational lru: documentation
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alex.shi@linux.alibaba.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>, Michal Hocko <mhocko@suse.com>,
Roman Gushchin <guro@fb.com>, Vlastimil Babka <vbabka@suse.cz>,
Wei Yang <richard.weiyang@linux.alibaba.com>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>,
linux-kernel@vger.kernel.org, page-reclaim@google.com,
Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210313075747.3781593-15-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Add Documentation/vm/multigen_lru.rst.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
Documentation/vm/index.rst | 1 +
Documentation/vm/multigen_lru.rst | 210 ++++++++++++++++++++++++++++++
2 files changed, 211 insertions(+)
create mode 100644 Documentation/vm/multigen_lru.rst
diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst
index eff5fbd492d0..c353b3f55924 100644
--- a/Documentation/vm/index.rst
+++ b/Documentation/vm/index.rst
@@ -17,6 +17,7 @@ various features of the Linux memory management
swap_numa
zswap
+ multigen_lru
Kernel developers MM documentation
==================================
diff --git a/Documentation/vm/multigen_lru.rst b/Documentation/vm/multigen_lru.rst
new file mode 100644
index 000000000000..fea927da2572
--- /dev/null
+++ b/Documentation/vm/multigen_lru.rst
@@ -0,0 +1,210 @@
+=====================
+Multigenerational LRU
+=====================
+
+Quick Start
+===========
+Build Options
+-------------
+:Required: Set ``CONFIG_LRU_GEN=y``.
+
+:Optional: Change ``CONFIG_NR_LRU_GENS`` to a number ``X`` to support
+ a maximum of ``X`` generations.
+
+:Optional: Set ``CONFIG_LRU_GEN_ENABLED=y`` to turn the feature on by
+ default.
+
+Runtime Options
+---------------
+:Required: Write ``1`` to ``/sys/kernel/mm/lru_gen/enabled`` if the
+ feature was not turned on by default.
+
+:Optional: Change ``/sys/kernel/mm/lru_gen/spread`` to a number ``N``
+ to spread pages out across ``N+1`` generations. ``N`` must be less
+ than ``X``. Larger values make the background aging more aggressive.
+
+:Optional: Read ``/sys/kernel/debug/lru_gen`` to verify the feature.
+ This file has the following output:
+
+::
+
+ memcg memcg_id memcg_path
+ node node_id
+ min_gen birth_time anon_size file_size
+ ...
+ max_gen birth_time anon_size file_size
+
+Given a memcg and a node, ``min_gen`` is the oldest generation
+(number) and ``max_gen`` is the youngest. Birth time is in
+milliseconds. Anon and file sizes are in pages.
+
+Recipes
+-------
+:Android on ARMv8.1+: ``X=4``, ``N=0``
+
+:Android on pre-ARMv8.1 CPUs: Not recommended due to the lack of
+ ``ARM64_HW_AFDBM``
+
+:Laptops running Chrome on x86_64: ``X=7``, ``N=2``
+
+:Working set estimation: Write ``+ memcg_id node_id gen [swappiness]``
+ to ``/sys/kernel/debug/lru_gen`` to account referenced pages to
+ generation ``max_gen`` and create the next generation ``max_gen+1``.
+ ``gen`` must be equal to ``max_gen`` in order to avoid races. A swap
+ file and a non-zero swappiness value are required to scan anon pages.
+ If swapping is not desired, set ``vm.swappiness`` to ``0`` and
+ overwrite it with a non-zero ``swappiness``.
+
+:Proactive reclaim: Write ``- memcg_id node_id gen [swappiness]
+ [nr_to_reclaim]`` to ``/sys/kernel/debug/lru_gen`` to evict
+ generations less than or equal to ``gen``. ``gen`` must be less than
+ ``max_gen-1`` as ``max_gen`` and ``max_gen-1`` are active generations
+ and therefore protected from the eviction. ``nr_to_reclaim`` can be
+ used to limit the number of pages to be evicted. Multiple command
+ lines are supported, as is concatenation with delimiters ``,`` and
+ ``;``.
+
+Workflow
+========
+Evictable pages are divided into multiple generations for each
+``lruvec``. The youngest generation number is stored in ``max_seq``
+for both anon and file types as they are aged on an equal footing. The
+oldest generation numbers are stored in ``min_seq[2]`` separately for
+anon and file types as clean file pages can be evicted regardless of
+swap and write-back constraints. Generation numbers are truncated into
+``ilog2(CONFIG_NR_LRU_GENS)+1`` bits in order to fit into
+``page->flags``. The sliding window technique is used to prevent
+truncated generation numbers from overlapping. Each truncated
+generation number is an index to an array of per-type and per-zone
+lists. Evictable pages are added to the per-zone lists indexed by
+``max_seq`` or ``min_seq[2]`` (modulo ``CONFIG_NR_LRU_GENS``),
+depending on whether they are being faulted in or read ahead. The
+workflow comprises two conceptually independent functions: the aging
+and the eviction.
+
+Aging
+-----
+The aging produces young generations. Given an ``lruvec``, the aging
+scans page tables for referenced pages of this ``lruvec``. Upon
+finding one, the aging updates its generation number to ``max_seq``.
+After each round of scan, the aging increments ``max_seq``. The aging
+maintains either a system-wide ``mm_struct`` list or per-memcg
+``mm_struct`` lists, and it only scans page tables of processes that
+have been scheduled since the last scan. Since scans are differential
+with respect to referenced pages, the cost is roughly proportional to
+their number.
+
+Eviction
+--------
+The eviction consumes old generations. Given an ``lruvec``, the
+eviction scans the pages on the per-zone lists indexed by either of
+``min_seq[2]``. It selects a type according to the values of
+``min_seq[2]`` and swappiness. During a scan, the eviction either
+sorts or isolates a page, depending on whether the aging has updated
+its generation number. When it finds all the per-zone lists are empty,
+the eviction increments ``min_seq[2]`` indexed by this selected type.
+The eviction triggers the aging when both of ``min_seq[2]`` reach
+``max_seq-1``, assuming both anon and file types are reclaimable.
+
+Rationale
+=========
+Characteristics of cloud workloads
+----------------------------------
+With cloud storage gone mainstream, the role of local storage has
+diminished. For most of the systems running cloud workloads, anon
+pages account for the majority of memory consumption and page cache
+contains mostly executable pages. Notably, the portion of unmapped
+pages is negligible.
+
+As a result, swapping is necessary to achieve substantial memory
+overcommit. And the ``rmap`` is the hottest in the reclaim path
+because its usage is proportional to the number of scanned pages,
+which on average is many times the number of reclaimed pages.
+
+With ``zram``, a typical ``kswapd`` profile on v5.11 looks like:
+
+::
+
+ 31.03% page_vma_mapped_walk
+ 25.59% lzo1x_1_do_compress
+ 4.63% do_raw_spin_lock
+ 3.89% vma_interval_tree_iter_next
+ 3.33% vma_interval_tree_subtree_search
+
+And with real swap, it looks like:
+
+::
+
+ 45.16% page_vma_mapped_walk
+ 7.61% do_raw_spin_lock
+ 5.69% vma_interval_tree_iter_next
+ 4.91% vma_interval_tree_subtree_search
+ 3.71% page_referenced_one
+
+Limitations of the Current Implementation
+-----------------------------------------
+Notion of the Active/Inactive
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+For servers equipped with hundreds of gigabytes of memory, the
+granularity of the active/inactive is too coarse to be useful for job
+scheduling. And false active/inactive rates are relatively high.
+
+For phones and laptops, the eviction is biased toward file pages
+because the selection has to resort to heuristics as direct
+comparisons between anon and file types are infeasible.
+
+For systems with multiple nodes and/or memcgs, it is impossible to
+compare ``lruvec``\s based on the notion of the active/inactive.
+
+Incremental Scans via the ``rmap``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Each incremental scan picks up where the last scan left off and
+stops after it has found a handful of unreferenced pages. For most of
+the systems running cloud workloads, incremental scans lose the
+advantage under sustained memory pressure due to high ratios of the
+number of scanned pages to the number of reclaimed pages. On top of
+that, the ``rmap`` has poor memory locality due to its complex data
+structures. The combined effects typically result in a high amount of
+CPU usage in the reclaim path.
+
+Benefits of the Multigenerational LRU
+-------------------------------------
+Notion of Generation Numbers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The notion of generation numbers introduces a quantitative approach to
+memory overcommit. A larger number of pages can be spread out across
+configurable generations, and thus they have relatively low false
+active/inactive rates. Each generation includes all pages that have
+been referenced since the last generation.
+
+Given an ``lruvec``, scans and the selections between anon and file
+types are all based on generation numbers, which are simple and yet
+effective. For different ``lruvec``\s, comparisons are still possible
+based on birth times of generations.
+
+Differential Scans via Page Tables
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Each differential scan discovers all pages that have been referenced
+since the last scan. Specifically, it walks the ``mm_struct`` list
+associated with an ``lruvec`` to scan page tables of processes that
+have been scheduled since the last scan. The cost of each differential
+scan is roughly proportional to the number of referenced pages it
+discovers. Unless address spaces are extremely sparse, page tables
+usually have better memory locality than the ``rmap``. The end result
+is generally a significant reduction in CPU usage, for most of the
+systems running cloud workloads.
+
+To-do List
+==========
+KVM Optimization
+----------------
+Support shadow page table walk.
+
+NUMA Optimization
+-----------------
+Add per-node RSS for ``should_skip_mm()``.
+
+Refault Tracking Optimization
+-----------------------------
+Use generation numbers rather than LRU positions in
+``workingset_eviction()`` and ``workingset_refault()``.
--
2.31.0.rc2.261.g7f71774620-goog

View File

@ -0,0 +1,146 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id B98A5C43462
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:56:52 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 9970A613D1
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:56:52 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S245186AbhDMG5J (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Tue, 13 Apr 2021 02:57:09 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44138 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S242333AbhDMG5C (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Tue, 13 Apr 2021 02:57:02 -0400
Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 28542C061574
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:43 -0700 (PDT)
Received: by mail-yb1-xb4a.google.com with SMTP id i2so15393704ybl.21
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:43 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=uoN+cWnulcs+MZz6Yfoth7IiX8iwaSm44WY0GAxt+Q4=;
b=Ky/g/4nTpvE6H1kNq4Im8vCSVqJJWgdY64updRqr3NGODL/gY7XSLNlMuXa/Yqagpg
8h8aUIGoWcm6zgtJI5Fw5fMN+PJDxOQb+W3x0OLBhrQ+nOe/aDQ/DaNsTpFLgKXpBR7/
Nvvw4ruE5Db9uCII9HC5YVMWkv6n0oPwKqmHcIgXqyJRfj6NX9MMyHBXVjqP883hb1k1
Uts/76AmsciIF0vpEK2WDi/7DTKQWJN38NKXgOIJgZwI3uctZHJ221m0qvGUkZ8xVQ8M
LJm2bY+K9olC9c50QyUPY+bxF/x11l+o56tHmajIr/WsoQoJ64e/eJ6Tpi1C0nsUQsqW
HHBQ==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=uoN+cWnulcs+MZz6Yfoth7IiX8iwaSm44WY0GAxt+Q4=;
b=q4VjA6z3lTu7Y75EQkCaOUnGPrZr+a8VxIneVHg9KIy8GcVnTbV6azYx3iJlfN/mqY
nM4GFUu6opNihX2CTE1sYviNzX90nlsf6Ip3WykacM0NVKoiD/02EGRPQvc0l3EE/8K0
43Y8NKqjqKspr7Tjz074a8EJrkBUqhaBpFzDGZwvcg5JCb19/+tTrjWSio3YSp1gtbA+
8OB8fTMMZlhaH5pTQWlQnQM3YN8CNJBooHERVgByq78Q7xObvheM9tjTza0hz5coErNv
aLMQMSIT87k3f7EWq0H6qOBAaxbbR8uChrhfVLanXWxhaw/G+ZI5csPO154ctl5A0+5/
Yc5g==
X-Gm-Message-State: AOAM5311I++jOq9dpMAS7ctzsZDbqRUOtVWfMxjhdktZjjKeusU8mSAv
AjoVQqVxKqAzXcw+CT2fcJSxxzNjPAU=
X-Google-Smtp-Source: ABdhPJyfoyUlusz71TmeoRvttPw/GuUM1FYO9KnbxFJsUN5OFDqRz4J7wq87XkLveCWglWGJeEC6Et9cWvE=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9])
(user=yuzhao job=sendgmr) by 2002:a25:bb41:: with SMTP id b1mr41562657ybk.249.1618297002301;
Mon, 12 Apr 2021 23:56:42 -0700 (PDT)
Date: Tue, 13 Apr 2021 00:56:18 -0600
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>
Message-Id: <20210413065633.2782273-2-yuzhao@google.com>
Mime-Version: 1.0
References: <20210413065633.2782273-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog
Subject: [PATCH v2 01/16] include/linux/memcontrol.h: do not warn in
page_memcg_rcu() if !CONFIG_MEMCG
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Benjamin Manes <ben.manes@gmail.com>,
Dave Chinner <david@fromorbit.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>,
Miaohe Lin <linmiaohe@huawei.com>,
Michael Larabel <michael@michaellarabel.com>,
Michal Hocko <mhocko@suse.com>,
Michel Lespinasse <michel@lespinasse.org>,
Rik van Riel <riel@surriel.com>,
Roman Gushchin <guro@fb.com>,
Rong Chen <rong.a.chen@intel.com>,
SeongJae Park <sjpark@amazon.de>,
Tim Chen <tim.c.chen@linux.intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, lkp@lists.01.org,
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210413065633.2782273-2-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
page_memcg_rcu() warns on !rcu_read_lock_held() regardless of
CONFIG_MEMCG. The following code is legit, but it triggers the warning
when !CONFIG_MEMCG, since lock_page_memcg() and unlock_page_memcg()
are empty for this config.
memcg = lock_page_memcg(page1)
(rcu_read_lock() if CONFIG_MEMCG=y)
do something to page1
if (page_memcg_rcu(page2) == memcg)
do something to page2 too as it cannot be migrated away from the
memcg either.
unlock_page_memcg(page1)
(rcu_read_unlock() if CONFIG_MEMCG=y)
Locking/unlocking rcu consistently for both configs is rigorous but it
also forces unnecessary locking upon users who have no interest in
CONFIG_MEMCG.
This patch removes the assertion for !CONFIG_MEMCG, because
page_memcg_rcu() has a few callers and there are no concerns regarding
their correctness at the moment.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
include/linux/memcontrol.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0c04d39a7967..f13dc02cf277 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1077,7 +1077,6 @@ static inline struct mem_cgroup *page_memcg(struct page *page)
static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
{
- WARN_ON_ONCE(!rcu_read_lock_held());
return NULL;
}
--
2.31.1.295.g9ea45b61b8-goog

View File

@ -0,0 +1,124 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=ham autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id DD966C43460
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:56:56 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id AD0DD613B1
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:56:56 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S245188AbhDMG5O (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Tue, 13 Apr 2021 02:57:14 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44148 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S245147AbhDMG5D (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Tue, 13 Apr 2021 02:57:03 -0400
Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id AF0D4C061574
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:44 -0700 (PDT)
Received: by mail-yb1-xb4a.google.com with SMTP id e185so6246113ybf.4
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:44 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=EM7U/N62rbxjpd/wy3lwoMJ7CSKXstnqAzc6WMXVO+c=;
b=t/TvdOo7hn9eFyLRcO6IKN2knJLFlMvJD85LqS3p70ezJY9KmJyQnoNmrkIR2uthXy
WmFHutjhP3sNRUFV88YVqyqRzdb/QCULw0znZtShHzf8oRGvUznrafDt1yFbCPXkkI+0
Y1bOuKRWZn44z9QIgS0RLo1mHpFU76jVw8i6GqzSatKn5V3qIjC6li7inmOfVCGRz5Zl
+SxAwEh7kMa92WQx0NoeerKExD4+Xxk3+iMBmL0VuvWnWnvSTan6oFLfspI3Vr1AfObf
fAVPm3SigqMxgdFIo7OoLz/1wI9FPVPrUSETRfh9HMZZzvtlTIxOqZEUvZjaaMCiZtbS
2tUA==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=EM7U/N62rbxjpd/wy3lwoMJ7CSKXstnqAzc6WMXVO+c=;
b=rGLib4fG3u5JfGsMESfD549XkyQTbkxo+ZDK2peyx+gBJaLu40gpIMHYqYOJAyzqzG
ix/FmZokOmB2+3Naq4VoOPQoJeMjsTJL0YBtF/6MDHz1/XjT5miqUjxHiUs4UtTo2Du6
F/+TEZ6RtK0ePZqj+F41HO2cFdLMN0FfxwTT86IF0q5FEXGo7ZGqUj/nGxuH9w5dgmHf
9Nskde954GH8rRzCtUmRNHuA8h7Ac3cmaz+uI7FTFiX01W+tcnke/SrzFAqCCl6ML8Ah
6Js8R+1sL+sXe8TtZjGQ2aa7aOQGYsPwyF+SJW5qYMLvDpcoUNdkKpfb2nSVpEKolrJA
C3cg==
X-Gm-Message-State: AOAM533k6NruViQt9bY73WARuw0APJWRdFLtJTsHl/VJrzJggskh0kcA
On0mU/on2LGVIbt6g8dxcT+hA0GZgOI=
X-Google-Smtp-Source: ABdhPJx9dY0CYhzp53dRcd9T1SUoIr4KnxC7LGKi7djvDgAR/DF3q/feIx7ybIki3WMXmS4BOiKGzGOIvao=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9])
(user=yuzhao job=sendgmr) by 2002:a5b:b4a:: with SMTP id b10mr2519734ybr.182.1618297003935;
Mon, 12 Apr 2021 23:56:43 -0700 (PDT)
Date: Tue, 13 Apr 2021 00:56:19 -0600
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>
Message-Id: <20210413065633.2782273-3-yuzhao@google.com>
Mime-Version: 1.0
References: <20210413065633.2782273-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog
Subject: [PATCH v2 02/16] include/linux/nodemask.h: define next_memory_node()
if !CONFIG_NUMA
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Benjamin Manes <ben.manes@gmail.com>,
Dave Chinner <david@fromorbit.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>,
Miaohe Lin <linmiaohe@huawei.com>,
Michael Larabel <michael@michaellarabel.com>,
Michal Hocko <mhocko@suse.com>,
Michel Lespinasse <michel@lespinasse.org>,
Rik van Riel <riel@surriel.com>,
Roman Gushchin <guro@fb.com>,
Rong Chen <rong.a.chen@intel.com>,
SeongJae Park <sjpark@amazon.de>,
Tim Chen <tim.c.chen@linux.intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, lkp@lists.01.org,
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210413065633.2782273-3-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Currently next_memory_node only exists when CONFIG_NUMA=y. This patch
adds the macro for !CONFIG_NUMA.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
include/linux/nodemask.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index ac398e143c9a..89fe4e3592f9 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -486,6 +486,7 @@ static inline int num_node_state(enum node_states state)
#define first_online_node 0
#define first_memory_node 0
#define next_online_node(nid) (MAX_NUMNODES)
+#define next_memory_node(nid) (MAX_NUMNODES)
#define nr_node_ids 1U
#define nr_online_nodes 1U
--
2.31.1.295.g9ea45b61b8-goog

View File

@ -0,0 +1,130 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 6E4E7C433ED
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:56:58 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 2A301613EB
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:56:58 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S1345084AbhDMG5Q (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
Tue, 13 Apr 2021 02:57:16 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44152 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S237032AbhDMG5F (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Tue, 13 Apr 2021 02:57:05 -0400
Received: from mail-qv1-xf4a.google.com (mail-qv1-xf4a.google.com [IPv6:2607:f8b0:4864:20::f4a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 26D6FC061574
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:46 -0700 (PDT)
Received: by mail-qv1-xf4a.google.com with SMTP id gu11so7133331qvb.0
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:46 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=ty30EBMFobCGhabQdsuq+v2Kg8uUmEONp40/WyUA1q8=;
b=i8n6+BP4XniO7GqYB3njPBeS1g0cajvT/0XeibRC9E79Y2kxVkXGp/HuAtF4IVW6+L
/n2Z+ZNUjzYoRG1K8TO2KT7wPH4dB0dBfh+QxjE4pa3hFSlYATFkHsATy+5tXCYxPNI5
icwBWKo7lmwEnXOUHSMAZbfasHoawvCVog/UnTwIW6ATbaU4DRzi4r/NM6Dk8D5iMFw0
uINBgxANuIFFKRfVUOyfzXT7qWKDHKlb5wvR3T/4y2+SRO3Xq0OMidUV+vii8Ijbi9C8
OKDCcdJr7BmAzQtIPAXlE+vxaL8G9raL19q09IcdqKKULLNIy57jVK2xtDVpTIbZE6jh
DVMg==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=ty30EBMFobCGhabQdsuq+v2Kg8uUmEONp40/WyUA1q8=;
b=YVzfIBXrv665u9gqpA0aaR8rYQ3ksKwQ6y1pnY3UhRF3H0B9Ey8UftLQ5sEjQSYXf5
4YJG1pSXti7Zr0NjAcVojZxJ3vul55+LG8QsAqvrkxu9kZe9BCPGcZ7CtjYmvAXZMaJS
LTzQMVutjT5FccfRztpgbLs4XZyflvf+EfncOMZ0jVl38t1cj4+1gqSFR9l9ghy+Xj2h
TuyP9qzN8JVm4XYhKfTX+rAB+yQ+CKmVvhh3Oj8O2I0hVOGKHfv1GT2BxP8lsdodzCri
TV4h5qxgSpmrJT5zS82i0VC+Kgi1iQ5lNkeUwKrowIXgTTdj2LkXGChb1hia2Sb2fq/c
/0RA==
X-Gm-Message-State: AOAM532KBvjkAJqjUGm4z3T6vDFjQzVEl4MdDPqiOTi/Sx/00HV2Sk4T
CDYdSIReMsyd3sZTjfEkJQizn1CUbQo=
X-Google-Smtp-Source: ABdhPJz9bP7GjZCXkR9CChLjfI00GuzH9av/gCfg2jgEdkGIxWUcBRwxRgL0Vxc4uB1fdD7yCdL0ylir3GM=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9])
(user=yuzhao job=sendgmr) by 2002:a05:6214:161:: with SMTP id
y1mr13969669qvs.31.1618297005251; Mon, 12 Apr 2021 23:56:45 -0700 (PDT)
Date: Tue, 13 Apr 2021 00:56:20 -0600
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>
Message-Id: <20210413065633.2782273-4-yuzhao@google.com>
Mime-Version: 1.0
References: <20210413065633.2782273-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog
Subject: [PATCH v2 03/16] include/linux/huge_mm.h: define is_huge_zero_pmd()
if !CONFIG_TRANSPARENT_HUGEPAGE
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Benjamin Manes <ben.manes@gmail.com>,
Dave Chinner <david@fromorbit.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>,
Miaohe Lin <linmiaohe@huawei.com>,
Michael Larabel <michael@michaellarabel.com>,
Michal Hocko <mhocko@suse.com>,
Michel Lespinasse <michel@lespinasse.org>,
Rik van Riel <riel@surriel.com>,
Roman Gushchin <guro@fb.com>,
Rong Chen <rong.a.chen@intel.com>,
SeongJae Park <sjpark@amazon.de>,
Tim Chen <tim.c.chen@linux.intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, lkp@lists.01.org,
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210413065633.2782273-4-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Currently is_huge_zero_pmd() only exists when
CONFIG_TRANSPARENT_HUGEPAGE=y. This patch adds the function for
!CONFIG_TRANSPARENT_HUGEPAGE.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
include/linux/huge_mm.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index ba973efcd369..0ba7b3f9029c 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -443,6 +443,11 @@ static inline bool is_huge_zero_page(struct page *page)
return false;
}
+static inline bool is_huge_zero_pmd(pmd_t pmd)
+{
+ return false;
+}
+
static inline bool is_huge_zero_pud(pud_t pud)
{
return false;
--
2.31.1.295.g9ea45b61b8-goog

View File

@ -0,0 +1,151 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=ham autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id B1779C433B4
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:56:59 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 93C83613CB
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:56:59 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S1345093AbhDMG5R (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
Tue, 13 Apr 2021 02:57:17 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44160 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S237122AbhDMG5G (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Tue, 13 Apr 2021 02:57:06 -0400
Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 70228C061756
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:47 -0700 (PDT)
Received: by mail-yb1-xb4a.google.com with SMTP id d1so15228352ybj.15
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:47 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=NPo7MPPcRQhwQwi0VkGJEhiUUoPZKpCjODwiJd36ReE=;
b=baGnCiioZTP9ADs7IVEB/mQcb3cvKmCKgg9drauUZQ+Tp4ZFhqV8SVk54iVXXC/g4a
cpq3VBdcxXnUKSenbwAnH9Jp0vcf5HUqcvm0/PItCUte5xo66HxROV5Obn4PGte89xi9
p+R4eomS1+PIS2MLxgShOMpnFvyxeBgpYJvBAHU3FKJ3dtUuQ8TMqtRRYgDLRETQtThQ
kFEKuP+qBTfl6NS1fHTb9BFTIgP5Z/N1DOBc07huBgFItja27dgr56dPRNvm09QqhgN8
KNYrM6tJs6Md4vWQFOufoHl576biAVAYjl1tmh0+nRa81An0lfEfinpclElVWZVJap6f
3K6Q==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=NPo7MPPcRQhwQwi0VkGJEhiUUoPZKpCjODwiJd36ReE=;
b=VQUPKq30uKeUAF6Ejq35xfekJF7nOdr7VngI/76uX8lOU1pIKoO4mC5aTAYeOIOrr8
d9hpCUWEcuxEWFU49K2HTzz6r9TRtei0Z3TR3n5CdNJqIigsBiTmuLGfOPgRfmTdf4p1
Gy4MP3Ln+GHBFflwKZ+f5OPcq+R/slU8HpAWd4KR6PshMeb/Uf/RnHWhIQ3qI8S3QLXv
K66JL1wL5gT1XsIvdtHxoLQ/CLC3QqmB2rSMp/tB7Orqc6DK48r53Kt037j1ALstA0O7
qY6CPZRsbCum+NhqDvT8/KN1dsIkOSEmKUt0TfQc8hUEIm0I2juU0HYZsBV7D9xioz8r
p45w==
X-Gm-Message-State: AOAM533p7SYDUFBf9Ifm7vaTwGtjEO4CrlaCuZ4KoZ7jp3M6fMJFAFBH
4BBDhvIWmrjLJRxSeBVIWDYQXg1lPro=
X-Google-Smtp-Source: ABdhPJyRALAhdJY/7MdeRvaPV8dMvbenEwa1GhqHOoi94XTiY8IwvBzrDPMpa5ltVLi8kkX49f0gbWJD/40=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9])
(user=yuzhao job=sendgmr) by 2002:a25:8b86:: with SMTP id j6mr39368340ybl.470.1618297006589;
Mon, 12 Apr 2021 23:56:46 -0700 (PDT)
Date: Tue, 13 Apr 2021 00:56:21 -0600
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>
Message-Id: <20210413065633.2782273-5-yuzhao@google.com>
Mime-Version: 1.0
References: <20210413065633.2782273-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog
Subject: [PATCH v2 04/16] include/linux/cgroup.h: export cgroup_mutex
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Benjamin Manes <ben.manes@gmail.com>,
Dave Chinner <david@fromorbit.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>,
Miaohe Lin <linmiaohe@huawei.com>,
Michael Larabel <michael@michaellarabel.com>,
Michal Hocko <mhocko@suse.com>,
Michel Lespinasse <michel@lespinasse.org>,
Rik van Riel <riel@surriel.com>,
Roman Gushchin <guro@fb.com>,
Rong Chen <rong.a.chen@intel.com>,
SeongJae Park <sjpark@amazon.de>,
Tim Chen <tim.c.chen@linux.intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, lkp@lists.01.org,
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210413065633.2782273-5-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
cgroup_mutex is needed to synchronize with memcg creations.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
include/linux/cgroup.h | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 4f2f79de083e..bd5744360cfa 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -432,6 +432,18 @@ static inline void cgroup_put(struct cgroup *cgrp)
css_put(&cgrp->self);
}
+extern struct mutex cgroup_mutex;
+
+static inline void cgroup_lock(void)
+{
+ mutex_lock(&cgroup_mutex);
+}
+
+static inline void cgroup_unlock(void)
+{
+ mutex_unlock(&cgroup_mutex);
+}
+
/**
* task_css_set_check - obtain a task's css_set with extra access conditions
* @task: the task to obtain css_set for
@@ -446,7 +458,6 @@ static inline void cgroup_put(struct cgroup *cgrp)
* as locks used during the cgroup_subsys::attach() methods.
*/
#ifdef CONFIG_PROVE_RCU
-extern struct mutex cgroup_mutex;
extern spinlock_t css_set_lock;
#define task_css_set_check(task, __c) \
rcu_dereference_check((task)->cgroups, \
@@ -704,6 +715,8 @@ struct cgroup;
static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
static inline void css_get(struct cgroup_subsys_state *css) {}
static inline void css_put(struct cgroup_subsys_state *css) {}
+static inline void cgroup_lock(void) {}
+static inline void cgroup_unlock(void) {}
static inline int cgroup_attach_task_all(struct task_struct *from,
struct task_struct *t) { return 0; }
static inline int cgroupstats_build(struct cgroupstats *stats,
--
2.31.1.295.g9ea45b61b8-goog

View File

@ -0,0 +1,190 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 3D894C433ED
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:01 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 16A5761278
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:01 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S1345104AbhDMG5S (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
Tue, 13 Apr 2021 02:57:18 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44168 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S237169AbhDMG5I (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Tue, 13 Apr 2021 02:57:08 -0400
Received: from mail-qk1-x749.google.com (mail-qk1-x749.google.com [IPv6:2607:f8b0:4864:20::749])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 01AF4C06175F
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:49 -0700 (PDT)
Received: by mail-qk1-x749.google.com with SMTP id j24so9889811qkg.7
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:48 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=kZ40TZQJmz2zt6lYwCpeAnxVbOWM8KwFdCtsfH6CbQ4=;
b=Lo7XMOOHbyzBoRlK8b2GE15qCT4QqS9ijyXSl1ryGVj5Alkuv2mcfhY4vR1gU/ak5i
HPCaNU4SNyd/togq6z9pJeIcKdhVNoakHlBzalPajFLmRC9Qbai2K4MiOiC3w/4zVP3/
NtLrS3pnu6kRnE/1OF1NCyaMABOTJ1Ahmg/dZPqItxMI54CzXgYo6GdLYksK4AzjBKx6
3OPkxOXxP71Nm7Tjl273X7BKZEBEv2cYYpFtO65/dAM6wU+OCRnD0EkkgtX7e7+gTBso
oX16tOXHwiiZ6sLaMJLirvmeW9Lp7bXGjP63ZC1IEHuQFyVaxg7TzhpG+PXULs33Mwht
64KQ==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=kZ40TZQJmz2zt6lYwCpeAnxVbOWM8KwFdCtsfH6CbQ4=;
b=m5HbExYCzmc21c5OLCzzHa8Xe8EdXvMRiTtiR09Dq8ChzNpcxJHIjjhpQyFMcUJWLj
+EmmgKiIE+uS4OHdEXmzNSv8MNhhEq7kUHf2SgjNDKlYLuCdTyrGG1MSWfK/msnX8s0I
ed03u8uPvY4i5nrXUPDSK0dSOilJdsKsbJ2GZF+UbwvHZb/bl7np8JUMFzrB2dYfV3GD
rJFKMpvlKiHjGv/usQSGWtLVDxlNl2ZH02SQETt2ZwtrhNj3g1Je8bALwt2ZVdzkZCGJ
ieq/RzKjaSqH69A9hehJuecmBRowdH3vtX4JtNR1N62OtoE92KN5JhRy7UIVzomglFHL
9n1A==
X-Gm-Message-State: AOAM533DVaJizLoTWtX7Zoe1e9yCLp7H3odxXAoCcHrMJ9IzNh+lDvEB
F0NqK2LlktrIoIPLMrk68BAVCsE0tyc=
X-Google-Smtp-Source: ABdhPJx0OFD8QshALbNm7ufdWhFpw5ctF+y/1hKbFM42Olw0k5XnLx6uQVu5On95xo6CAByxMQgtMhVbOBY=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9])
(user=yuzhao job=sendgmr) by 2002:a0c:fa12:: with SMTP id q18mr9972206qvn.2.1618297008125;
Mon, 12 Apr 2021 23:56:48 -0700 (PDT)
Date: Tue, 13 Apr 2021 00:56:22 -0600
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>
Message-Id: <20210413065633.2782273-6-yuzhao@google.com>
Mime-Version: 1.0
References: <20210413065633.2782273-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog
Subject: [PATCH v2 05/16] mm/swap.c: export activate_page()
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Benjamin Manes <ben.manes@gmail.com>,
Dave Chinner <david@fromorbit.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>,
Miaohe Lin <linmiaohe@huawei.com>,
Michael Larabel <michael@michaellarabel.com>,
Michal Hocko <mhocko@suse.com>,
Michel Lespinasse <michel@lespinasse.org>,
Rik van Riel <riel@surriel.com>,
Roman Gushchin <guro@fb.com>,
Rong Chen <rong.a.chen@intel.com>,
SeongJae Park <sjpark@amazon.de>,
Tim Chen <tim.c.chen@linux.intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, lkp@lists.01.org,
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210413065633.2782273-6-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
activate_page() is needed to activate pages that are already on lru or
queued in lru_pvecs.lru_add. The exported function is a merger between
the existing activate_page() and __lru_cache_activate_page().
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
include/linux/swap.h | 1 +
mm/swap.c | 28 +++++++++++++++-------------
2 files changed, 16 insertions(+), 13 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4cc6ec3bf0ab..de2bbbf181ba 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -344,6 +344,7 @@ extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_cpu_zone(struct zone *zone);
extern void lru_add_drain_all(void);
extern void rotate_reclaimable_page(struct page *page);
+extern void activate_page(struct page *page);
extern void deactivate_file_page(struct page *page);
extern void deactivate_page(struct page *page);
extern void mark_page_lazyfree(struct page *page);
diff --git a/mm/swap.c b/mm/swap.c
index 31b844d4ed94..f20ed56ebbbf 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -334,7 +334,7 @@ static bool need_activate_page_drain(int cpu)
return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0;
}
-static void activate_page(struct page *page)
+static void activate_page_on_lru(struct page *page)
{
page = compound_head(page);
if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
@@ -354,7 +354,7 @@ static inline void activate_page_drain(int cpu)
{
}
-static void activate_page(struct page *page)
+static void activate_page_on_lru(struct page *page)
{
struct lruvec *lruvec;
@@ -368,11 +368,22 @@ static void activate_page(struct page *page)
}
#endif
-static void __lru_cache_activate_page(struct page *page)
+/*
+ * If the page is on the LRU, queue it for activation via
+ * lru_pvecs.activate_page. Otherwise, assume the page is on a
+ * pagevec, mark it active and it'll be moved to the active
+ * LRU on the next drain.
+ */
+void activate_page(struct page *page)
{
struct pagevec *pvec;
int i;
+ if (PageLRU(page)) {
+ activate_page_on_lru(page);
+ return;
+ }
+
local_lock(&lru_pvecs.lock);
pvec = this_cpu_ptr(&lru_pvecs.lru_add);
@@ -421,16 +432,7 @@ void mark_page_accessed(struct page *page)
* evictable page accessed has no effect.
*/
} else if (!PageActive(page)) {
- /*
- * If the page is on the LRU, queue it for activation via
- * lru_pvecs.activate_page. Otherwise, assume the page is on a
- * pagevec, mark it active and it'll be moved to the active
- * LRU on the next drain.
- */
- if (PageLRU(page))
- activate_page(page);
- else
- __lru_cache_activate_page(page);
+ activate_page(page);
ClearPageReferenced(page);
workingset_activation(page);
}
--
2.31.1.295.g9ea45b61b8-goog

View File

@ -0,0 +1,214 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id AE093C433B4
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:02 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 867F3613B6
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:02 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S237032AbhDMG5T (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Tue, 13 Apr 2021 02:57:19 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44174 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S245189AbhDMG5J (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Tue, 13 Apr 2021 02:57:09 -0400
Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 804B0C061756
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:50 -0700 (PDT)
Received: by mail-yb1-xb49.google.com with SMTP id t9so4737272ybd.11
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:50 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=ClG8Hyf5TTtr5EO4ugQ7lEF5g9jg1Khbtn9fiHHHUO4=;
b=Bz/NCIqrnVElEbVXzKEuDo/rZuQj9KS3qgxTKdWtHhz5pm8i/K2zNVWoVZLOT3rUSR
LsBDpHnPsr/ZpnLlRjgBWaTe1LWedpUZEH5ms55YmlHa6b6jgezdJL3RT6PspSs7PC0D
X2Cp8BNNHZoXRtz4WK/5SGU3p+K+AzCV3OWzqDVroA6mh4+0ezV8mgPVSzwRPD5kb0gr
h1rkXixNjOMz9WdBgGoShJ+IdH8LzpJqTgis+qWDrFblJngv4Of0j7VP1YZiUBDZBIO8
UPhfTPDB4QZtT8MN0GMlMXbeAlUWYEo/7WcySgFwiSO0kt7YfrA1ke9uBnFFX4PziJEZ
ISaA==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=ClG8Hyf5TTtr5EO4ugQ7lEF5g9jg1Khbtn9fiHHHUO4=;
b=Ln0JHJYmVa2eSlKqpGtl/4uP0U/tFRs/pk5G6Sl8Iec4RrR5oqZdSeZC19j8TSeMUO
DmIZ5X8vhdMmgBAkWF7E4NxzMbBEJfzjseP4tvMHiWSQ+ZWeCLuYCrW6DEaObyCK+T7t
zIVNPEeJOIg1zDbSyPA0EVnJqpe6Gkec8ahBEG03YbyTmfuG6vb0McULQljJ5OhniFfX
UripKlgaIV1a55hf1KsyL81MPaz5nGMe/cCHrm8EHqvFhxWzKWFO1Qk4Tc1VI45wYTHS
YVo0QOvbSbampG2ears9RXvYdJ9QVT1M8JfO5/+bVnbN3VbRLxG7g4jVuwkA4zPKOHYI
dISw==
X-Gm-Message-State: AOAM531fA312edJF5bN6zMI4xlJ2NDI7L0pqlv/7HXEcSl6sGX7pfMuO
8LvKSxlzMxN/BLov7kCFr0vqNk/bYbk=
X-Google-Smtp-Source: ABdhPJwc8JriuoHPQ23GGBqKR69oc5Gp+cE2EiR0xXWJLv2glle7kn2s+OHctKLTVqR0qrsNshOCMzVz8BQ=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9])
(user=yuzhao job=sendgmr) by 2002:a25:8b0f:: with SMTP id i15mr42151231ybl.277.1618297009506;
Mon, 12 Apr 2021 23:56:49 -0700 (PDT)
Date: Tue, 13 Apr 2021 00:56:23 -0600
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>
Message-Id: <20210413065633.2782273-7-yuzhao@google.com>
Mime-Version: 1.0
References: <20210413065633.2782273-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog
Subject: [PATCH v2 06/16] mm, x86: support the access bit on non-leaf PMD entries
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Benjamin Manes <ben.manes@gmail.com>,
Dave Chinner <david@fromorbit.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>,
Miaohe Lin <linmiaohe@huawei.com>,
Michael Larabel <michael@michaellarabel.com>,
Michal Hocko <mhocko@suse.com>,
Michel Lespinasse <michel@lespinasse.org>,
Rik van Riel <riel@surriel.com>,
Roman Gushchin <guro@fb.com>,
Rong Chen <rong.a.chen@intel.com>,
SeongJae Park <sjpark@amazon.de>,
Tim Chen <tim.c.chen@linux.intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, lkp@lists.01.org,
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210413065633.2782273-7-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Some architectures support the accessed bit on non-leaf PMD entries
(parents) in addition to leaf PTE entries (children) where pages are
mapped, e.g., x86_64 sets the accessed bit on a parent when using it
as part of linear-address translation [1]. Page table walkers that are
interested in the accessed bit on children can take advantage of this:
they do not need to search the children when the accessed bit is not
set on a parent, given that they have previously cleared the accessed
bit on this parent.
[1]: Intel 64 and IA-32 Architectures Software Developer's Manual
Volume 3 (October 2019), section 4.8
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
arch/Kconfig | 9 +++++++++
arch/x86/Kconfig | 1 +
arch/x86/include/asm/pgtable.h | 2 +-
arch/x86/mm/pgtable.c | 5 ++++-
include/linux/pgtable.h | 4 ++--
5 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index ecfd3520b676..cbd7f66734ee 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -782,6 +782,15 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE
config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
bool
+config HAVE_ARCH_PARENT_PMD_YOUNG
+ bool
+ depends on PGTABLE_LEVELS > 2
+ help
+ Architectures that select this are able to set the accessed bit on
+ non-leaf PMD entries in addition to leaf PTE entries where pages are
+ mapped. For them, page table walkers that clear the accessed bit may
+ stop at non-leaf PMD entries when they do not see the accessed bit.
+
config HAVE_ARCH_HUGE_VMAP
bool
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2792879d398e..b5972eb82337 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -163,6 +163,7 @@ config X86
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
+ select HAVE_ARCH_PARENT_PMD_YOUNG if X86_64
select HAVE_ARCH_USERFAULTFD_WP if X86_64 && USERFAULTFD
select HAVE_ARCH_VMAP_STACK if X86_64
select HAVE_ARCH_WITHIN_STACK_FRAMES
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a02c67291cfc..a6b5cfe1fc5a 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -846,7 +846,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
static inline int pmd_bad(pmd_t pmd)
{
- return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
+ return ((pmd_flags(pmd) | _PAGE_ACCESSED) & ~_PAGE_USER) != _KERNPG_TABLE;
}
static inline unsigned long pages_to_mb(unsigned long npg)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index f6a9e2e36642..1c27e6f43f80 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
return ret;
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG)
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
@@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
return ret;
}
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pudp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pud_t *pudp)
{
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 5e772392a379..08dd9b8c055a 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -193,7 +193,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
#endif
#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG)
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address,
pmd_t *pmdp)
@@ -214,7 +214,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
BUILD_BUG();
return 0;
}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG */
#endif
#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
--
2.31.1.295.g9ea45b61b8-goog

View File

@ -0,0 +1,324 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 51FFEC43460
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:09 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 379F261278
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:09 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S1345127AbhDMG5Z (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
Tue, 13 Apr 2021 02:57:25 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44184 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S242333AbhDMG5L (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Tue, 13 Apr 2021 02:57:11 -0400
Received: from mail-qk1-x74a.google.com (mail-qk1-x74a.google.com [IPv6:2607:f8b0:4864:20::74a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C5CAAC06138C
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:51 -0700 (PDT)
Received: by mail-qk1-x74a.google.com with SMTP id g62so10544674qkf.18
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:51 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=1g9DmXJ6S7uvtuGOH48osWaF0/2fGGaQ6ChmAYKTm4o=;
b=VX7vOS1iaX+Hrwo31qklSok4an751KXHjlIezhTcoCSLXRV871k6PBsw+EibR4qWwF
i7kN3+4V671SYh9T69KvNxd786HKo+6WHv6Cd77TeqTfMbKijle6EBM4m+gl3DmNgnt0
ZA8WH1LPEZfGwn3JGivnRSoUPFkulI9NBk9pGJpe7wwngua0FZfbXjlpD6td2UZKxBbD
sm8Xc+HrppZn5mA4exh2/iFeR515mlnGTrbTx70pum7Y/iYPYQ2/HgcjccRGsGWUBLbF
bSOTnALSUrqOctmdDO2fO0EzfSnndPfVgKwv5QWLNUcXAi3ZlYRs7lyuvShH4lnaJxFe
LTUA==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=1g9DmXJ6S7uvtuGOH48osWaF0/2fGGaQ6ChmAYKTm4o=;
b=oh0TJS5Iv72EGLBpsE6HR3bE5cZX3J2uuz3z3TwZZpsfqvBQ4F+ZjeXnT9ZM8znSwl
DwO2yHU9V2acH3+Fw0txUASuMMXsp1h+eHsdlfoqbA5zx2G/8OJbldp/rudOwBO+wc4D
Wu7IiJYBc9jidKDE7Rputac3XOWXhSIhHMN1UGb8rIrlefaHD89A6pEKF6H/v6TSV99v
1MEtFUSmceep3K2EmUGX64fyXznC0KPZIkHHX/LcuC8xgYK2Go0LXGglt5x6U6QQ+Yk8
QGNr4pv1ynAg5b5FcA5bQe34gJ4JarQfXZx82+zF84UGh0Hj4hR4I60qEnSwVJBlCNqE
o7DA==
X-Gm-Message-State: AOAM532mqZo9PBRpK7zpxWavyuHSPxCR5uYKAcywst7dl0qA/ZdHQHKq
TyCJ6Kl6g2of6qtWwfJ7m9Y3UH3EDGM=
X-Google-Smtp-Source: ABdhPJwH+ey8nBGqYBlYs+cX0y6B8vZ/ifwsZXXs+V8u1FJGnhfXc1ufux+fOtI1iR9OnRAE6E9FqbhZIZQ=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9])
(user=yuzhao job=sendgmr) by 2002:a0c:db82:: with SMTP id m2mr21253979qvk.37.1618297010980;
Mon, 12 Apr 2021 23:56:50 -0700 (PDT)
Date: Tue, 13 Apr 2021 00:56:24 -0600
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>
Message-Id: <20210413065633.2782273-8-yuzhao@google.com>
Mime-Version: 1.0
References: <20210413065633.2782273-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog
Subject: [PATCH v2 07/16] mm/vmscan.c: refactor shrink_node()
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Benjamin Manes <ben.manes@gmail.com>,
Dave Chinner <david@fromorbit.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>,
Miaohe Lin <linmiaohe@huawei.com>,
Michael Larabel <michael@michaellarabel.com>,
Michal Hocko <mhocko@suse.com>,
Michel Lespinasse <michel@lespinasse.org>,
Rik van Riel <riel@surriel.com>,
Roman Gushchin <guro@fb.com>,
Rong Chen <rong.a.chen@intel.com>,
SeongJae Park <sjpark@amazon.de>,
Tim Chen <tim.c.chen@linux.intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, lkp@lists.01.org,
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210413065633.2782273-8-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Heuristics that determine scan balance between anon and file LRUs are
rather independent. Move them into a separate function to improve
readability.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
mm/vmscan.c | 186 +++++++++++++++++++++++++++-------------------------
1 file changed, 98 insertions(+), 88 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 562e87cbd7a1..1a24d2e0a4cb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2224,6 +2224,103 @@ enum scan_balance {
SCAN_FILE,
};
+static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
+{
+ unsigned long file;
+ struct lruvec *target_lruvec;
+
+ target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
+
+ /*
+ * Determine the scan balance between anon and file LRUs.
+ */
+ spin_lock_irq(&target_lruvec->lru_lock);
+ sc->anon_cost = target_lruvec->anon_cost;
+ sc->file_cost = target_lruvec->file_cost;
+ spin_unlock_irq(&target_lruvec->lru_lock);
+
+ /*
+ * Target desirable inactive:active list ratios for the anon
+ * and file LRU lists.
+ */
+ if (!sc->force_deactivate) {
+ unsigned long refaults;
+
+ refaults = lruvec_page_state(target_lruvec,
+ WORKINGSET_ACTIVATE_ANON);
+ if (refaults != target_lruvec->refaults[0] ||
+ inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
+ sc->may_deactivate |= DEACTIVATE_ANON;
+ else
+ sc->may_deactivate &= ~DEACTIVATE_ANON;
+
+ /*
+ * When refaults are being observed, it means a new
+ * workingset is being established. Deactivate to get
+ * rid of any stale active pages quickly.
+ */
+ refaults = lruvec_page_state(target_lruvec,
+ WORKINGSET_ACTIVATE_FILE);
+ if (refaults != target_lruvec->refaults[1] ||
+ inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
+ sc->may_deactivate |= DEACTIVATE_FILE;
+ else
+ sc->may_deactivate &= ~DEACTIVATE_FILE;
+ } else
+ sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
+
+ /*
+ * If we have plenty of inactive file pages that aren't
+ * thrashing, try to reclaim those first before touching
+ * anonymous pages.
+ */
+ file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
+ if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
+ sc->cache_trim_mode = 1;
+ else
+ sc->cache_trim_mode = 0;
+
+ /*
+ * Prevent the reclaimer from falling into the cache trap: as
+ * cache pages start out inactive, every cache fault will tip
+ * the scan balance towards the file LRU. And as the file LRU
+ * shrinks, so does the window for rotation from references.
+ * This means we have a runaway feedback loop where a tiny
+ * thrashing file LRU becomes infinitely more attractive than
+ * anon pages. Try to detect this based on file LRU size.
+ */
+ if (!cgroup_reclaim(sc)) {
+ unsigned long total_high_wmark = 0;
+ unsigned long free, anon;
+ int z;
+
+ free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
+ file = node_page_state(pgdat, NR_ACTIVE_FILE) +
+ node_page_state(pgdat, NR_INACTIVE_FILE);
+
+ for (z = 0; z < MAX_NR_ZONES; z++) {
+ struct zone *zone = &pgdat->node_zones[z];
+
+ if (!managed_zone(zone))
+ continue;
+
+ total_high_wmark += high_wmark_pages(zone);
+ }
+
+ /*
+ * Consider anon: if that's low too, this isn't a
+ * runaway file reclaim problem, but rather just
+ * extreme pressure. Reclaim as per usual then.
+ */
+ anon = node_page_state(pgdat, NR_INACTIVE_ANON);
+
+ sc->file_is_tiny =
+ file + free <= total_high_wmark &&
+ !(sc->may_deactivate & DEACTIVATE_ANON) &&
+ anon >> sc->priority;
+ }
+}
+
/*
* Determine how aggressively the anon and file LRU lists should be
* scanned. The relative value of each set of LRU lists is determined
@@ -2669,7 +2766,6 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
unsigned long nr_reclaimed, nr_scanned;
struct lruvec *target_lruvec;
bool reclaimable = false;
- unsigned long file;
target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
@@ -2679,93 +2775,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
nr_reclaimed = sc->nr_reclaimed;
nr_scanned = sc->nr_scanned;
- /*
- * Determine the scan balance between anon and file LRUs.
- */
- spin_lock_irq(&target_lruvec->lru_lock);
- sc->anon_cost = target_lruvec->anon_cost;
- sc->file_cost = target_lruvec->file_cost;
- spin_unlock_irq(&target_lruvec->lru_lock);
-
- /*
- * Target desirable inactive:active list ratios for the anon
- * and file LRU lists.
- */
- if (!sc->force_deactivate) {
- unsigned long refaults;
-
- refaults = lruvec_page_state(target_lruvec,
- WORKINGSET_ACTIVATE_ANON);
- if (refaults != target_lruvec->refaults[0] ||
- inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
- sc->may_deactivate |= DEACTIVATE_ANON;
- else
- sc->may_deactivate &= ~DEACTIVATE_ANON;
-
- /*
- * When refaults are being observed, it means a new
- * workingset is being established. Deactivate to get
- * rid of any stale active pages quickly.
- */
- refaults = lruvec_page_state(target_lruvec,
- WORKINGSET_ACTIVATE_FILE);
- if (refaults != target_lruvec->refaults[1] ||
- inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
- sc->may_deactivate |= DEACTIVATE_FILE;
- else
- sc->may_deactivate &= ~DEACTIVATE_FILE;
- } else
- sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
-
- /*
- * If we have plenty of inactive file pages that aren't
- * thrashing, try to reclaim those first before touching
- * anonymous pages.
- */
- file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
- if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
- sc->cache_trim_mode = 1;
- else
- sc->cache_trim_mode = 0;
-
- /*
- * Prevent the reclaimer from falling into the cache trap: as
- * cache pages start out inactive, every cache fault will tip
- * the scan balance towards the file LRU. And as the file LRU
- * shrinks, so does the window for rotation from references.
- * This means we have a runaway feedback loop where a tiny
- * thrashing file LRU becomes infinitely more attractive than
- * anon pages. Try to detect this based on file LRU size.
- */
- if (!cgroup_reclaim(sc)) {
- unsigned long total_high_wmark = 0;
- unsigned long free, anon;
- int z;
-
- free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
- file = node_page_state(pgdat, NR_ACTIVE_FILE) +
- node_page_state(pgdat, NR_INACTIVE_FILE);
-
- for (z = 0; z < MAX_NR_ZONES; z++) {
- struct zone *zone = &pgdat->node_zones[z];
- if (!managed_zone(zone))
- continue;
-
- total_high_wmark += high_wmark_pages(zone);
- }
-
- /*
- * Consider anon: if that's low too, this isn't a
- * runaway file reclaim problem, but rather just
- * extreme pressure. Reclaim as per usual then.
- */
- anon = node_page_state(pgdat, NR_INACTIVE_ANON);
-
- sc->file_is_tiny =
- file + free <= total_high_wmark &&
- !(sc->may_deactivate & DEACTIVATE_ANON) &&
- anon >> sc->priority;
- }
+ prepare_scan_count(pgdat, sc);
shrink_node_memcgs(pgdat, sc);
--
2.31.1.295.g9ea45b61b8-goog

View File

@ -0,0 +1,940 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id EF4FEC43462
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:18 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id CFA6161278
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:18 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S1345133AbhDMG5g (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
Tue, 13 Apr 2021 02:57:36 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44204 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S1345075AbhDMG5O (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Tue, 13 Apr 2021 02:57:14 -0400
Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C1B27C061342
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:54 -0700 (PDT)
Received: by mail-yb1-xb49.google.com with SMTP id g7so15243258ybm.13
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:54 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=o5Jou7hUitprbLWSkwF9m0rzlQtpjYePVUNvL8744B4=;
b=j0OnRRuICsaUkKDFgMmxVB6XdLNdlw7bkERy4WEKt8hjBSvD+Kp0+iOIcFy8N7824S
fiIZT/4kse0kGwqLNz6aT5fmfZX9JxxYEdOVwlR/Ws0MZO827eTQkQKIlfbqh7xkc4GT
TA7uVRsWqbOXCZgWt9zOAQjOZb/rs2P9QMKUlOFvfucJY2YuTWnwAyhKKGoanMVjppPe
XiDsyf+xl36l8HZCKTFf1nC3jlDQYELifqMsU7LnJQvyp4qL2Ghw5qGYALRz1HLWn1HT
nDo94se9xqkySvHWr7K7F6f3bxkPeLasd/CUo3jf80RHfUmgLwPgfJh9UGJtXbKnz7fZ
QiIQ==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=o5Jou7hUitprbLWSkwF9m0rzlQtpjYePVUNvL8744B4=;
b=GyMzG4Y9CRlIQTVJmAqzu40iDf9Ip5RESHdeLQAYm+tiJUh2RGVBJa6vKg38UMcgXC
EphRx2fv2WzLbuzG3KYV63fQ6mVN44J7Q5DZllmGANTY0ulI4ONN6upN04OPR+6Py8nD
thVg9bECRFbbKis2TNfSLXbGoO0/p8IfhjTpTAY+/gcDlXuuEwdN42+F5w+mKC73Ybd4
YzMfYRrVWHdmd49KirIiJ2yKVwsTTFfOgJlsRhMjIxnKiDO88ZiQPXOhSThi9Pq3d4xZ
AKWIylGhQNKmESlmvpmEzuo3lhpofz6NtP61MD5kogRHKN8cOrfEwHfr81CTzg1JSAjQ
d+PQ==
X-Gm-Message-State: AOAM530BBghVYsHEGPHYaVOEjeRU+Fi6DhCLAJz+E/4KNkH046B//NxP
jRpr98Lw0DozCkFBmdQ3Y2SqfxcTm/k=
X-Google-Smtp-Source: ABdhPJw4gIvDWjMb3eWqmdPfHBjM8mpzIQ6uMlcwopqsTVyafHAw8KFn3kdXyj3+PrOeIymH0kmLZduE+GQ=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9])
(user=yuzhao job=sendgmr) by 2002:a5b:f51:: with SMTP id y17mr7630772ybr.398.1618297013927;
Mon, 12 Apr 2021 23:56:53 -0700 (PDT)
Date: Tue, 13 Apr 2021 00:56:26 -0600
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>
Message-Id: <20210413065633.2782273-10-yuzhao@google.com>
Mime-Version: 1.0
References: <20210413065633.2782273-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog
Subject: [PATCH v2 09/16] mm: multigenerational lru: activation
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Benjamin Manes <ben.manes@gmail.com>,
Dave Chinner <david@fromorbit.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>,
Miaohe Lin <linmiaohe@huawei.com>,
Michael Larabel <michael@michaellarabel.com>,
Michal Hocko <mhocko@suse.com>,
Michel Lespinasse <michel@lespinasse.org>,
Rik van Riel <riel@surriel.com>,
Roman Gushchin <guro@fb.com>,
Rong Chen <rong.a.chen@intel.com>,
SeongJae Park <sjpark@amazon.de>,
Tim Chen <tim.c.chen@linux.intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, lkp@lists.01.org,
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210413065633.2782273-10-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
For pages accessed multiple times via file descriptors, instead of
activating them upon the second access, we activate them based on
the refault rates of their tiers. Pages accessed N times via file
descriptors belong to tier order_base_2(N). Pages from tier 0, i.e.,
those read ahead, accessed once via file descriptors and accessed only
via page tables, are evicted regardless of the refault rate. Pages
from other tiers will be moved to the next generation, i.e.,
activated, if the refault rates of their tiers are higher than that of
tier 0. Each generation contains at most MAX_NR_TIERS tiers, and they
require additional MAX_NR_TIERS-2 bits in page->flags. This feedback
model has a few advantages over the current feedforward model:
1) It has a negligible overhead in the access path because
activations are done in the reclaim path.
2) It takes mapped pages into account and avoids overprotecting
pages accessed multiple times via file descriptors.
3) More tiers offer better protection to pages accessed more than
twice when buffered-I/O-intensive workloads are under memory
pressure.
For pages mapped upon page faults, the accessed bit is set and they
must be properly aged. We add them to the per-zone lists indexed by
max_seq, i.e., the youngest generation. For pages not in page cache
or swap cache, this can be done easily in the page fault path: we
rename lru_cache_add_inactive_or_unevictable() to
lru_cache_add_page_vma() and add a new parameter, which is set to true
for pages mapped upon page faults. For pages in page cache or swap
cache, we cannot differentiate the page fault path from the read ahead
path at the time we call lru_cache_add() in add_to_page_cache_lru()
and __read_swap_cache_async(). So we add a new function
lru_gen_activation(), which is essentially activate_page(), to move
pages to the per-zone lists indexed by max_seq at a later time.
Hopefully we would find those pages in lru_pvecs.lru_add and simply
set PageActive() on them without having to actually move them.
Finally, we need to be compatible with the existing notion of active
and inactive. We cannot use PageActive() because it is not set on
active pages unless they are isolated, in order to spare the aging the
trouble of clearing it when an active generation becomes inactive. A
new function page_is_active() compares the generation number of a page
with max_seq and max_seq-1 (modulo MAX_NR_GENS), which are considered
active and protected from eviction. Other generations, which may
or may not exist, are considered inactive.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
fs/proc/task_mmu.c | 3 +-
include/linux/mm_inline.h | 101 +++++++++++++++++++++
include/linux/swap.h | 4 +-
kernel/events/uprobes.c | 2 +-
mm/huge_memory.c | 2 +-
mm/khugepaged.c | 2 +-
mm/memory.c | 14 +--
mm/migrate.c | 2 +-
mm/swap.c | 26 +++---
mm/swapfile.c | 2 +-
mm/userfaultfd.c | 2 +-
mm/vmscan.c | 91 ++++++++++++++++++-
mm/workingset.c | 179 +++++++++++++++++++++++++++++++-------
13 files changed, 371 insertions(+), 59 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e862cab69583..d292f20c4e3d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -19,6 +19,7 @@
#include <linux/shmem_fs.h>
#include <linux/uaccess.h>
#include <linux/pkeys.h>
+#include <linux/mm_inline.h>
#include <asm/elf.h>
#include <asm/tlb.h>
@@ -1718,7 +1719,7 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
if (PageSwapCache(page))
md->swapcache += nr_pages;
- if (PageActive(page) || PageUnevictable(page))
+ if (PageUnevictable(page) || page_is_active(compound_head(page), NULL))
md->active += nr_pages;
if (PageWriteback(page))
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 2bf910eb3dd7..5eb4b12972ec 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -95,6 +95,12 @@ static inline int lru_gen_from_seq(unsigned long seq)
return seq % MAX_NR_GENS;
}
+/* Convert the level of usage to a tier. See the comment on MAX_NR_TIERS. */
+static inline int lru_tier_from_usage(int usage)
+{
+ return order_base_2(usage + 1);
+}
+
/* Return a proper index regardless whether we keep a full history of stats. */
static inline int sid_from_seq_or_gen(int seq_or_gen)
{
@@ -238,12 +244,93 @@ static inline bool lru_gen_deletion(struct page *page, struct lruvec *lruvec)
return true;
}
+/* Activate a page from page cache or swap cache after it's mapped. */
+static inline void lru_gen_activation(struct page *page, struct vm_area_struct *vma)
+{
+ if (!lru_gen_enabled())
+ return;
+
+ if (PageActive(page) || PageUnevictable(page) || vma_is_dax(vma) ||
+ (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)))
+ return;
+ /*
+ * TODO: pass vm_fault to add_to_page_cache_lru() and
+ * __read_swap_cache_async() so they can activate pages directly when in
+ * the page fault path.
+ */
+ activate_page(page);
+}
+
/* Return -1 when a page is not on a list of the multigenerational lru. */
static inline int page_lru_gen(struct page *page)
{
return ((READ_ONCE(page->flags) & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
}
+/* This function works regardless whether the multigenerational lru is enabled. */
+static inline bool page_is_active(struct page *page, struct lruvec *lruvec)
+{
+ struct mem_cgroup *memcg;
+ int gen = page_lru_gen(page);
+ bool active = false;
+
+ VM_BUG_ON_PAGE(PageTail(page), page);
+
+ if (gen < 0)
+ return PageActive(page);
+
+ if (lruvec) {
+ VM_BUG_ON_PAGE(PageUnevictable(page), page);
+ VM_BUG_ON_PAGE(PageActive(page), page);
+ lockdep_assert_held(&lruvec->lru_lock);
+
+ return lru_gen_is_active(lruvec, gen);
+ }
+
+ rcu_read_lock();
+
+ memcg = page_memcg_rcu(page);
+ lruvec = mem_cgroup_lruvec(memcg, page_pgdat(page));
+ active = lru_gen_is_active(lruvec, gen);
+
+ rcu_read_unlock();
+
+ return active;
+}
+
+/* Return the level of usage of a page. See the comment on MAX_NR_TIERS. */
+static inline int page_tier_usage(struct page *page)
+{
+ unsigned long flags = READ_ONCE(page->flags);
+
+ return flags & BIT(PG_workingset) ?
+ ((flags & LRU_USAGE_MASK) >> LRU_USAGE_PGOFF) + 1 : 0;
+}
+
+/* Increment the usage counter after a page is accessed via file descriptors. */
+static inline bool page_inc_usage(struct page *page)
+{
+ unsigned long old_flags, new_flags;
+
+ if (!lru_gen_enabled())
+ return PageActive(page);
+
+ do {
+ old_flags = READ_ONCE(page->flags);
+
+ if (!(old_flags & BIT(PG_workingset)))
+ new_flags = old_flags | BIT(PG_workingset);
+ else
+ new_flags = (old_flags & ~LRU_USAGE_MASK) | min(LRU_USAGE_MASK,
+ (old_flags & LRU_USAGE_MASK) + BIT(LRU_USAGE_PGOFF));
+
+ if (old_flags == new_flags)
+ break;
+ } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
+
+ return true;
+}
+
#else /* CONFIG_LRU_GEN */
static inline bool lru_gen_enabled(void)
@@ -261,6 +348,20 @@ static inline bool lru_gen_deletion(struct page *page, struct lruvec *lruvec)
return false;
}
+static inline void lru_gen_activation(struct page *page, struct vm_area_struct *vma)
+{
+}
+
+static inline bool page_is_active(struct page *page, struct lruvec *lruvec)
+{
+ return PageActive(page);
+}
+
+static inline bool page_inc_usage(struct page *page)
+{
+ return PageActive(page);
+}
+
#endif /* CONFIG_LRU_GEN */
static __always_inline void add_page_to_lru_list(struct page *page,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index de2bbbf181ba..0e7532c7db22 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -350,8 +350,8 @@ extern void deactivate_page(struct page *page);
extern void mark_page_lazyfree(struct page *page);
extern void swap_setup(void);
-extern void lru_cache_add_inactive_or_unevictable(struct page *page,
- struct vm_area_struct *vma);
+extern void lru_cache_add_page_vma(struct page *page, struct vm_area_struct *vma,
+ bool faulting);
/* linux/mm/vmscan.c */
extern unsigned long zone_reclaimable_pages(struct zone *zone);
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6addc9780319..4e93e5602723 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -184,7 +184,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
if (new_page) {
get_page(new_page);
page_add_new_anon_rmap(new_page, vma, addr, false);
- lru_cache_add_inactive_or_unevictable(new_page, vma);
+ lru_cache_add_page_vma(new_page, vma, false);
} else
/* no new page, just dec_mm_counter for old_page */
dec_mm_counter(mm, MM_ANONPAGES);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 26d3cc4a7a0b..2cf46270c84b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -637,7 +637,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
entry = mk_huge_pmd(page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
page_add_new_anon_rmap(page, vma, haddr, true);
- lru_cache_add_inactive_or_unevictable(page, vma);
+ lru_cache_add_page_vma(page, vma, true);
pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index a7d6cb912b05..08a43910f232 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1199,7 +1199,7 @@ static void collapse_huge_page(struct mm_struct *mm,
spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd));
page_add_new_anon_rmap(new_page, vma, address, true);
- lru_cache_add_inactive_or_unevictable(new_page, vma);
+ lru_cache_add_page_vma(new_page, vma, true);
pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, address, pmd, _pmd);
update_mmu_cache_pmd(vma, address, pmd);
diff --git a/mm/memory.c b/mm/memory.c
index 550405fc3b5e..9a6cb6d31430 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -73,6 +73,7 @@
#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/vmalloc.h>
+#include <linux/mm_inline.h>
#include <trace/events/kmem.h>
@@ -839,7 +840,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
copy_user_highpage(new_page, page, addr, src_vma);
__SetPageUptodate(new_page);
page_add_new_anon_rmap(new_page, dst_vma, addr, false);
- lru_cache_add_inactive_or_unevictable(new_page, dst_vma);
+ lru_cache_add_page_vma(new_page, dst_vma, false);
rss[mm_counter(new_page)]++;
/* All done, just insert the new page copy in the child */
@@ -2907,7 +2908,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
*/
ptep_clear_flush_notify(vma, vmf->address, vmf->pte);
page_add_new_anon_rmap(new_page, vma, vmf->address, false);
- lru_cache_add_inactive_or_unevictable(new_page, vma);
+ lru_cache_add_page_vma(new_page, vma, true);
/*
* We call the notify macro here because, when using secondary
* mmu page tables (such as kvm shadow page tables), we want the
@@ -3438,9 +3439,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
/* ksm created a completely new copy */
if (unlikely(page != swapcache && swapcache)) {
page_add_new_anon_rmap(page, vma, vmf->address, false);
- lru_cache_add_inactive_or_unevictable(page, vma);
+ lru_cache_add_page_vma(page, vma, true);
} else {
do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
+ lru_gen_activation(page, vma);
}
swap_free(entry);
@@ -3584,7 +3586,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, vmf->address, false);
- lru_cache_add_inactive_or_unevictable(page, vma);
+ lru_cache_add_page_vma(page, vma, true);
setpte:
set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
@@ -3709,6 +3711,7 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR);
page_add_file_rmap(page, true);
+ lru_gen_activation(page, vma);
/*
* deposit and withdraw with pmd lock held
*/
@@ -3752,10 +3755,11 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
if (write && !(vma->vm_flags & VM_SHARED)) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, addr, false);
- lru_cache_add_inactive_or_unevictable(page, vma);
+ lru_cache_add_page_vma(page, vma, true);
} else {
inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
page_add_file_rmap(page, false);
+ lru_gen_activation(page, vma);
}
set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 62b81d5257aa..1064b03cac33 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -3004,7 +3004,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
inc_mm_counter(mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, addr, false);
if (!is_zone_device_page(page))
- lru_cache_add_inactive_or_unevictable(page, vma);
+ lru_cache_add_page_vma(page, vma, false);
get_page(page);
if (flush) {
diff --git a/mm/swap.c b/mm/swap.c
index f20ed56ebbbf..d6458ee1e9f8 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -306,7 +306,7 @@ void lru_note_cost_page(struct page *page)
static void __activate_page(struct page *page, struct lruvec *lruvec)
{
- if (!PageActive(page) && !PageUnevictable(page)) {
+ if (!PageUnevictable(page) && !page_is_active(page, lruvec)) {
int nr_pages = thp_nr_pages(page);
del_page_from_lru_list(page, lruvec);
@@ -337,7 +337,7 @@ static bool need_activate_page_drain(int cpu)
static void activate_page_on_lru(struct page *page)
{
page = compound_head(page);
- if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+ if (PageLRU(page) && !PageUnevictable(page) && !page_is_active(page, NULL)) {
struct pagevec *pvec;
local_lock(&lru_pvecs.lock);
@@ -431,7 +431,7 @@ void mark_page_accessed(struct page *page)
* this list is never rotated or maintained, so marking an
* evictable page accessed has no effect.
*/
- } else if (!PageActive(page)) {
+ } else if (!page_inc_usage(page)) {
activate_page(page);
ClearPageReferenced(page);
workingset_activation(page);
@@ -467,15 +467,14 @@ void lru_cache_add(struct page *page)
EXPORT_SYMBOL(lru_cache_add);
/**
- * lru_cache_add_inactive_or_unevictable
+ * lru_cache_add_page_vma
* @page: the page to be added to LRU
* @vma: vma in which page is mapped for determining reclaimability
*
- * Place @page on the inactive or unevictable LRU list, depending on its
- * evictability.
+ * Place @page on an LRU list, depending on its evictability.
*/
-void lru_cache_add_inactive_or_unevictable(struct page *page,
- struct vm_area_struct *vma)
+void lru_cache_add_page_vma(struct page *page, struct vm_area_struct *vma,
+ bool faulting)
{
bool unevictable;
@@ -492,6 +491,11 @@ void lru_cache_add_inactive_or_unevictable(struct page *page,
__mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
}
+
+ /* tell the multigenerational lru that the page is being faulted in */
+ if (lru_gen_enabled() && !unevictable && faulting)
+ SetPageActive(page);
+
lru_cache_add(page);
}
@@ -518,7 +522,7 @@ void lru_cache_add_inactive_or_unevictable(struct page *page,
*/
static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
{
- bool active = PageActive(page);
+ bool active = page_is_active(page, lruvec);
int nr_pages = thp_nr_pages(page);
if (PageUnevictable(page))
@@ -558,7 +562,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec)
{
- if (PageActive(page) && !PageUnevictable(page)) {
+ if (!PageUnevictable(page) && page_is_active(page, lruvec)) {
int nr_pages = thp_nr_pages(page);
del_page_from_lru_list(page, lruvec);
@@ -672,7 +676,7 @@ void deactivate_file_page(struct page *page)
*/
void deactivate_page(struct page *page)
{
- if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
+ if (PageLRU(page) && !PageUnevictable(page) && page_is_active(page, NULL)) {
struct pagevec *pvec;
local_lock(&lru_pvecs.lock);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index c6041d10a73a..ab3b5ca404fd 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1936,7 +1936,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
page_add_anon_rmap(page, vma, addr, false);
} else { /* ksm created a completely new copy */
page_add_new_anon_rmap(page, vma, addr, false);
- lru_cache_add_inactive_or_unevictable(page, vma);
+ lru_cache_add_page_vma(page, vma, false);
}
swap_free(entry);
out:
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 9a3d451402d7..e1d4cd3103b8 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -123,7 +123,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
inc_mm_counter(dst_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
- lru_cache_add_inactive_or_unevictable(page, dst_vma);
+ lru_cache_add_page_vma(page, dst_vma, true);
set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8559bb94d452..c74ebe2039f7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -898,9 +898,11 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
if (PageSwapCache(page)) {
swp_entry_t swap = { .val = page_private(page) };
- mem_cgroup_swapout(page, swap);
+
+ /* get a shadow entry before page_memcg() is cleared */
if (reclaimed && !mapping_exiting(mapping))
shadow = workingset_eviction(page, target_memcg);
+ mem_cgroup_swapout(page, swap);
__delete_from_swap_cache(page, swap, shadow);
xa_unlock_irqrestore(&mapping->i_pages, flags);
put_swap_page(page, swap);
@@ -4375,6 +4377,93 @@ static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
get_nr_gens(lruvec, 1) <= MAX_NR_GENS;
}
+/******************************************************************************
+ * refault feedback loop
+ ******************************************************************************/
+
+/*
+ * A feedback loop modeled after the PID controller. Currently supports the
+ * proportional (P) and the integral (I) terms; the derivative (D) term can be
+ * added if necessary. The setpoint (SP) is the desired position; the process
+ * variable (PV) is the measured position. The error is the difference between
+ * the SP and the PV. A positive error results in a positive control output
+ * correction, which, in our case, is to allow eviction.
+ *
+ * The P term is the current refault rate refaulted/(evicted+activated), which
+ * has a weight of 1. The I term is the arithmetic mean of the last N refault
+ * rates, weighted by geometric series 1/2, 1/4, ..., 1/(1<<N).
+ *
+ * Our goal is to make sure upper tiers have similar refault rates to the base
+ * tier. That is, we try to be fair to all tiers by maintaining similar refault
+ * rates across them.
+ */
+struct controller_pos {
+ unsigned long refaulted;
+ unsigned long total;
+ int gain;
+};
+
+static void read_controller_pos(struct controller_pos *pos, struct lruvec *lruvec,
+ int file, int tier, int gain)
+{
+ struct lrugen *lrugen = &lruvec->evictable;
+ int sid = sid_from_seq_or_gen(lrugen->min_seq[file]);
+
+ pos->refaulted = lrugen->avg_refaulted[file][tier] +
+ atomic_long_read(&lrugen->refaulted[sid][file][tier]);
+ pos->total = lrugen->avg_total[file][tier] +
+ atomic_long_read(&lrugen->evicted[sid][file][tier]);
+ if (tier)
+ pos->total += lrugen->activated[sid][file][tier - 1];
+ pos->gain = gain;
+}
+
+static void reset_controller_pos(struct lruvec *lruvec, int gen, int file)
+{
+ int tier;
+ int sid = sid_from_seq_or_gen(gen);
+ struct lrugen *lrugen = &lruvec->evictable;
+ bool carryover = gen == lru_gen_from_seq(lrugen->min_seq[file]);
+
+ if (!carryover && NR_STAT_GENS == 1)
+ return;
+
+ for (tier = 0; tier < MAX_NR_TIERS; tier++) {
+ if (carryover) {
+ unsigned long sum;
+
+ sum = lrugen->avg_refaulted[file][tier] +
+ atomic_long_read(&lrugen->refaulted[sid][file][tier]);
+ WRITE_ONCE(lrugen->avg_refaulted[file][tier], sum >> 1);
+
+ sum = lrugen->avg_total[file][tier] +
+ atomic_long_read(&lrugen->evicted[sid][file][tier]);
+ if (tier)
+ sum += lrugen->activated[sid][file][tier - 1];
+ WRITE_ONCE(lrugen->avg_total[file][tier], sum >> 1);
+
+ if (NR_STAT_GENS > 1)
+ continue;
+ }
+
+ atomic_long_set(&lrugen->refaulted[sid][file][tier], 0);
+ atomic_long_set(&lrugen->evicted[sid][file][tier], 0);
+ if (tier)
+ WRITE_ONCE(lrugen->activated[sid][file][tier - 1], 0);
+ }
+}
+
+static bool positive_ctrl_err(struct controller_pos *sp, struct controller_pos *pv)
+{
+ /*
+ * Allow eviction if the PV has a limited number of refaulted pages or a
+ * lower refault rate than the SP.
+ */
+ return pv->refaulted < SWAP_CLUSTER_MAX ||
+ pv->refaulted * max(sp->total, 1UL) * sp->gain <=
+ sp->refaulted * max(pv->total, 1UL) * pv->gain;
+}
+
/******************************************************************************
* state change
******************************************************************************/
diff --git a/mm/workingset.c b/mm/workingset.c
index cd39902c1062..df363f9419fc 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -168,9 +168,9 @@
* refault distance will immediately activate the refaulting page.
*/
-#define EVICTION_SHIFT ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \
- 1 + NODES_SHIFT + MEM_CGROUP_ID_SHIFT)
-#define EVICTION_MASK (~0UL >> EVICTION_SHIFT)
+#define EVICTION_SHIFT (BITS_PER_XA_VALUE - MEM_CGROUP_ID_SHIFT - NODES_SHIFT)
+#define EVICTION_MASK (BIT(EVICTION_SHIFT) - 1)
+#define WORKINGSET_WIDTH 1
/*
* Eviction timestamps need to be able to cover the full range of
@@ -182,38 +182,139 @@
*/
static unsigned int bucket_order __read_mostly;
-static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction,
- bool workingset)
+static void *pack_shadow(int memcg_id, struct pglist_data *pgdat, unsigned long val)
{
- eviction >>= bucket_order;
- eviction &= EVICTION_MASK;
- eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
- eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
- eviction = (eviction << 1) | workingset;
+ val = (val << MEM_CGROUP_ID_SHIFT) | memcg_id;
+ val = (val << NODES_SHIFT) | pgdat->node_id;
- return xa_mk_value(eviction);
+ return xa_mk_value(val);
}
-static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
- unsigned long *evictionp, bool *workingsetp)
+static unsigned long unpack_shadow(void *shadow, int *memcg_id, struct pglist_data **pgdat)
{
- unsigned long entry = xa_to_value(shadow);
- int memcgid, nid;
- bool workingset;
-
- workingset = entry & 1;
- entry >>= 1;
- nid = entry & ((1UL << NODES_SHIFT) - 1);
- entry >>= NODES_SHIFT;
- memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
- entry >>= MEM_CGROUP_ID_SHIFT;
-
- *memcgidp = memcgid;
- *pgdat = NODE_DATA(nid);
- *evictionp = entry << bucket_order;
- *workingsetp = workingset;
+ unsigned long val = xa_to_value(shadow);
+
+ *pgdat = NODE_DATA(val & (BIT(NODES_SHIFT) - 1));
+ val >>= NODES_SHIFT;
+ *memcg_id = val & (BIT(MEM_CGROUP_ID_SHIFT) - 1);
+
+ return val >> MEM_CGROUP_ID_SHIFT;
+}
+
+#ifdef CONFIG_LRU_GEN
+
+#if LRU_GEN_SHIFT + LRU_USAGE_SHIFT >= EVICTION_SHIFT
+#error "Please try smaller NODES_SHIFT, NR_LRU_GENS and TIERS_PER_GEN configurations"
+#endif
+
+static void page_set_usage(struct page *page, int usage)
+{
+ unsigned long old_flags, new_flags;
+
+ VM_BUG_ON(usage > BIT(LRU_USAGE_WIDTH));
+
+ if (!usage)
+ return;
+
+ do {
+ old_flags = READ_ONCE(page->flags);
+ new_flags = (old_flags & ~LRU_USAGE_MASK) | LRU_TIER_FLAGS |
+ ((usage - 1UL) << LRU_USAGE_PGOFF);
+ if (old_flags == new_flags)
+ break;
+ } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
+}
+
+/* Return a token to be stored in the shadow entry of a page being evicted. */
+static void *lru_gen_eviction(struct page *page)
+{
+ int sid, tier;
+ unsigned long token;
+ unsigned long min_seq;
+ struct lruvec *lruvec;
+ struct lrugen *lrugen;
+ int file = page_is_file_lru(page);
+ int usage = page_tier_usage(page);
+ struct mem_cgroup *memcg = page_memcg(page);
+ struct pglist_data *pgdat = page_pgdat(page);
+
+ if (!lru_gen_enabled())
+ return NULL;
+
+ lruvec = mem_cgroup_lruvec(memcg, pgdat);
+ lrugen = &lruvec->evictable;
+ min_seq = READ_ONCE(lrugen->min_seq[file]);
+ token = (min_seq << LRU_USAGE_SHIFT) | usage;
+
+ sid = sid_from_seq_or_gen(min_seq);
+ tier = lru_tier_from_usage(usage);
+ atomic_long_add(thp_nr_pages(page), &lrugen->evicted[sid][file][tier]);
+
+ return pack_shadow(mem_cgroup_id(memcg), pgdat, token);
+}
+
+/* Account a refaulted page based on the token stored in its shadow entry. */
+static bool lru_gen_refault(struct page *page, void *shadow)
+{
+ int sid, tier, usage;
+ int memcg_id;
+ unsigned long token;
+ unsigned long min_seq;
+ struct lruvec *lruvec;
+ struct lrugen *lrugen;
+ struct pglist_data *pgdat;
+ struct mem_cgroup *memcg;
+ int file = page_is_file_lru(page);
+
+ if (!lru_gen_enabled())
+ return false;
+
+ token = unpack_shadow(shadow, &memcg_id, &pgdat);
+ if (page_pgdat(page) != pgdat)
+ return true;
+
+ rcu_read_lock();
+ memcg = page_memcg_rcu(page);
+ if (mem_cgroup_id(memcg) != memcg_id)
+ goto unlock;
+
+ usage = token & (BIT(LRU_USAGE_SHIFT) - 1);
+ token >>= LRU_USAGE_SHIFT;
+
+ lruvec = mem_cgroup_lruvec(memcg, pgdat);
+ lrugen = &lruvec->evictable;
+ min_seq = READ_ONCE(lrugen->min_seq[file]);
+ if (token != (min_seq & (EVICTION_MASK >> LRU_USAGE_SHIFT)))
+ goto unlock;
+
+ page_set_usage(page, usage);
+
+ sid = sid_from_seq_or_gen(min_seq);
+ tier = lru_tier_from_usage(usage);
+ atomic_long_add(thp_nr_pages(page), &lrugen->refaulted[sid][file][tier]);
+ inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file);
+ if (tier)
+ inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file);
+unlock:
+ rcu_read_unlock();
+
+ return true;
+}
+
+#else /* CONFIG_LRU_GEN */
+
+static void *lru_gen_eviction(struct page *page)
+{
+ return NULL;
}
+static bool lru_gen_refault(struct page *page, void *shadow)
+{
+ return false;
+}
+
+#endif /* CONFIG_LRU_GEN */
+
/**
* workingset_age_nonresident - age non-resident entries as LRU ages
* @lruvec: the lruvec that was aged
@@ -256,18 +357,25 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
unsigned long eviction;
struct lruvec *lruvec;
int memcgid;
+ void *shadow;
/* Page is fully exclusive and pins page's memory cgroup pointer */
VM_BUG_ON_PAGE(PageLRU(page), page);
VM_BUG_ON_PAGE(page_count(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
+ shadow = lru_gen_eviction(page);
+ if (shadow)
+ return shadow;
+
lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
/* XXX: target_memcg can be NULL, go through lruvec */
memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
eviction = atomic_long_read(&lruvec->nonresident_age);
+ eviction >>= bucket_order;
+ eviction = (eviction << WORKINGSET_WIDTH) | PageWorkingset(page);
workingset_age_nonresident(lruvec, thp_nr_pages(page));
- return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
+ return pack_shadow(memcgid, pgdat, eviction);
}
/**
@@ -294,7 +402,10 @@ void workingset_refault(struct page *page, void *shadow)
bool workingset;
int memcgid;
- unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
+ if (lru_gen_refault(page, shadow))
+ return;
+
+ eviction = unpack_shadow(shadow, &memcgid, &pgdat);
rcu_read_lock();
/*
@@ -318,6 +429,8 @@ void workingset_refault(struct page *page, void *shadow)
goto out;
eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
refault = atomic_long_read(&eviction_lruvec->nonresident_age);
+ workingset = eviction & (BIT(WORKINGSET_WIDTH) - 1);
+ eviction = (eviction >> WORKINGSET_WIDTH) << bucket_order;
/*
* Calculate the refault distance
@@ -335,7 +448,7 @@ void workingset_refault(struct page *page, void *shadow)
* longest time, so the occasional inappropriate activation
* leading to pressure on the active list is not a problem.
*/
- refault_distance = (refault - eviction) & EVICTION_MASK;
+ refault_distance = (refault - eviction) & (EVICTION_MASK >> WORKINGSET_WIDTH);
/*
* The activation decision for this page is made at the level
@@ -594,7 +707,7 @@ static int __init workingset_init(void)
unsigned int max_order;
int ret;
- BUILD_BUG_ON(BITS_PER_LONG < EVICTION_SHIFT);
+ BUILD_BUG_ON(EVICTION_SHIFT < WORKINGSET_WIDTH);
/*
* Calculate the eviction bucket size to cover the longest
* actionable refault distance, which is currently half of
@@ -602,7 +715,7 @@ static int __init workingset_init(void)
* some more pages at runtime, so keep working with up to
* double the initial memory by using totalram_pages as-is.
*/
- timestamp_bits = BITS_PER_LONG - EVICTION_SHIFT;
+ timestamp_bits = EVICTION_SHIFT - WORKINGSET_WIDTH;
max_order = fls_long(totalram_pages() - 1);
if (max_order > timestamp_bits)
bucket_order = max_order - timestamp_bits;
--
2.31.1.295.g9ea45b61b8-goog

View File

@ -0,0 +1,814 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 584E2C433B4
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:24 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 364F560FDB
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:24 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S1345163AbhDMG5l (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
Tue, 13 Apr 2021 02:57:41 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44208 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S1345079AbhDMG5P (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Tue, 13 Apr 2021 02:57:15 -0400
Received: from mail-qt1-x849.google.com (mail-qt1-x849.google.com [IPv6:2607:f8b0:4864:20::849])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 33EA5C061574
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:56 -0700 (PDT)
Received: by mail-qt1-x849.google.com with SMTP id o15so661346qtq.20
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:56 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=cu41ToFIF+otWxvIfaf0+qpOwdOuOIiyHS34SY2MKlA=;
b=DVE8DNOLCIkGnchiTSJf1aDqFVGLrvEGecUeUN0sDIHBw/EmgoB7xYiwrDwlmTJzfB
7mJ9wgXcC3xTW/xg8bwqYmzHvC/L4X4KSoDnIWPKnc562ObAH2IGWhiD3korjYqggzne
pjoL+Xglz7D6A6bOmM8M5cZKQhXRisrB5aDyIVUvRJmQLTWP2WB2n4JPqTvP/wVMQ9Sn
hXTZFKELKJbKA+BHU0pwjNA7cFy1nW2rJ9X9d+VP21+ThijMrCLuken/5O6OvPkUefZl
sakH+0tV7Yy/fR7EVGJoWcpUjUiGxd6+0AUNvryVNuijwkPETOtPNH6UfyfgZ6xdkl9P
OYsw==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=cu41ToFIF+otWxvIfaf0+qpOwdOuOIiyHS34SY2MKlA=;
b=Hv9g3zJZwz81XIFNxdDjfOsfsikJNtFff85YKjuCIJR3ru0Fl/o3i0TbhFzOjTUKBt
yhJLayQgM9XxSudGQ47m0Ya49B4k58xttPSNqFNA93EXYaxcUN7fG8T+ZYA0VxA96PeD
qZHRzegQrJ6SM3hYDYpBhvClDfl9zRD0Gpns+vVl2DjteDrRi+wekSzyz6MvMlGhtb/s
F1O38FNuucDx0CgK/so+BE9vzBcN8TzGAU9OaMBW6lDAhAcq+NxEl32LeO/a/P6Oz9A1
x77ZeDzQXRkpTd7y0bgBYZWdg+h/cc09EJonEBfUTa9tDdaDfqMhPlllI6ZHFFJYrlkh
gSDw==
X-Gm-Message-State: AOAM530hiDEzMAP2in3GTJKn5AqypprG9ZgOZOECg5xoh9CUzK15XTUw
0N5X5CtrUDDlCTAUV9QB3qMFCzKiHHg=
X-Google-Smtp-Source: ABdhPJzMmLOgNcb9fea/k5rqaH2vAtKGPRWVf2ZxGZXPr5TIM1jkpFwnMYJAYMnOr+dtOuXM8dcYCymh2hY=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9])
(user=yuzhao job=sendgmr) by 2002:a05:6214:7ed:: with SMTP id
bp13mr7059024qvb.17.1618297015323; Mon, 12 Apr 2021 23:56:55 -0700 (PDT)
Date: Tue, 13 Apr 2021 00:56:27 -0600
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>
Message-Id: <20210413065633.2782273-11-yuzhao@google.com>
Mime-Version: 1.0
References: <20210413065633.2782273-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog
Subject: [PATCH v2 10/16] mm: multigenerational lru: mm_struct list
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Benjamin Manes <ben.manes@gmail.com>,
Dave Chinner <david@fromorbit.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>,
Miaohe Lin <linmiaohe@huawei.com>,
Michael Larabel <michael@michaellarabel.com>,
Michal Hocko <mhocko@suse.com>,
Michel Lespinasse <michel@lespinasse.org>,
Rik van Riel <riel@surriel.com>,
Roman Gushchin <guro@fb.com>,
Rong Chen <rong.a.chen@intel.com>,
SeongJae Park <sjpark@amazon.de>,
Tim Chen <tim.c.chen@linux.intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, lkp@lists.01.org,
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210413065633.2782273-11-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
In order to scan page tables, we add infrastructure to maintain
either a system-wide mm_struct list or per-memcg mm_struct lists.
Multiple threads can concurrently work on the same mm_struct list, and
each of them will be given a different mm_struct.
This infrastructure also tracks whether an mm_struct is being used on
any CPUs or has been used since the last time a worker looked at it.
In other words, workers will not be given an mm_struct that belongs to
a process that has been sleeping.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
fs/exec.c | 2 +
include/linux/memcontrol.h | 6 +
include/linux/mm_types.h | 117 ++++++++++++++
include/linux/mmzone.h | 2 -
kernel/exit.c | 1 +
kernel/fork.c | 10 ++
kernel/kthread.c | 1 +
kernel/sched/core.c | 2 +
mm/memcontrol.c | 28 ++++
mm/vmscan.c | 316 +++++++++++++++++++++++++++++++++++++
10 files changed, 483 insertions(+), 2 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c
index 18594f11c31f..c691d4d7720c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1008,6 +1008,7 @@ static int exec_mmap(struct mm_struct *mm)
active_mm = tsk->active_mm;
tsk->active_mm = mm;
tsk->mm = mm;
+ lru_gen_add_mm(mm);
/*
* This prevents preemption while active_mm is being loaded and
* it and mm are being updated, which could cause problems for
@@ -1018,6 +1019,7 @@ static int exec_mmap(struct mm_struct *mm)
if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
local_irq_enable();
activate_mm(active_mm, mm);
+ lru_gen_switch_mm(active_mm, mm);
if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
local_irq_enable();
tsk->mm->vmacache_seqnum = 0;
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f13dc02cf277..cff95ed1ee2b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -212,6 +212,8 @@ struct obj_cgroup {
};
};
+struct lru_gen_mm_list;
+
/*
* The memory controller data structure. The memory controller controls both
* page cache and RSS per cgroup. We would eventually like to provide
@@ -335,6 +337,10 @@ struct mem_cgroup {
struct deferred_split deferred_split_queue;
#endif
+#ifdef CONFIG_LRU_GEN
+ struct lru_gen_mm_list *mm_list;
+#endif
+
struct mem_cgroup_per_node *nodeinfo[0];
/* WARNING: nodeinfo must be the last member here */
};
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6613b26a8894..f8a239fbb958 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -15,6 +15,8 @@
#include <linux/page-flags-layout.h>
#include <linux/workqueue.h>
#include <linux/seqlock.h>
+#include <linux/nodemask.h>
+#include <linux/mmdebug.h>
#include <asm/mmu.h>
@@ -383,6 +385,8 @@ struct core_state {
struct completion startup;
};
+#define ANON_AND_FILE 2
+
struct kioctx_table;
struct mm_struct {
struct {
@@ -561,6 +565,22 @@ struct mm_struct {
#ifdef CONFIG_IOMMU_SUPPORT
u32 pasid;
+#endif
+#ifdef CONFIG_LRU_GEN
+ struct {
+ /* the node of a global or per-memcg mm_struct list */
+ struct list_head list;
+#ifdef CONFIG_MEMCG
+ /* points to memcg of the owner task above */
+ struct mem_cgroup *memcg;
+#endif
+ /* whether this mm_struct has been used since the last walk */
+ nodemask_t nodes[ANON_AND_FILE];
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ /* the number of CPUs using this mm_struct */
+ atomic_t nr_cpus;
+#endif
+ } lrugen;
#endif
} __randomize_layout;
@@ -588,6 +608,103 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
return (struct cpumask *)&mm->cpu_bitmap;
}
+#ifdef CONFIG_LRU_GEN
+
+void lru_gen_init_mm(struct mm_struct *mm);
+void lru_gen_add_mm(struct mm_struct *mm);
+void lru_gen_del_mm(struct mm_struct *mm);
+#ifdef CONFIG_MEMCG
+int lru_gen_alloc_mm_list(struct mem_cgroup *memcg);
+void lru_gen_free_mm_list(struct mem_cgroup *memcg);
+void lru_gen_migrate_mm(struct mm_struct *mm);
+#endif
+
+/*
+ * Track the usage so mm_structs that haven't been used since the last walk can
+ * be skipped. This function adds a theoretical overhead to each context switch,
+ * which hasn't been measurable.
+ */
+static inline void lru_gen_switch_mm(struct mm_struct *old, struct mm_struct *new)
+{
+ int file;
+
+ /* exclude init_mm, efi_mm, etc. */
+ if (!core_kernel_data((unsigned long)old)) {
+ VM_BUG_ON(old == &init_mm);
+
+ for (file = 0; file < ANON_AND_FILE; file++)
+ nodes_setall(old->lrugen.nodes[file]);
+
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ atomic_dec(&old->lrugen.nr_cpus);
+ VM_BUG_ON_MM(atomic_read(&old->lrugen.nr_cpus) < 0, old);
+#endif
+ } else
+ VM_BUG_ON_MM(READ_ONCE(old->lrugen.list.prev) ||
+ READ_ONCE(old->lrugen.list.next), old);
+
+ if (!core_kernel_data((unsigned long)new)) {
+ VM_BUG_ON(new == &init_mm);
+
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ atomic_inc(&new->lrugen.nr_cpus);
+ VM_BUG_ON_MM(atomic_read(&new->lrugen.nr_cpus) < 0, new);
+#endif
+ } else
+ VM_BUG_ON_MM(READ_ONCE(new->lrugen.list.prev) ||
+ READ_ONCE(new->lrugen.list.next), new);
+}
+
+/* Return whether this mm_struct is being used on any CPUs. */
+static inline bool lru_gen_mm_is_active(struct mm_struct *mm)
+{
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ return !cpumask_empty(mm_cpumask(mm));
+#else
+ return atomic_read(&mm->lrugen.nr_cpus);
+#endif
+}
+
+#else /* CONFIG_LRU_GEN */
+
+static inline void lru_gen_init_mm(struct mm_struct *mm)
+{
+}
+
+static inline void lru_gen_add_mm(struct mm_struct *mm)
+{
+}
+
+static inline void lru_gen_del_mm(struct mm_struct *mm)
+{
+}
+
+#ifdef CONFIG_MEMCG
+static inline int lru_gen_alloc_mm_list(struct mem_cgroup *memcg)
+{
+ return 0;
+}
+
+static inline void lru_gen_free_mm_list(struct mem_cgroup *memcg)
+{
+}
+
+static inline void lru_gen_migrate_mm(struct mm_struct *mm)
+{
+}
+#endif
+
+static inline void lru_gen_switch_mm(struct mm_struct *old, struct mm_struct *new)
+{
+}
+
+static inline bool lru_gen_mm_is_active(struct mm_struct *mm)
+{
+ return false;
+}
+
+#endif /* CONFIG_LRU_GEN */
+
struct mmu_gather;
extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index a60c7498afd7..dcfadf6a8c07 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -285,8 +285,6 @@ static inline bool is_active_lru(enum lru_list lru)
return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
}
-#define ANON_AND_FILE 2
-
enum lruvec_flags {
LRUVEC_CONGESTED, /* lruvec has many dirty pages
* backed by a congested BDI
diff --git a/kernel/exit.c b/kernel/exit.c
index 04029e35e69a..e4292717ce37 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -422,6 +422,7 @@ void mm_update_next_owner(struct mm_struct *mm)
goto retry;
}
WRITE_ONCE(mm->owner, c);
+ lru_gen_migrate_mm(mm);
task_unlock(c);
put_task_struct(c);
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 426cd0c51f9e..dfa84200229f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -665,6 +665,7 @@ static void check_mm(struct mm_struct *mm)
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
#endif
+ VM_BUG_ON_MM(lru_gen_mm_is_active(mm), mm);
}
#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
@@ -1055,6 +1056,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
goto fail_nocontext;
mm->user_ns = get_user_ns(user_ns);
+ lru_gen_init_mm(mm);
return mm;
fail_nocontext:
@@ -1097,6 +1099,7 @@ static inline void __mmput(struct mm_struct *mm)
}
if (mm->binfmt)
module_put(mm->binfmt->module);
+ lru_gen_del_mm(mm);
mmdrop(mm);
}
@@ -2521,6 +2524,13 @@ pid_t kernel_clone(struct kernel_clone_args *args)
get_task_struct(p);
}
+ if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) {
+ /* lock the task to synchronize with memcg migration */
+ task_lock(p);
+ lru_gen_add_mm(p->mm);
+ task_unlock(p);
+ }
+
wake_up_new_task(p);
/* forking complete and child started to run, tell ptracer */
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 1578973c5740..8da7767bb06a 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1303,6 +1303,7 @@ void kthread_use_mm(struct mm_struct *mm)
tsk->mm = mm;
membarrier_update_current_mm(mm);
switch_mm_irqs_off(active_mm, mm, tsk);
+ lru_gen_switch_mm(active_mm, mm);
local_irq_enable();
task_unlock(tsk);
#ifdef finish_arch_post_lock_switch
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 98191218d891..bd626dbdb816 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4306,6 +4306,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
* finish_task_switch()'s mmdrop().
*/
switch_mm_irqs_off(prev->active_mm, next->mm, next);
+ lru_gen_switch_mm(prev->active_mm, next->mm);
if (!prev->mm) { // from kernel
/* will mmdrop() in finish_task_switch(). */
@@ -7597,6 +7598,7 @@ void idle_task_exit(void)
if (mm != &init_mm) {
switch_mm(mm, &init_mm, current);
+ lru_gen_switch_mm(mm, &init_mm);
finish_arch_post_lock_switch();
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e064ac0d850a..496e91e813af 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5206,6 +5206,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
free_mem_cgroup_per_node_info(memcg, node);
free_percpu(memcg->vmstats_percpu);
free_percpu(memcg->vmstats_local);
+ lru_gen_free_mm_list(memcg);
kfree(memcg);
}
@@ -5258,6 +5259,9 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (alloc_mem_cgroup_per_node_info(memcg, node))
goto fail;
+ if (lru_gen_alloc_mm_list(memcg))
+ goto fail;
+
if (memcg_wb_domain_init(memcg, GFP_KERNEL))
goto fail;
@@ -6162,6 +6166,29 @@ static void mem_cgroup_move_task(void)
}
#endif
+#ifdef CONFIG_LRU_GEN
+static void mem_cgroup_attach(struct cgroup_taskset *tset)
+{
+ struct cgroup_subsys_state *css;
+ struct task_struct *task = NULL;
+
+ cgroup_taskset_for_each_leader(task, css, tset)
+ ;
+
+ if (!task)
+ return;
+
+ task_lock(task);
+ if (task->mm && task->mm->owner == task)
+ lru_gen_migrate_mm(task->mm);
+ task_unlock(task);
+}
+#else
+static void mem_cgroup_attach(struct cgroup_taskset *tset)
+{
+}
+#endif
+
static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
{
if (value == PAGE_COUNTER_MAX)
@@ -6502,6 +6529,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
.css_free = mem_cgroup_css_free,
.css_reset = mem_cgroup_css_reset,
.can_attach = mem_cgroup_can_attach,
+ .attach = mem_cgroup_attach,
.cancel_attach = mem_cgroup_cancel_attach,
.post_attach = mem_cgroup_move_task,
.dfl_cftypes = memory_files,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c74ebe2039f7..d67dfd1e3930 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4464,6 +4464,313 @@ static bool positive_ctrl_err(struct controller_pos *sp, struct controller_pos *
sp->refaulted * max(pv->total, 1UL) * pv->gain;
}
+/******************************************************************************
+ * mm_struct list
+ ******************************************************************************/
+
+enum {
+ MM_SCHED_ACTIVE, /* running processes */
+ MM_SCHED_INACTIVE, /* sleeping processes */
+ MM_LOCK_CONTENTION, /* lock contentions */
+ MM_VMA_INTERVAL, /* VMAs within the range of current table */
+ MM_LEAF_OTHER_NODE, /* entries not from node under reclaim */
+ MM_LEAF_OTHER_MEMCG, /* entries not from memcg under reclaim */
+ MM_LEAF_OLD, /* old entries */
+ MM_LEAF_YOUNG, /* young entries */
+ MM_LEAF_DIRTY, /* dirty entries */
+ MM_LEAF_HOLE, /* non-present entries */
+ MM_NONLEAF_OLD, /* old non-leaf pmd entries */
+ MM_NONLEAF_YOUNG, /* young non-leaf pmd entries */
+ NR_MM_STATS
+};
+
+/* mnemonic codes for the stats above */
+#define MM_STAT_CODES "aicvnmoydhlu"
+
+struct lru_gen_mm_list {
+ /* the head of a global or per-memcg mm_struct list */
+ struct list_head head;
+ /* protects the list */
+ spinlock_t lock;
+ struct {
+ /* set to max_seq after each round of walk */
+ unsigned long cur_seq;
+ /* the next mm on the list to walk */
+ struct list_head *iter;
+ /* to wait for the last worker to finish */
+ struct wait_queue_head wait;
+ /* the number of concurrent workers */
+ int nr_workers;
+ /* stats for debugging */
+ unsigned long stats[NR_STAT_GENS][NR_MM_STATS];
+ } nodes[0];
+};
+
+static struct lru_gen_mm_list *global_mm_list;
+
+static struct lru_gen_mm_list *alloc_mm_list(void)
+{
+ int nid;
+ struct lru_gen_mm_list *mm_list;
+
+ mm_list = kzalloc(struct_size(mm_list, nodes, nr_node_ids), GFP_KERNEL);
+ if (!mm_list)
+ return NULL;
+
+ INIT_LIST_HEAD(&mm_list->head);
+ spin_lock_init(&mm_list->lock);
+
+ for_each_node(nid) {
+ mm_list->nodes[nid].cur_seq = MIN_NR_GENS;
+ mm_list->nodes[nid].iter = &mm_list->head;
+ init_waitqueue_head(&mm_list->nodes[nid].wait);
+ }
+
+ return mm_list;
+}
+
+static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg)
+{
+#ifdef CONFIG_MEMCG
+ if (!mem_cgroup_disabled())
+ return memcg ? memcg->mm_list : root_mem_cgroup->mm_list;
+#endif
+ VM_BUG_ON(memcg);
+
+ return global_mm_list;
+}
+
+void lru_gen_init_mm(struct mm_struct *mm)
+{
+ int file;
+
+ INIT_LIST_HEAD(&mm->lrugen.list);
+#ifdef CONFIG_MEMCG
+ mm->lrugen.memcg = NULL;
+#endif
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ atomic_set(&mm->lrugen.nr_cpus, 0);
+#endif
+ for (file = 0; file < ANON_AND_FILE; file++)
+ nodes_clear(mm->lrugen.nodes[file]);
+}
+
+void lru_gen_add_mm(struct mm_struct *mm)
+{
+ struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
+ struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
+
+ VM_BUG_ON_MM(!list_empty(&mm->lrugen.list), mm);
+#ifdef CONFIG_MEMCG
+ VM_BUG_ON_MM(mm->lrugen.memcg, mm);
+ WRITE_ONCE(mm->lrugen.memcg, memcg);
+#endif
+ spin_lock(&mm_list->lock);
+ list_add_tail(&mm->lrugen.list, &mm_list->head);
+ spin_unlock(&mm_list->lock);
+}
+
+void lru_gen_del_mm(struct mm_struct *mm)
+{
+ int nid;
+#ifdef CONFIG_MEMCG
+ struct lru_gen_mm_list *mm_list = get_mm_list(mm->lrugen.memcg);
+#else
+ struct lru_gen_mm_list *mm_list = get_mm_list(NULL);
+#endif
+
+ spin_lock(&mm_list->lock);
+
+ for_each_node(nid) {
+ if (mm_list->nodes[nid].iter != &mm->lrugen.list)
+ continue;
+
+ mm_list->nodes[nid].iter = mm_list->nodes[nid].iter->next;
+ if (mm_list->nodes[nid].iter == &mm_list->head)
+ WRITE_ONCE(mm_list->nodes[nid].cur_seq,
+ mm_list->nodes[nid].cur_seq + 1);
+ }
+
+ list_del_init(&mm->lrugen.list);
+
+ spin_unlock(&mm_list->lock);
+
+#ifdef CONFIG_MEMCG
+ mem_cgroup_put(mm->lrugen.memcg);
+ WRITE_ONCE(mm->lrugen.memcg, NULL);
+#endif
+}
+
+#ifdef CONFIG_MEMCG
+int lru_gen_alloc_mm_list(struct mem_cgroup *memcg)
+{
+ if (mem_cgroup_disabled())
+ return 0;
+
+ memcg->mm_list = alloc_mm_list();
+
+ return memcg->mm_list ? 0 : -ENOMEM;
+}
+
+void lru_gen_free_mm_list(struct mem_cgroup *memcg)
+{
+ kfree(memcg->mm_list);
+ memcg->mm_list = NULL;
+}
+
+void lru_gen_migrate_mm(struct mm_struct *mm)
+{
+ struct mem_cgroup *memcg;
+
+ lockdep_assert_held(&mm->owner->alloc_lock);
+
+ if (mem_cgroup_disabled())
+ return;
+
+ rcu_read_lock();
+ memcg = mem_cgroup_from_task(mm->owner);
+ rcu_read_unlock();
+ if (memcg == mm->lrugen.memcg)
+ return;
+
+ VM_BUG_ON_MM(!mm->lrugen.memcg, mm);
+ VM_BUG_ON_MM(list_empty(&mm->lrugen.list), mm);
+
+ lru_gen_del_mm(mm);
+ lru_gen_add_mm(mm);
+}
+
+static bool mm_has_migrated(struct mm_struct *mm, struct mem_cgroup *memcg)
+{
+ return READ_ONCE(mm->lrugen.memcg) != memcg;
+}
+#else
+static bool mm_has_migrated(struct mm_struct *mm, struct mem_cgroup *memcg)
+{
+ return false;
+}
+#endif
+
+struct mm_walk_args {
+ struct mem_cgroup *memcg;
+ unsigned long max_seq;
+ unsigned long next_addr;
+ unsigned long start_pfn;
+ unsigned long end_pfn;
+ int node_id;
+ int batch_size;
+ int mm_stats[NR_MM_STATS];
+ int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
+ bool should_walk[ANON_AND_FILE];
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG)
+ unsigned long bitmap[BITS_TO_LONGS(PTRS_PER_PMD)];
+#endif
+};
+
+static void reset_mm_stats(struct lru_gen_mm_list *mm_list, bool last,
+ struct mm_walk_args *args)
+{
+ int i;
+ int nid = args->node_id;
+ int sid = sid_from_seq_or_gen(args->max_seq);
+
+ lockdep_assert_held(&mm_list->lock);
+
+ for (i = 0; i < NR_MM_STATS; i++) {
+ WRITE_ONCE(mm_list->nodes[nid].stats[sid][i],
+ mm_list->nodes[nid].stats[sid][i] + args->mm_stats[i]);
+ args->mm_stats[i] = 0;
+ }
+
+ if (!last || NR_STAT_GENS == 1)
+ return;
+
+ sid = sid_from_seq_or_gen(args->max_seq + 1);
+ for (i = 0; i < NR_MM_STATS; i++)
+ WRITE_ONCE(mm_list->nodes[nid].stats[sid][i], 0);
+}
+
+static bool should_skip_mm(struct mm_struct *mm, int nid, int swappiness)
+{
+ int file;
+ unsigned long size = 0;
+
+ if (mm_is_oom_victim(mm))
+ return true;
+
+ for (file = !swappiness; file < ANON_AND_FILE; file++) {
+ if (lru_gen_mm_is_active(mm) || node_isset(nid, mm->lrugen.nodes[file]))
+ size += file ? get_mm_counter(mm, MM_FILEPAGES) :
+ get_mm_counter(mm, MM_ANONPAGES) +
+ get_mm_counter(mm, MM_SHMEMPAGES);
+ }
+
+ /* leave the legwork to the rmap if mapped pages are too sparse */
+ if (size < max(SWAP_CLUSTER_MAX, mm_pgtables_bytes(mm) / PAGE_SIZE))
+ return true;
+
+ return !mmget_not_zero(mm);
+}
+
+/* To support multiple workers that concurrently walk mm_struct list. */
+static bool get_next_mm(struct mm_walk_args *args, int swappiness, struct mm_struct **iter)
+{
+ bool last = true;
+ struct mm_struct *mm = NULL;
+ int nid = args->node_id;
+ struct lru_gen_mm_list *mm_list = get_mm_list(args->memcg);
+
+ if (*iter)
+ mmput_async(*iter);
+ else if (args->max_seq <= READ_ONCE(mm_list->nodes[nid].cur_seq))
+ return false;
+
+ spin_lock(&mm_list->lock);
+
+ VM_BUG_ON(args->max_seq > mm_list->nodes[nid].cur_seq + 1);
+ VM_BUG_ON(*iter && args->max_seq < mm_list->nodes[nid].cur_seq);
+ VM_BUG_ON(*iter && !mm_list->nodes[nid].nr_workers);
+
+ if (args->max_seq <= mm_list->nodes[nid].cur_seq) {
+ last = *iter;
+ goto done;
+ }
+
+ if (mm_list->nodes[nid].iter == &mm_list->head) {
+ VM_BUG_ON(*iter || mm_list->nodes[nid].nr_workers);
+ mm_list->nodes[nid].iter = mm_list->nodes[nid].iter->next;
+ }
+
+ while (!mm && mm_list->nodes[nid].iter != &mm_list->head) {
+ mm = list_entry(mm_list->nodes[nid].iter, struct mm_struct, lrugen.list);
+ mm_list->nodes[nid].iter = mm_list->nodes[nid].iter->next;
+ if (should_skip_mm(mm, nid, swappiness))
+ mm = NULL;
+
+ args->mm_stats[mm ? MM_SCHED_ACTIVE : MM_SCHED_INACTIVE]++;
+ }
+
+ if (mm_list->nodes[nid].iter == &mm_list->head)
+ WRITE_ONCE(mm_list->nodes[nid].cur_seq,
+ mm_list->nodes[nid].cur_seq + 1);
+done:
+ if (*iter && !mm)
+ mm_list->nodes[nid].nr_workers--;
+ if (!*iter && mm)
+ mm_list->nodes[nid].nr_workers++;
+
+ last = last && !mm_list->nodes[nid].nr_workers &&
+ mm_list->nodes[nid].iter == &mm_list->head;
+
+ reset_mm_stats(mm_list, last, args);
+
+ spin_unlock(&mm_list->lock);
+
+ *iter = mm;
+
+ return last;
+}
+
/******************************************************************************
* state change
******************************************************************************/
@@ -4694,6 +5001,15 @@ static int __init init_lru_gen(void)
{
BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
+ BUILD_BUG_ON(sizeof(MM_STAT_CODES) != NR_MM_STATS + 1);
+
+ if (mem_cgroup_disabled()) {
+ global_mm_list = alloc_mm_list();
+ if (!global_mm_list) {
+ pr_err("lru_gen: failed to allocate global mm_struct list\n");
+ return -ENOMEM;
+ }
+ }
if (hotplug_memory_notifier(lru_gen_online_mem, 0))
pr_err("lru_gen: failed to subscribe hotplug notifications\n");
--
2.31.1.295.g9ea45b61b8-goog

View File

@ -0,0 +1,853 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id CC788C43460
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:20 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id ADF7B6128E
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:20 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S1345140AbhDMG5i (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
Tue, 13 Apr 2021 02:57:38 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44200 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S1345088AbhDMG5Q (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Tue, 13 Apr 2021 02:57:16 -0400
Received: from mail-qt1-x849.google.com (mail-qt1-x849.google.com [IPv6:2607:f8b0:4864:20::849])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 8F91CC061756
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:57 -0700 (PDT)
Received: by mail-qt1-x849.google.com with SMTP id t18so666548qtw.15
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:57 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=6fhJHIbNqUBjvtOegfE2MyphyVhL6hJWTXmeiM/7CYU=;
b=nCCKEcrZRzhFu47i9x+KHFgV9bpn2QVPdLNp94/tvI2vdGJLS5yFnnrPQk/ZvV+805
oU9Y2xHhJFPVW5TfOLl+0cfdlw6G7bEAFmF1h4Uf+m4IIGVwMY+rg0tngfuV3hILEC/m
n+gQGstNi8BWz/WCQfT/CZcdFvYSUN04sTRJQZuLJPkujaFh7e8KEoTWM8Els3JqHgbc
LgYf9G3svPIdXSaGd7VPKBNPPf6gEFy/2HFBYAgJkJKvcduCSex9l6NdzI0GMRm0OYUM
C4BaQwaJZ6SJQXdHUAecfaC52R8b2Z/IZLmM44hUGJ3NGHSotvQ6lyAB8x6J2J/K2F2i
PJ9A==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=6fhJHIbNqUBjvtOegfE2MyphyVhL6hJWTXmeiM/7CYU=;
b=RKke0otlx0Z8q7yzlS4XpyZ5aovH7VEdxD07op8jejoFs5sh8CiOsB0OWYJ7WtpxIx
5eGpQFXb9BDl7z/w8mHGGABHKc6R44O+H6hfTDY7lBM6ycMXzUSbjQvnLzA1hgsk5Qzz
dFshVj2i3XpZoeXGBCx8f9E8lOrxcWydcMYmGU5PvLhJcJh5otr+dDPYiOpTdW+v1h1F
7zmsGOz9U6qOA3KwGKCLm44MrC1JtdV9omiuSJHBD+QfkfnIBcdeKCwgyRE44/35eufm
6b2R7XpOsNHciIksiDnzt5wgJJ1KnlB7E7hjCN/Q77qQcVL7cnSVQBCcYQOvUHoJ8lNg
fXFA==
X-Gm-Message-State: AOAM532Oo0F4MpWnfaEOY3TDummCsibMAZArGFkZs9eTu66X+a59qfdI
ziZoz/a2u1Q+YaODOe4XEW2tOqr3t3c=
X-Google-Smtp-Source: ABdhPJwG6wdrxi/hta1GN0K/zTCsJXK0CKzWYrx4efW6qkJhGiiXfKR8fAg0J/tzxkhd2xOMwJf4T1jXgvA=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9])
(user=yuzhao job=sendgmr) by 2002:ad4:4894:: with SMTP id bv20mr10806368qvb.34.1618297016759;
Mon, 12 Apr 2021 23:56:56 -0700 (PDT)
Date: Tue, 13 Apr 2021 00:56:28 -0600
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>
Message-Id: <20210413065633.2782273-12-yuzhao@google.com>
Mime-Version: 1.0
References: <20210413065633.2782273-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog
Subject: [PATCH v2 11/16] mm: multigenerational lru: aging
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Benjamin Manes <ben.manes@gmail.com>,
Dave Chinner <david@fromorbit.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>,
Miaohe Lin <linmiaohe@huawei.com>,
Michael Larabel <michael@michaellarabel.com>,
Michal Hocko <mhocko@suse.com>,
Michel Lespinasse <michel@lespinasse.org>,
Rik van Riel <riel@surriel.com>,
Roman Gushchin <guro@fb.com>,
Rong Chen <rong.a.chen@intel.com>,
SeongJae Park <sjpark@amazon.de>,
Tim Chen <tim.c.chen@linux.intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, lkp@lists.01.org,
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210413065633.2782273-12-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
The aging produces young generations. Given an lruvec, the aging walks
the mm_struct list associated with this lruvec to scan page tables for
referenced pages. Upon finding one, the aging updates the generation
number of this page to max_seq. After each round of scanning, the aging
increments max_seq. The aging is due when both elements of min_seq[2]
reach max_seq-1, assuming both anon and file types are reclaimable.
The aging uses the following optimizations when scanning page tables:
1) It will not scan page tables from processes that have been
sleeping since the last scan.
2) It will not scan PTE tables under non-leaf PMD entries that do
not have the accessed bit set, when
CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG=y.
3) It will not zigzag between the PGD table and the same PMD or PTE
table spanning multiple VMAs. In other words, it finishes all the
VMAs within the range of the same PMD or PTE table before it returns
to the PGD table. This optimizes workloads that have large numbers
of tiny VMAs, especially when CONFIG_PGTABLE_LEVELS=5.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
mm/vmscan.c | 700 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 700 insertions(+)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d67dfd1e3930..31e1b4155677 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -50,6 +50,7 @@
#include <linux/dax.h>
#include <linux/psi.h>
#include <linux/memory.h>
+#include <linux/pagewalk.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -4771,6 +4772,702 @@ static bool get_next_mm(struct mm_walk_args *args, int swappiness, struct mm_str
return last;
}
+/******************************************************************************
+ * the aging
+ ******************************************************************************/
+
+static void update_batch_size(struct page *page, int old_gen, int new_gen,
+ struct mm_walk_args *args)
+{
+ int file = page_is_file_lru(page);
+ int zone = page_zonenum(page);
+ int delta = thp_nr_pages(page);
+
+ VM_BUG_ON(old_gen >= MAX_NR_GENS);
+ VM_BUG_ON(new_gen >= MAX_NR_GENS);
+
+ args->batch_size++;
+
+ args->nr_pages[old_gen][file][zone] -= delta;
+ args->nr_pages[new_gen][file][zone] += delta;
+}
+
+static void reset_batch_size(struct lruvec *lruvec, struct mm_walk_args *args)
+{
+ int gen, file, zone;
+ struct lrugen *lrugen = &lruvec->evictable;
+
+ args->batch_size = 0;
+
+ spin_lock_irq(&lruvec->lru_lock);
+
+ for_each_gen_type_zone(gen, file, zone) {
+ enum lru_list lru = LRU_FILE * file;
+ int total = args->nr_pages[gen][file][zone];
+
+ if (!total)
+ continue;
+
+ args->nr_pages[gen][file][zone] = 0;
+ WRITE_ONCE(lrugen->sizes[gen][file][zone],
+ lrugen->sizes[gen][file][zone] + total);
+
+ if (lru_gen_is_active(lruvec, gen))
+ lru += LRU_ACTIVE;
+ update_lru_size(lruvec, lru, zone, total);
+ }
+
+ spin_unlock_irq(&lruvec->lru_lock);
+}
+
+static int page_update_gen(struct page *page, int new_gen)
+{
+ int old_gen;
+ unsigned long old_flags, new_flags;
+
+ VM_BUG_ON(new_gen >= MAX_NR_GENS);
+
+ do {
+ old_flags = READ_ONCE(page->flags);
+
+ old_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+ if (old_gen < 0)
+ new_flags = old_flags | BIT(PG_referenced);
+ else
+ new_flags = (old_flags & ~(LRU_GEN_MASK | LRU_USAGE_MASK |
+ LRU_TIER_FLAGS)) | ((new_gen + 1UL) << LRU_GEN_PGOFF);
+
+ if (old_flags == new_flags)
+ break;
+ } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
+
+ return old_gen;
+}
+
+static int should_skip_vma(unsigned long start, unsigned long end, struct mm_walk *walk)
+{
+ struct vm_area_struct *vma = walk->vma;
+ struct mm_walk_args *args = walk->private;
+
+ if (!vma_is_accessible(vma) || is_vm_hugetlb_page(vma) ||
+ (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)))
+ return true;
+
+ if (vma_is_anonymous(vma))
+ return !args->should_walk[0];
+
+ if (vma_is_shmem(vma))
+ return !args->should_walk[0] ||
+ mapping_unevictable(vma->vm_file->f_mapping);
+
+ return !args->should_walk[1] || vma_is_dax(vma) ||
+ vma == get_gate_vma(vma->vm_mm) ||
+ mapping_unevictable(vma->vm_file->f_mapping);
+}
+
+/*
+ * Some userspace memory allocators create many single-page VMAs. So instead of
+ * returning back to the PGD table for each of such VMAs, we finish at least an
+ * entire PMD table and therefore avoid many zigzags. This optimizes page table
+ * walks for workloads that have large numbers of tiny VMAs.
+ *
+ * We scan PMD tables in two pass. The first pass reaches to PTE tables and
+ * doesn't take the PMD lock. The second pass clears the accessed bit on PMD
+ * entries and needs to take the PMD lock. The second pass is only done on the
+ * PMD entries that first pass has found the accessed bit is set, and they must
+ * be:
+ * 1) leaf entries mapping huge pages from the node under reclaim
+ * 2) non-leaf entries whose leaf entries only map pages from the node under
+ * reclaim, when CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG=y.
+ */
+static bool get_next_interval(struct mm_walk *walk, unsigned long mask, unsigned long size,
+ unsigned long *start, unsigned long *end)
+{
+ unsigned long next = round_up(*end, size);
+ struct mm_walk_args *args = walk->private;
+
+ VM_BUG_ON(mask & size);
+ VM_BUG_ON(*start != *end);
+ VM_BUG_ON(!(*end & ~mask));
+ VM_BUG_ON((*end & mask) != (next & mask));
+
+ while (walk->vma) {
+ if (next >= walk->vma->vm_end) {
+ walk->vma = walk->vma->vm_next;
+ continue;
+ }
+
+ if ((next & mask) != (walk->vma->vm_start & mask))
+ return false;
+
+ if (next <= walk->vma->vm_start &&
+ should_skip_vma(walk->vma->vm_start, walk->vma->vm_end, walk)) {
+ walk->vma = walk->vma->vm_next;
+ continue;
+ }
+
+ args->mm_stats[MM_VMA_INTERVAL]++;
+
+ *start = max(next, walk->vma->vm_start);
+ next = (next | ~mask) + 1;
+ /* rounded-up boundaries can wrap to 0 */
+ *end = next && next < walk->vma->vm_end ? next : walk->vma->vm_end;
+
+ return true;
+ }
+
+ return false;
+}
+
+static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+ int i;
+ pte_t *pte;
+ spinlock_t *ptl;
+ int remote = 0;
+ struct mm_walk_args *args = walk->private;
+ int old_gen, new_gen = lru_gen_from_seq(args->max_seq);
+
+ VM_BUG_ON(pmd_leaf(*pmd));
+
+ pte = pte_offset_map_lock(walk->mm, pmd, start & PMD_MASK, &ptl);
+ arch_enter_lazy_mmu_mode();
+restart:
+ for (i = pte_index(start); start != end; i++, start += PAGE_SIZE) {
+ struct page *page;
+ unsigned long pfn = pte_pfn(pte[i]);
+
+ if (!pte_present(pte[i]) || is_zero_pfn(pfn)) {
+ args->mm_stats[MM_LEAF_HOLE]++;
+ continue;
+ }
+
+ if (!pte_young(pte[i])) {
+ args->mm_stats[MM_LEAF_OLD]++;
+ continue;
+ }
+
+ if (pfn < args->start_pfn || pfn >= args->end_pfn) {
+ remote++;
+ args->mm_stats[MM_LEAF_OTHER_NODE]++;
+ continue;
+ }
+
+ page = compound_head(pfn_to_page(pfn));
+ if (page_to_nid(page) != args->node_id) {
+ remote++;
+ args->mm_stats[MM_LEAF_OTHER_NODE]++;
+ continue;
+ }
+
+ if (!ptep_test_and_clear_young(walk->vma, start, pte + i))
+ continue;
+
+ if (pte_dirty(pte[i]) && !PageDirty(page) &&
+ !(PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page))) {
+ set_page_dirty(page);
+ args->mm_stats[MM_LEAF_DIRTY]++;
+ }
+
+ if (page_memcg_rcu(page) != args->memcg) {
+ args->mm_stats[MM_LEAF_OTHER_MEMCG]++;
+ continue;
+ }
+
+ old_gen = page_update_gen(page, new_gen);
+ if (old_gen >= 0 && old_gen != new_gen)
+ update_batch_size(page, old_gen, new_gen, args);
+ args->mm_stats[MM_LEAF_YOUNG]++;
+ }
+
+ if (i < PTRS_PER_PTE && get_next_interval(walk, PMD_MASK, PAGE_SIZE, &start, &end))
+ goto restart;
+
+ arch_leave_lazy_mmu_mode();
+ pte_unmap_unlock(pte, ptl);
+
+ return !remote;
+}
+
+static bool walk_pmd_range_unlocked(pud_t *pud, unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+ int i;
+ pmd_t *pmd;
+ unsigned long next;
+ int young = 0;
+ struct mm_walk_args *args = walk->private;
+
+ VM_BUG_ON(pud_leaf(*pud));
+
+ pmd = pmd_offset(pud, start & PUD_MASK);
+restart:
+ for (i = pmd_index(start); start != end; i++, start = next) {
+ pmd_t val = pmd_read_atomic(pmd + i);
+
+ next = pmd_addr_end(start, end);
+
+ barrier();
+ if (!pmd_present(val) || is_huge_zero_pmd(val)) {
+ args->mm_stats[MM_LEAF_HOLE]++;
+ continue;
+ }
+
+ if (pmd_trans_huge(val)) {
+ unsigned long pfn = pmd_pfn(val);
+
+ if (!pmd_young(val)) {
+ args->mm_stats[MM_LEAF_OLD]++;
+ continue;
+ }
+
+ if (pfn < args->start_pfn || pfn >= args->end_pfn) {
+ args->mm_stats[MM_LEAF_OTHER_NODE]++;
+ continue;
+ }
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ young++;
+ __set_bit(i, args->bitmap);
+#endif
+ continue;
+ }
+
+#ifdef CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG
+ if (!pmd_young(val)) {
+ args->mm_stats[MM_NONLEAF_OLD]++;
+ continue;
+ }
+#endif
+
+ if (walk_pte_range(&val, start, next, walk)) {
+#ifdef CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG
+ young++;
+ __set_bit(i, args->bitmap);
+#endif
+ }
+ }
+
+ if (i < PTRS_PER_PMD && get_next_interval(walk, PUD_MASK, PMD_SIZE, &start, &end))
+ goto restart;
+
+ return young;
+}
+
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG)
+static void walk_pmd_range_locked(pud_t *pud, unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+ int i;
+ pmd_t *pmd;
+ spinlock_t *ptl;
+ struct mm_walk_args *args = walk->private;
+ int old_gen, new_gen = lru_gen_from_seq(args->max_seq);
+
+ VM_BUG_ON(pud_leaf(*pud));
+
+ start &= PUD_MASK;
+ pmd = pmd_offset(pud, start);
+ ptl = pmd_lock(walk->mm, pmd);
+ arch_enter_lazy_mmu_mode();
+
+ for_each_set_bit(i, args->bitmap, PTRS_PER_PMD) {
+ struct page *page;
+ unsigned long pfn = pmd_pfn(pmd[i]);
+ unsigned long addr = start + PMD_SIZE * i;
+
+ if (!pmd_present(pmd[i]) || is_huge_zero_pmd(pmd[i])) {
+ args->mm_stats[MM_LEAF_HOLE]++;
+ continue;
+ }
+
+ if (!pmd_young(pmd[i])) {
+ args->mm_stats[MM_LEAF_OLD]++;
+ continue;
+ }
+
+ if (!pmd_trans_huge(pmd[i])) {
+#ifdef CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG
+ args->mm_stats[MM_NONLEAF_YOUNG]++;
+ pmdp_test_and_clear_young(walk->vma, addr, pmd + i);
+#endif
+ continue;
+ }
+
+ if (pfn < args->start_pfn || pfn >= args->end_pfn) {
+ args->mm_stats[MM_LEAF_OTHER_NODE]++;
+ continue;
+ }
+
+ page = pfn_to_page(pfn);
+ VM_BUG_ON_PAGE(PageTail(page), page);
+ if (page_to_nid(page) != args->node_id) {
+ args->mm_stats[MM_LEAF_OTHER_NODE]++;
+ continue;
+ }
+
+ if (!pmdp_test_and_clear_young(walk->vma, addr, pmd + i))
+ continue;
+
+ if (pmd_dirty(pmd[i]) && !PageDirty(page) &&
+ !(PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page))) {
+ set_page_dirty(page);
+ args->mm_stats[MM_LEAF_DIRTY]++;
+ }
+
+ if (page_memcg_rcu(page) != args->memcg) {
+ args->mm_stats[MM_LEAF_OTHER_MEMCG]++;
+ continue;
+ }
+
+ old_gen = page_update_gen(page, new_gen);
+ if (old_gen >= 0 && old_gen != new_gen)
+ update_batch_size(page, old_gen, new_gen, args);
+ args->mm_stats[MM_LEAF_YOUNG]++;
+ }
+
+ arch_leave_lazy_mmu_mode();
+ spin_unlock(ptl);
+
+ memset(args->bitmap, 0, sizeof(args->bitmap));
+}
+#else
+static void walk_pmd_range_locked(pud_t *pud, unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+}
+#endif
+
+static int walk_pud_range(p4d_t *p4d, unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+ int i;
+ pud_t *pud;
+ unsigned long next;
+ struct mm_walk_args *args = walk->private;
+
+ VM_BUG_ON(p4d_leaf(*p4d));
+
+ pud = pud_offset(p4d, start & P4D_MASK);
+restart:
+ for (i = pud_index(start); start != end; i++, start = next) {
+ pud_t val = READ_ONCE(pud[i]);
+
+ next = pud_addr_end(start, end);
+
+ if (!pud_present(val) || WARN_ON_ONCE(pud_leaf(val)))
+ continue;
+
+ if (walk_pmd_range_unlocked(&val, start, next, walk))
+ walk_pmd_range_locked(&val, start, next, walk);
+
+ if (args->batch_size >= MAX_BATCH_SIZE) {
+ end = (start | ~PUD_MASK) + 1;
+ goto done;
+ }
+ }
+
+ if (i < PTRS_PER_PUD && get_next_interval(walk, P4D_MASK, PUD_SIZE, &start, &end))
+ goto restart;
+
+ end = round_up(end, P4D_SIZE);
+done:
+ /* rounded-up boundaries can wrap to 0 */
+ args->next_addr = end && walk->vma ? max(end, walk->vma->vm_start) : 0;
+
+ return -EAGAIN;
+}
+
+static void walk_mm(struct mm_walk_args *args, int swappiness, struct mm_struct *mm)
+{
+ static const struct mm_walk_ops mm_walk_ops = {
+ .test_walk = should_skip_vma,
+ .p4d_entry = walk_pud_range,
+ };
+
+ int err;
+ int file;
+ int nid = args->node_id;
+ struct mem_cgroup *memcg = args->memcg;
+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+
+ args->next_addr = FIRST_USER_ADDRESS;
+ for (file = !swappiness; file < ANON_AND_FILE; file++)
+ args->should_walk[file] = lru_gen_mm_is_active(mm) ||
+ node_isset(nid, mm->lrugen.nodes[file]);
+
+ do {
+ unsigned long start = args->next_addr;
+ unsigned long end = mm->highest_vm_end;
+
+ err = -EBUSY;
+
+ preempt_disable();
+ rcu_read_lock();
+
+#ifdef CONFIG_MEMCG
+ if (memcg && atomic_read(&memcg->moving_account)) {
+ args->mm_stats[MM_LOCK_CONTENTION]++;
+ goto contended;
+ }
+#endif
+ if (!mmap_read_trylock(mm)) {
+ args->mm_stats[MM_LOCK_CONTENTION]++;
+ goto contended;
+ }
+
+ err = walk_page_range(mm, start, end, &mm_walk_ops, args);
+
+ mmap_read_unlock(mm);
+
+ if (args->batch_size)
+ reset_batch_size(lruvec, args);
+contended:
+ rcu_read_unlock();
+ preempt_enable();
+
+ cond_resched();
+ } while (err == -EAGAIN && args->next_addr &&
+ !mm_is_oom_victim(mm) && !mm_has_migrated(mm, memcg));
+
+ if (err == -EBUSY)
+ return;
+
+ for (file = !swappiness; file < ANON_AND_FILE; file++) {
+ if (args->should_walk[file])
+ node_clear(nid, mm->lrugen.nodes[file]);
+ }
+}
+
+static void page_inc_gen(struct page *page, struct lruvec *lruvec, bool front)
+{
+ int old_gen, new_gen;
+ unsigned long old_flags, new_flags;
+ int file = page_is_file_lru(page);
+ int zone = page_zonenum(page);
+ struct lrugen *lrugen = &lruvec->evictable;
+
+ old_gen = lru_gen_from_seq(lrugen->min_seq[file]);
+
+ do {
+ old_flags = READ_ONCE(page->flags);
+ new_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1;
+ VM_BUG_ON_PAGE(new_gen < 0, page);
+ if (new_gen >= 0 && new_gen != old_gen)
+ goto sort;
+
+ new_gen = (old_gen + 1) % MAX_NR_GENS;
+ new_flags = (old_flags & ~(LRU_GEN_MASK | LRU_USAGE_MASK | LRU_TIER_FLAGS)) |
+ ((new_gen + 1UL) << LRU_GEN_PGOFF);
+ /* mark the page for reclaim if it's pending writeback */
+ if (front)
+ new_flags |= BIT(PG_reclaim);
+ } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
+
+ lru_gen_update_size(page, lruvec, old_gen, new_gen);
+sort:
+ if (front)
+ list_move(&page->lru, &lrugen->lists[new_gen][file][zone]);
+ else
+ list_move_tail(&page->lru, &lrugen->lists[new_gen][file][zone]);
+}
+
+static bool try_inc_min_seq(struct lruvec *lruvec, int file)
+{
+ int gen, zone;
+ bool success = false;
+ struct lrugen *lrugen = &lruvec->evictable;
+
+ VM_BUG_ON(!seq_is_valid(lruvec));
+
+ while (get_nr_gens(lruvec, file) > MIN_NR_GENS) {
+ gen = lru_gen_from_seq(lrugen->min_seq[file]);
+
+ for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+ if (!list_empty(&lrugen->lists[gen][file][zone]))
+ return success;
+ }
+
+ reset_controller_pos(lruvec, gen, file);
+ WRITE_ONCE(lrugen->min_seq[file], lrugen->min_seq[file] + 1);
+
+ success = true;
+ }
+
+ return success;
+}
+
+static bool inc_min_seq(struct lruvec *lruvec, int file)
+{
+ int gen, zone;
+ int batch_size = 0;
+ struct lrugen *lrugen = &lruvec->evictable;
+
+ VM_BUG_ON(!seq_is_valid(lruvec));
+
+ if (get_nr_gens(lruvec, file) != MAX_NR_GENS)
+ return true;
+
+ gen = lru_gen_from_seq(lrugen->min_seq[file]);
+
+ for (zone = 0; zone < MAX_NR_ZONES; zone++) {
+ struct list_head *head = &lrugen->lists[gen][file][zone];
+
+ while (!list_empty(head)) {
+ struct page *page = lru_to_page(head);
+
+ VM_BUG_ON_PAGE(PageTail(page), page);
+ VM_BUG_ON_PAGE(PageUnevictable(page), page);
+ VM_BUG_ON_PAGE(PageActive(page), page);
+ VM_BUG_ON_PAGE(page_is_file_lru(page) != file, page);
+ VM_BUG_ON_PAGE(page_zonenum(page) != zone, page);
+
+ prefetchw_prev_lru_page(page, head, flags);
+
+ page_inc_gen(page, lruvec, false);
+
+ if (++batch_size == MAX_BATCH_SIZE)
+ return false;
+ }
+
+ VM_BUG_ON(lrugen->sizes[gen][file][zone]);
+ }
+
+ reset_controller_pos(lruvec, gen, file);
+ WRITE_ONCE(lrugen->min_seq[file], lrugen->min_seq[file] + 1);
+
+ return true;
+}
+
+static void inc_max_seq(struct lruvec *lruvec)
+{
+ int gen, file, zone;
+ struct lrugen *lrugen = &lruvec->evictable;
+
+ spin_lock_irq(&lruvec->lru_lock);
+
+ VM_BUG_ON(!seq_is_valid(lruvec));
+
+ for (file = 0; file < ANON_AND_FILE; file++) {
+ if (try_inc_min_seq(lruvec, file))
+ continue;
+
+ while (!inc_min_seq(lruvec, file)) {
+ spin_unlock_irq(&lruvec->lru_lock);
+ cond_resched();
+ spin_lock_irq(&lruvec->lru_lock);
+ }
+ }
+
+ gen = lru_gen_from_seq(lrugen->max_seq - 1);
+ for_each_type_zone(file, zone) {
+ enum lru_list lru = LRU_FILE * file;
+ long total = lrugen->sizes[gen][file][zone];
+
+ if (!total)
+ continue;
+
+ WARN_ON_ONCE(total != (int)total);
+
+ update_lru_size(lruvec, lru, zone, total);
+ update_lru_size(lruvec, lru + LRU_ACTIVE, zone, -total);
+ }
+
+ gen = lru_gen_from_seq(lrugen->max_seq + 1);
+ for_each_type_zone(file, zone) {
+ VM_BUG_ON(lrugen->sizes[gen][file][zone]);
+ VM_BUG_ON(!list_empty(&lrugen->lists[gen][file][zone]));
+ }
+
+ for (file = 0; file < ANON_AND_FILE; file++)
+ reset_controller_pos(lruvec, gen, file);
+
+ WRITE_ONCE(lrugen->timestamps[gen], jiffies);
+ /* make sure all preceding modifications appear first */
+ smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1);
+
+ spin_unlock_irq(&lruvec->lru_lock);
+}
+
+/* Main function used by foreground, background and user-triggered aging. */
+static bool walk_mm_list(struct lruvec *lruvec, unsigned long max_seq,
+ struct scan_control *sc, int swappiness, struct mm_walk_args *args)
+{
+ bool last;
+ bool alloc = !args;
+ struct mm_struct *mm = NULL;
+ struct lrugen *lrugen = &lruvec->evictable;
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+ int nid = pgdat->node_id;
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
+
+ VM_BUG_ON(max_seq > READ_ONCE(lrugen->max_seq));
+
+ /*
+ * For each walk of the mm_struct list of a memcg, we decrement the
+ * priority of its lrugen. For each walk of all memcgs in kswapd, we
+ * increment the priority of every lrugen.
+ *
+ * So if this lrugen has a higher priority (smaller value), it means
+ * other concurrent reclaimers have walked its mm list, and we skip it
+ * for this priority in order to balance the pressure on all memcgs.
+ */
+ if (!mem_cgroup_disabled() && !cgroup_reclaim(sc) &&
+ sc->priority > atomic_read(&lrugen->priority))
+ return false;
+
+ if (alloc) {
+ args = kvzalloc_node(sizeof(*args), GFP_KERNEL, nid);
+ if (!args)
+ return false;
+ }
+
+ args->memcg = memcg;
+ args->max_seq = max_seq;
+ args->start_pfn = pgdat->node_start_pfn;
+ args->end_pfn = pgdat_end_pfn(pgdat);
+ args->node_id = nid;
+
+ do {
+ last = get_next_mm(args, swappiness, &mm);
+ if (mm)
+ walk_mm(args, swappiness, mm);
+
+ cond_resched();
+ } while (mm);
+
+ if (alloc)
+ kvfree(args);
+
+ if (!last) {
+ /* foreground aging prefers not to wait unless "necessary" */
+ if (!current_is_kswapd() && sc->priority < DEF_PRIORITY - 2)
+ wait_event_killable(mm_list->nodes[nid].wait,
+ max_seq < READ_ONCE(lrugen->max_seq));
+
+ return max_seq < READ_ONCE(lrugen->max_seq);
+ }
+
+ VM_BUG_ON(max_seq != READ_ONCE(lrugen->max_seq));
+
+ inc_max_seq(lruvec);
+
+ if (!mem_cgroup_disabled())
+ atomic_add_unless(&lrugen->priority, -1, 0);
+
+ /* order against inc_max_seq() */
+ smp_mb();
+ /* either we see any waiters or they will see the updated max_seq */
+ if (waitqueue_active(&mm_list->nodes[nid].wait))
+ wake_up_all(&mm_list->nodes[nid].wait);
+
+ wakeup_flusher_threads(WB_REASON_VMSCAN);
+
+ return true;
+}
+
/******************************************************************************
* state change
******************************************************************************/
@@ -5002,6 +5699,9 @@ static int __init init_lru_gen(void)
BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
BUILD_BUG_ON(sizeof(MM_STAT_CODES) != NR_MM_STATS + 1);
+ BUILD_BUG_ON(PMD_SIZE / PAGE_SIZE != PTRS_PER_PTE);
+ BUILD_BUG_ON(PUD_SIZE / PMD_SIZE != PTRS_PER_PMD);
+ BUILD_BUG_ON(P4D_SIZE / PUD_SIZE != PTRS_PER_PUD);
if (mem_cgroup_disabled()) {
global_mm_list = alloc_mm_list();
--
2.31.1.295.g9ea45b61b8-goog

View File

@ -0,0 +1,474 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id D9882C433ED
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:29 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id B8C7E613B1
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:29 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S1345148AbhDMG5r (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
Tue, 13 Apr 2021 02:57:47 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44204 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S1345094AbhDMG5S (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Tue, 13 Apr 2021 02:57:18 -0400
Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 3319EC06175F
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:59 -0700 (PDT)
Received: by mail-yb1-xb4a.google.com with SMTP id p75so9209456ybc.8
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:56:59 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=AtcshlKlEpO25DWX4HdWHYKkg2qmJuRhLpG3jAQhwYc=;
b=KpNWVguu83mUBVdG9rV7ayYNm+Qrzu5gAuasFnKSoWlkRinGKl/FvUmCisXgOrxGC0
C9Wgab1jU/EJCdE85EdYCvp7ANytDv3ICBmljKThBcjCsU/wnl68RE3qlTlwro63hIWt
MNfXX7skFRf+i1zpUlA6T7R/rTDSlD3n0pboX0T6KXoxN8TAWeB2SgBy2EDQkapMZU3f
Yj8IM3/wDy/W+hgIexStVVze+0Y+gs0LOFo9um6QLrtZfsj/heNSAn50raUEB2w/UGHv
wBBLmbIZyRpiDtLinzpzu1fIqj9Y/2CPQeg1p+ZMcg3wMV0JQXyTUvVglWkME0v6fKsG
fSRw==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=AtcshlKlEpO25DWX4HdWHYKkg2qmJuRhLpG3jAQhwYc=;
b=I5wcigjJOE57JyIN1RgYnvjQfqi/Tu5QohjDJ3zHpF6wCQbLs1mU8eUZ+TYGRp5xwm
PxULqfFEi9PFVydtMob1umooK7ndwpJBomSO9+hgGyBluwloY/kUvS3XtnV4b4UD45J/
Ny/ylsjBg1K+INdvvcBjsJ62q+kSQWanrORUhTCG8yKu+Uug/vhGdOECiKug4pBAgktX
gjqN4aglQeOGaw3UbEG4s6mQuxRdsGY9S1TSistPPCZr+GCvEHf6tG/uc1wmO0zvm3M9
5zAnThurIlICc11ju7PpVVH/k5HZNlo7SLO0yxf5Pr03wG+SAnHTeSmT9zPzHWGTfA/6
FxdA==
X-Gm-Message-State: AOAM532rwFd52QDY7yVuzhsUHKx/vQ3mvqMJUIYRA4CK/9WfDNvEvp4X
aLVlWGREIYgvAVa4LwBCuixrg5f/t3I=
X-Google-Smtp-Source: ABdhPJxtAb+i00KPB+eZ1AkPEHseGFum+ilW8ElwcmLIJblIT+FK3beKZjdoBl7K4l7X3wfk5ecz7lYtrhU=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9])
(user=yuzhao job=sendgmr) by 2002:a25:f0b:: with SMTP id 11mr41690159ybp.208.1618297018316;
Mon, 12 Apr 2021 23:56:58 -0700 (PDT)
Date: Tue, 13 Apr 2021 00:56:29 -0600
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>
Message-Id: <20210413065633.2782273-13-yuzhao@google.com>
Mime-Version: 1.0
References: <20210413065633.2782273-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog
Subject: [PATCH v2 12/16] mm: multigenerational lru: eviction
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Benjamin Manes <ben.manes@gmail.com>,
Dave Chinner <david@fromorbit.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>,
Miaohe Lin <linmiaohe@huawei.com>,
Michael Larabel <michael@michaellarabel.com>,
Michal Hocko <mhocko@suse.com>,
Michel Lespinasse <michel@lespinasse.org>,
Rik van Riel <riel@surriel.com>,
Roman Gushchin <guro@fb.com>,
Rong Chen <rong.a.chen@intel.com>,
SeongJae Park <sjpark@amazon.de>,
Tim Chen <tim.c.chen@linux.intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, lkp@lists.01.org,
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210413065633.2782273-13-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
The eviction consumes old generations. Given an lruvec, the eviction
scans the pages on the per-zone lists indexed by either of min_seq[2].
It first tries to select a type based on the values of min_seq[2].
When anon and file types are both available from the same generation,
it selects the one that has a lower refault rate.
During a scan, the eviction sorts pages according to their generation
numbers, if the aging has found them referenced. It also moves pages
from the tiers that have higher refault rates than tier 0 to the next
generation. When it finds all the per-zone lists of a selected type
are empty, the eviction increments min_seq[2] indexed by this selected
type.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
mm/vmscan.c | 341 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 341 insertions(+)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 31e1b4155677..6239b1acd84f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5468,6 +5468,347 @@ static bool walk_mm_list(struct lruvec *lruvec, unsigned long max_seq,
return true;
}
+/******************************************************************************
+ * the eviction
+ ******************************************************************************/
+
+static bool sort_page(struct page *page, struct lruvec *lruvec, int tier_to_isolate)
+{
+ bool success;
+ int gen = page_lru_gen(page);
+ int file = page_is_file_lru(page);
+ int zone = page_zonenum(page);
+ int tier = lru_tier_from_usage(page_tier_usage(page));
+ struct lrugen *lrugen = &lruvec->evictable;
+
+ VM_BUG_ON_PAGE(gen == -1, page);
+ VM_BUG_ON_PAGE(tier_to_isolate < 0, page);
+
+ /* a lazy-free page that has been written into? re-add it as swap-backed */
+ if (file && PageDirty(page) && PageAnon(page)) {
+ success = lru_gen_deletion(page, lruvec);
+ VM_BUG_ON_PAGE(!success, page);
+ SetPageSwapBacked(page);
+ add_page_to_lru_list_tail(page, lruvec);
+ return true; /* true: dealt with here; scan_lru_gen_pages() counts it as sorted */
+ }
+
+ /* page_update_gen() has updated the page? move it to the list of its gen */
+ if (gen != lru_gen_from_seq(lrugen->min_seq[file])) {
+ list_move(&page->lru, &lrugen->lists[gen][file][zone]);
+ return true;
+ }
+
+ /* activate the page if its tier has a higher refault rate */
+ if (tier_to_isolate < tier) { /* tier >= 1 here, as tier_to_isolate >= 0 */
+ int sid = sid_from_seq_or_gen(gen);
+
+ page_inc_gen(page, lruvec, false);
+ WRITE_ONCE(lrugen->activated[sid][file][tier - 1],
+ lrugen->activated[sid][file][tier - 1] + thp_nr_pages(page));
+ inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file);
+ return true;
+ }
+
+ /*
+ * A locked, under-writeback or dirty file page can't be evicted right
+ * now; page_inc_gen() marks it for reclaim, hoping writeback gets to it soon.
+ */
+ if (PageLocked(page) || PageWriteback(page) || (file && PageDirty(page))) {
+ page_inc_gen(page, lruvec, true);
+ return true;
+ }
+
+ return false; /* nothing applied: the caller skips or isolates the page */
+}
+
+static bool should_skip_page(struct page *page, struct scan_control *sc)
+{
+ if (!sc->may_unmap && page_mapped(page)) /* mapped, but unmapping isn't allowed */
+ return true;
+
+ if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) && /* writing out isn't allowed */
+ (PageDirty(page) || (PageAnon(page) && !PageSwapCache(page))))
+ return true;
+
+ if (!get_page_unless_zero(page)) /* couldn't take a reference */
+ return true;
+
+ if (!TestClearPageLRU(page)) { /* LRU flag wasn't set: drop the reference taken above */
+ put_page(page);
+ return true;
+ }
+
+ return false; /* not skipped: a reference is now held and PageLRU has been cleared */
+}
+
+static void isolate_page(struct page *page, struct lruvec *lruvec)
+{
+ bool success;
+
+ success = lru_gen_deletion(page, lruvec);
+ VM_BUG_ON_PAGE(!success, page); /* scan_lru_gen_pages() took the page from an lrugen list */
+
+ if (PageActive(page)) {
+ ClearPageActive(page);
+ /* make sure shrink_page_list() rejects this page */
+ SetPageReferenced(page);
+ return; /* the Reclaim flag is not cleared on this path */
+ }
+
+ /* make sure shrink_page_list() doesn't try to write this page */
+ ClearPageReclaim(page);
+ /* make sure shrink_page_list() doesn't reject this page */
+ ClearPageReferenced(page);
+}
+
+static int scan_lru_gen_pages(struct lruvec *lruvec, struct scan_control *sc,
+ long *nr_to_scan, int file, int tier,
+ struct list_head *list)
+{
+ bool success;
+ int gen, zone;
+ enum vm_event_item item;
+ int sorted = 0; /* pages handled by sort_page() */
+ int scanned = 0; /* pages looked at, in thp_nr_pages() units */
+ int isolated = 0; /* pages moved onto @list */
+ int batch_size = 0; /* list entries visited, whatever their size */
+ struct lrugen *lrugen = &lruvec->evictable;
+
+ VM_BUG_ON(!list_empty(list));
+
+ if (get_nr_gens(lruvec, file) == MIN_NR_GENS) /* this type is down to MIN_NR_GENS */
+ return -ENOENT;
+
+ gen = lru_gen_from_seq(lrugen->min_seq[file]); /* scan the generation at min_seq */
+
+ for (zone = sc->reclaim_idx; zone >= 0; zone--) { /* from reclaim_idx downwards */
+ LIST_HEAD(moved); /* skipped pages, spliced back onto head below */
+ int skipped = 0;
+ struct list_head *head = &lrugen->lists[gen][file][zone];
+
+ while (!list_empty(head)) {
+ struct page *page = lru_to_page(head);
+ int delta = thp_nr_pages(page);
+
+ VM_BUG_ON_PAGE(PageTail(page), page);
+ VM_BUG_ON_PAGE(PageUnevictable(page), page);
+ VM_BUG_ON_PAGE(PageActive(page), page);
+ VM_BUG_ON_PAGE(page_is_file_lru(page) != file, page);
+ VM_BUG_ON_PAGE(page_zonenum(page) != zone, page);
+
+ prefetchw_prev_lru_page(page, head, flags);
+
+ scanned += delta;
+
+ if (sort_page(page, lruvec, tier))
+ sorted += delta;
+ else if (should_skip_page(page, sc)) {
+ list_move(&page->lru, &moved);
+ skipped += delta;
+ } else {
+ isolate_page(page, lruvec);
+ list_add(&page->lru, list);
+ isolated += delta;
+ }
+
+ if (scanned >= *nr_to_scan || isolated >= SWAP_CLUSTER_MAX ||
+ ++batch_size == MAX_BATCH_SIZE)
+ break;
+ }
+
+ list_splice(&moved, head);
+ __count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
+
+ if (scanned >= *nr_to_scan || isolated >= SWAP_CLUSTER_MAX || /* same limits: leave the zone loop too */
+ batch_size == MAX_BATCH_SIZE)
+ break;
+ }
+
+ success = try_inc_min_seq(lruvec, file); /* NOTE(review): true presumably means min_seq[file] advanced -- confirm */
+
+ item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
+ if (!cgroup_reclaim(sc))
+ __count_vm_events(item, scanned);
+ __count_memcg_events(lruvec_memcg(lruvec), item, scanned);
+ __count_vm_events(PGSCAN_ANON + file, scanned);
+
+ *nr_to_scan -= scanned; /* the caller's budget shrinks by what was scanned */
+
+ if (*nr_to_scan <= 0 || success || isolated)
+ return isolated;
+ /*
+ * Nothing was isolated, possibly because of reclaim_idx, may_unmap and
+ * may_writepage. Return 0 only if a full batch was visited and enough pages
+ * were sorted; otherwise return -ENOENT so that we won't be stuck here.
+ */
+ return batch_size == MAX_BATCH_SIZE && sorted >= SWAP_CLUSTER_MAX ? 0 : -ENOENT;
+}
+
+static int get_tier_to_isolate(struct lruvec *lruvec, int file)
+{
+ int tier;
+ struct controller_pos sp, pv;
+
+ /*
+ * Ideally we don't want to evict upper tiers that have higher refault
+ * rates. However, we need to leave some margin for the fluctuation in
+ * refault rates. So we use a larger gain factor to make sure upper
+ * tiers are indeed more active. We choose 2 because the lowest upper
+ * tier would have twice the refault rate of the base tier, according
+ * to their numbers of accesses.
+ */
+ read_controller_pos(&sp, lruvec, file, 0, 1); /* base tier, gain 1 */
+ for (tier = 1; tier < MAX_NR_TIERS; tier++) {
+ read_controller_pos(&pv, lruvec, file, tier, 2); /* upper tier, gain 2 */
+ if (!positive_ctrl_err(&sp, &pv))
+ break;
+ }
+
+ return tier - 1; /* between 0 and MAX_NR_TIERS - 1 */
+}
+
+static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_to_isolate)
+{
+ int file, tier;
+ struct controller_pos sp, pv;
+ int gain[ANON_AND_FILE] = { swappiness, 200 - swappiness }; /* [0] anon, [1] file */
+
+ /*
+ * Compare the refault rates of the base tiers of anon and file to pick
+ * the type to evict. Then compare the upper tiers of the selected type
+ * against a base tier to pick the tier of the selected type to evict.
+ * NOTE(review): that base tier is read from the other type (!file) below.
+ */
+ read_controller_pos(&sp, lruvec, 0, 0, gain[0]);
+ read_controller_pos(&pv, lruvec, 1, 0, gain[1]);
+ file = positive_ctrl_err(&sp, &pv); /* used as a type index from here on */
+
+ read_controller_pos(&sp, lruvec, !file, 0, gain[!file]);
+ for (tier = 1; tier < MAX_NR_TIERS; tier++) {
+ read_controller_pos(&pv, lruvec, file, tier, gain[file]);
+ if (!positive_ctrl_err(&sp, &pv))
+ break;
+ }
+
+ *tier_to_isolate = tier - 1; /* between 0 and MAX_NR_TIERS - 1 */
+
+ return file;
+}
+
+static int isolate_lru_gen_pages(struct lruvec *lruvec, struct scan_control *sc,
+ int swappiness, long *nr_to_scan, int *type_to_scan,
+ struct list_head *list)
+{
+ int i;
+ int file;
+ int isolated;
+ int tier = -1; /* -1: get_type_to_scan() hasn't picked a tier */
+ DEFINE_MAX_SEQ();
+ DEFINE_MIN_SEQ();
+
+ VM_BUG_ON(!seq_is_valid(lruvec));
+
+ if (max_nr_gens(max_seq, min_seq, swappiness) == MIN_NR_GENS)
+ return 0; /* note: *type_to_scan is not written on this path */
+ /*
+ * Try to select a type based on generations and swappiness, and if that
+ * fails, fall back to get_type_to_scan(). When anon and file are both
+ * available from the same generation, swappiness 200 is interpreted as
+ * anon first and swappiness 1 is interpreted as file first.
+ */
+ file = !swappiness || min_seq[0] > min_seq[1] ||
+ (min_seq[0] == min_seq[1] && swappiness != 200 &&
+ (swappiness == 1 || get_type_to_scan(lruvec, swappiness, &tier)));
+
+ if (tier == -1)
+ tier = get_tier_to_isolate(lruvec, file);
+
+ for (i = !swappiness; i < ANON_AND_FILE; i++) { /* one try if !swappiness, else up to two */
+ isolated = scan_lru_gen_pages(lruvec, sc, nr_to_scan, file, tier, list);
+ if (isolated >= 0)
+ break;
+
+ file = !file; /* negative return: switch to the other type */
+ tier = get_tier_to_isolate(lruvec, file);
+ }
+
+ if (isolated < 0)
+ isolated = *nr_to_scan = 0; /* zeroing *nr_to_scan makes evict_lru_gen_pages() return false */
+
+ *type_to_scan = file;
+
+ return isolated;
+}
+
+/* Main function used by foreground, background and user-triggered eviction. */
+static bool evict_lru_gen_pages(struct lruvec *lruvec, struct scan_control *sc,
+ int swappiness, long *nr_to_scan)
+{
+ int file;
+ int isolated;
+ int reclaimed;
+ LIST_HEAD(list);
+ struct page *page;
+ enum vm_event_item item;
+ struct reclaim_stat stat;
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+
+ spin_lock_irq(&lruvec->lru_lock);
+
+ isolated = isolate_lru_gen_pages(lruvec, sc, swappiness, nr_to_scan, &file, &list);
+ VM_BUG_ON(list_empty(&list) == !!isolated); /* list is non-empty exactly when isolated != 0 */
+
+ if (isolated)
+ __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, isolated);
+
+ spin_unlock_irq(&lruvec->lru_lock);
+
+ if (!isolated)
+ goto done; /* file may be unset here; it isn't used past this point */
+
+ reclaimed = shrink_page_list(&list, pgdat, sc, &stat, false);
+ /*
+ * We need to prevent rejected pages from being added back to the same
+ * lists they were isolated from. Otherwise we may risk looping on them
+ * forever. We use PageActive() or !PageReferenced() && PageWorkingset()
+ * to tell lru_gen_addition() not to add them to the oldest generation.
+ */
+ list_for_each_entry(page, &list, lru) {
+ if (PageMlocked(page))
+ continue;
+
+ if (PageReferenced(page)) {
+ SetPageActive(page);
+ ClearPageReferenced(page);
+ } else {
+ ClearPageActive(page);
+ SetPageWorkingset(page);
+ }
+ }
+
+ spin_lock_irq(&lruvec->lru_lock);
+
+ move_pages_to_lru(lruvec, &list);
+
+ __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -isolated); /* undo the accounting above */
+
+ item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
+ if (!cgroup_reclaim(sc))
+ __count_vm_events(item, reclaimed);
+ __count_memcg_events(lruvec_memcg(lruvec), item, reclaimed);
+ __count_vm_events(PGSTEAL_ANON + file, reclaimed);
+
+ spin_unlock_irq(&lruvec->lru_lock);
+
+ mem_cgroup_uncharge_list(&list);
+ free_unref_page_list(&list);
+
+ sc->nr_reclaimed += reclaimed;
+done:
+ return *nr_to_scan > 0 && sc->nr_reclaimed < sc->nr_to_reclaim; /* true: budget left and target not met */
+}
+
/******************************************************************************
* state change
******************************************************************************/
--
2.31.1.295.g9ea45b61b8-goog

View File

@ -0,0 +1,479 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 555A5C43461
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:36 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 3220B60FDB
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:35 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S237146AbhDMG5w (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Tue, 13 Apr 2021 02:57:52 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44208 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S1345113AbhDMG5T (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Tue, 13 Apr 2021 02:57:19 -0400
Received: from mail-qk1-x749.google.com (mail-qk1-x749.google.com [IPv6:2607:f8b0:4864:20::749])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A2F06C061574
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:57:00 -0700 (PDT)
Received: by mail-qk1-x749.google.com with SMTP id n191so10007274qka.9
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:57:00 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=ZkZkBuwvqnJ3RNHJhCNbR3K9qvaxv7Y+ShqFogGYPM4=;
b=YuhzAl4jnf9B8DPsAHH+IEn6TeEK8tkXzqeIIUWrV6MKmrDwRVWEaxlfpyho7LEl9c
Yb/oFtKUHNb53oILQT33tlmVOzpPgzylMipFZ2l5j9KHbcsDyRmB0oqQUa1QZ2PJMYNK
fWpCu7LXduAtYRU+OGHNrJHXp576QKDulX5A0p9heBIoiC+vWWS/x+GcCoUk17noPsZC
Su6UQCzg6NAfh+hiQZUMluxkVxIZLc0tUeagDPWX8AYcx4WshWUrgTPuDgI3s1vI7M8C
K9lLKPVh9VeBFpsycJM4koujbXoOVbPXyfWOhPPIE23ETJR5Yb0o5n5VqtBZYTB2FIhK
TPQw==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=ZkZkBuwvqnJ3RNHJhCNbR3K9qvaxv7Y+ShqFogGYPM4=;
b=HJexHWiiyDZpfXt8l6/EGRqdtM5RulG6u9GDFQ0UJD2T5+wffn01FXEWBORtSlloEv
JVoGieHk3qJawZml66nLtDTbcVGYn6Nqs6EXRfNoDgICSYXdL9NTIaojCI0ZKGyD+IxL
PUrN7oxaD8d5VGq+sBRezfThw/BfDEZnlAKs7my6MuuAOjBT8on5yBIH8/j/ICvIEG6I
gMkvHTcz3g9emOaHqBpNgMwnOo6Nuia/0YbXpr3xWCmezGFqPyDmC8JYVrlrE7T1sOtt
aM45XTkzlUnUnCLZq+dVQPAsg4IjqDoWZ7K2SbzPqIHFPVW2baQfIGX+oVazwypGzv4P
ZVCw==
X-Gm-Message-State: AOAM531aC+Fl2Rjia4/Q8PO4GqZNI/QjyevwkXojS3zWLyfXFHA97+i9
GxwWwyU1OIpVhDJlWVmUnXRSn1z/KbE=
X-Google-Smtp-Source: ABdhPJz8UXRBXxFnHjwU9KHKJ57aCdWAlTupj/VfQPjKJc1AKD7gBysJ6np5sy0VpO9JJLZsJRX7gVcs/zM=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9])
(user=yuzhao job=sendgmr) by 2002:a05:6214:1c0c:: with SMTP id
u12mr31837398qvc.24.1618297019786; Mon, 12 Apr 2021 23:56:59 -0700 (PDT)
Date: Tue, 13 Apr 2021 00:56:30 -0600
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>
Message-Id: <20210413065633.2782273-14-yuzhao@google.com>
Mime-Version: 1.0
References: <20210413065633.2782273-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog
Subject: [PATCH v2 13/16] mm: multigenerational lru: page reclaim
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Benjamin Manes <ben.manes@gmail.com>,
Dave Chinner <david@fromorbit.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>,
Miaohe Lin <linmiaohe@huawei.com>,
Michael Larabel <michael@michaellarabel.com>,
Michal Hocko <mhocko@suse.com>,
Michel Lespinasse <michel@lespinasse.org>,
Rik van Riel <riel@surriel.com>,
Roman Gushchin <guro@fb.com>,
Rong Chen <rong.a.chen@intel.com>,
SeongJae Park <sjpark@amazon.de>,
Tim Chen <tim.c.chen@linux.intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, lkp@lists.01.org,
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210413065633.2782273-14-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
With the aging and the eviction in place, we can build the page
reclaim in a straightforward manner:
1) In order to reduce the latency, direct reclaim only invokes the
aging when both of min_seq[2] reach max_seq-1; otherwise it invokes
the eviction.
2) In order to avoid the aging in the direct reclaim path, kswapd
does the background aging more proactively. It invokes the aging
when either of min_seq[2] reaches max_seq-1; otherwise it invokes
the eviction.
And we add another optimization: pages mapped around a referenced PTE
may also have been referenced due to the spatial locality. In the
reclaim path, if the rmap finds the PTE mapping a page under reclaim
referenced, it calls a new function lru_gen_scan_around() to scan the
vicinity of the PTE. And if this new function finds other referenced
PTEs, it updates the generation number of the pages mapped by those
PTEs.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
include/linux/mmzone.h | 6 ++
mm/rmap.c | 6 ++
mm/vmscan.c | 236 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 248 insertions(+)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index dcfadf6a8c07..a22e9e40083f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -292,6 +292,7 @@ enum lruvec_flags {
};
struct lruvec;
+struct page_vma_mapped_walk;
#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF)
#define LRU_USAGE_MASK ((BIT(LRU_USAGE_WIDTH) - 1) << LRU_USAGE_PGOFF)
@@ -384,6 +385,7 @@ struct lrugen {
void lru_gen_init_lruvec(struct lruvec *lruvec);
void lru_gen_set_state(bool enable, bool main, bool swap);
+void lru_gen_scan_around(struct page_vma_mapped_walk *pvmw);
#else /* CONFIG_LRU_GEN */
@@ -395,6 +397,10 @@ static inline void lru_gen_set_state(bool enable, bool main, bool swap)
{
}
+static inline void lru_gen_scan_around(struct page_vma_mapped_walk *pvmw)
+{
+}
+
#endif /* CONFIG_LRU_GEN */
struct lruvec {
diff --git a/mm/rmap.c b/mm/rmap.c
index b0fc27e77d6d..d600b282ced5 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -72,6 +72,7 @@
#include <linux/page_idle.h>
#include <linux/memremap.h>
#include <linux/userfaultfd_k.h>
+#include <linux/mm_inline.h>
#include <asm/tlbflush.h>
@@ -792,6 +793,11 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
}
if (pvmw.pte) {
+ /* the multigenerational lru exploits the spatial locality */
+ if (lru_gen_enabled() && pte_young(*pvmw.pte)) {
+ lru_gen_scan_around(&pvmw);
+ referenced++;
+ }
if (ptep_clear_flush_young_notify(vma, address,
pvmw.pte)) {
/*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6239b1acd84f..01c475386379 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1114,6 +1114,10 @@ static unsigned int shrink_page_list(struct list_head *page_list,
if (!sc->may_unmap && page_mapped(page))
goto keep_locked;
+ /* in case the page was found accessed by lru_gen_scan_around() */
+ if (lru_gen_enabled() && !ignore_references && PageReferenced(page))
+ goto keep_locked;
+
may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
@@ -2233,6 +2237,10 @@ static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
unsigned long file;
struct lruvec *target_lruvec;
+ /* the multigenerational lru doesn't use these counters */
+ if (lru_gen_enabled())
+ return;
+
target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
/*
@@ -2522,6 +2530,19 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
}
}
+#ifdef CONFIG_LRU_GEN
+static void age_lru_gens(struct pglist_data *pgdat, struct scan_control *sc);
+static void shrink_lru_gens(struct lruvec *lruvec, struct scan_control *sc);
+#else
+static void age_lru_gens(struct pglist_data *pgdat, struct scan_control *sc)
+{
+}
+
+static void shrink_lru_gens(struct lruvec *lruvec, struct scan_control *sc)
+{
+}
+#endif
+
static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
{
unsigned long nr[NR_LRU_LISTS];
@@ -2533,6 +2554,11 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
struct blk_plug plug;
bool scan_adjusted;
+ if (lru_gen_enabled()) {
+ shrink_lru_gens(lruvec, sc);
+ return;
+ }
+
get_scan_count(lruvec, sc, nr);
/* Record the original scan target for proportional adjustments later */
@@ -2999,6 +3025,10 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat)
struct lruvec *target_lruvec;
unsigned long refaults;
+ /* the multigenerational lru doesn't use these counters */
+ if (lru_gen_enabled())
+ return;
+
target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON);
target_lruvec->refaults[0] = refaults;
@@ -3373,6 +3403,11 @@ static void age_active_anon(struct pglist_data *pgdat,
struct mem_cgroup *memcg;
struct lruvec *lruvec;
+ if (lru_gen_enabled()) {
+ age_lru_gens(pgdat, sc);
+ return;
+ }
+
if (!total_swap_pages)
return;
@@ -5468,6 +5503,57 @@ static bool walk_mm_list(struct lruvec *lruvec, unsigned long max_seq,
return true;
}
+void lru_gen_scan_around(struct page_vma_mapped_walk *pvmw)
+{
+ pte_t *pte;
+ unsigned long start, end;
+ int old_gen, new_gen;
+ unsigned long flags;
+ struct lruvec *lruvec;
+ struct mem_cgroup *memcg;
+ struct pglist_data *pgdat = page_pgdat(pvmw->page);
+
+ lockdep_assert_held(pvmw->ptl); /* the caller must hold the page table lock */
+
+ start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start); /* clamp to this PMD range and VMA */
+ end = pmd_addr_end(pvmw->address, pvmw->vma->vm_end);
+ pte = pvmw->pte - ((pvmw->address - start) >> PAGE_SHIFT); /* step back to the PTE for start */
+
+ memcg = lock_page_memcg(pvmw->page);
+ lruvec = lock_page_lruvec_irqsave(pvmw->page, &flags);
+
+ new_gen = lru_gen_from_seq(lruvec->evictable.max_seq); /* generation of max_seq */
+
+ for (; start != end; pte++, start += PAGE_SIZE) {
+ struct page *page;
+ unsigned long pfn = pte_pfn(*pte);
+
+ if (!pte_present(*pte) || !pte_young(*pte) || is_zero_pfn(pfn))
+ continue;
+
+ if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) /* outside this node's pfn range */
+ continue;
+
+ page = compound_head(pfn_to_page(pfn));
+ if (page_to_nid(page) != pgdat->node_id)
+ continue;
+
+ if (page_memcg_rcu(page) != memcg) /* not in the memcg of pvmw->page */
+ continue;
+ /*
+ * We may be holding many locks. So try to finish as fast as
+ * possible and leave the accessed and the dirty bits to page
+ * table walks.
+ */
+ old_gen = page_update_gen(page, new_gen);
+ if (old_gen >= 0 && old_gen != new_gen) /* NOTE(review): old_gen < 0 presumably means "not on lrugen" -- confirm */
+ lru_gen_update_size(page, lruvec, old_gen, new_gen);
+ }
+
+ unlock_page_lruvec_irqrestore(lruvec, flags);
+ unlock_page_memcg(pvmw->page);
+}
+
/******************************************************************************
* the eviction
******************************************************************************/
@@ -5809,6 +5895,156 @@ static bool evict_lru_gen_pages(struct lruvec *lruvec, struct scan_control *sc,
return *nr_to_scan > 0 && sc->nr_reclaimed < sc->nr_to_reclaim;
}
+/******************************************************************************
+ * page reclaim
+ ******************************************************************************/
+
+static int get_swappiness(struct lruvec *lruvec)
+{
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ int swappiness = mem_cgroup_get_nr_swap_pages(memcg) >= (long)SWAP_CLUSTER_MAX ? /* at least one batch of swap? */
+ mem_cgroup_swappiness(memcg) : 0; /* 0 makes the per-type loops of the callers start at file */
+
+ VM_BUG_ON(swappiness > 200U); /* unsigned compare: a negative value trips this too */
+
+ return swappiness;
+}
+
+static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
+ int swappiness)
+{
+ int gen, file, zone;
+ long nr_to_scan = 0; /* signed while summing; the function returns unsigned long */
+ struct lrugen *lrugen = &lruvec->evictable;
+ DEFINE_MAX_SEQ();
+ DEFINE_MIN_SEQ();
+
+ lru_add_drain();
+
+ for (file = !swappiness; file < ANON_AND_FILE; file++) { /* anon is left out if !swappiness */
+ unsigned long seq;
+
+ for (seq = min_seq[file]; seq <= max_seq; seq++) {
+ gen = lru_gen_from_seq(seq);
+
+ for (zone = 0; zone <= sc->reclaim_idx; zone++) /* eligible zones only */
+ nr_to_scan += READ_ONCE(lrugen->sizes[gen][file][zone]);
+ }
+ }
+
+ nr_to_scan = max(nr_to_scan, 0L); /* guard against a negative sum */
+ nr_to_scan = round_up(nr_to_scan >> sc->priority, SWAP_CLUSTER_MAX); /* scale by priority, in whole batches */
+
+ if (max_nr_gens(max_seq, min_seq, swappiness) > MIN_NR_GENS)
+ return nr_to_scan; /* more than MIN_NR_GENS generations: no aging from here */
+
+ /* kswapd uses age_lru_gens() */
+ if (current_is_kswapd())
+ return 0;
+
+ return walk_mm_list(lruvec, max_seq, sc, swappiness, NULL) ? nr_to_scan : 0; /* run the aging first; 0 if it returns false */
+}
+
+static void shrink_lru_gens(struct lruvec *lruvec, struct scan_control *sc)
+{
+ struct blk_plug plug;
+ unsigned long scanned = 0; /* budget consumed by evict_lru_gen_pages() so far */
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+
+ blk_start_plug(&plug);
+
+ while (true) {
+ long nr_to_scan;
+ int swappiness = sc->may_swap ? get_swappiness(lruvec) : 0;
+
+ nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness) - scanned; /* what is left of the target */
+ if (nr_to_scan < (long)SWAP_CLUSTER_MAX)
+ break;
+
+ scanned += nr_to_scan; /* charge the whole budget up front... */
+
+ if (!evict_lru_gen_pages(lruvec, sc, swappiness, &nr_to_scan))
+ break;
+
+ scanned -= nr_to_scan; /* ...then give back whatever was left in nr_to_scan */
+
+ if (mem_cgroup_below_min(memcg) || /* stop once the memcg is under protection */
+ (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
+ break;
+
+ cond_resched();
+ }
+
+ blk_finish_plug(&plug);
+}
+
+/******************************************************************************
+ * the background aging
+ ******************************************************************************/
+
+static int lru_gen_spread = MIN_NR_GENS;
+
+static void try_walk_mm_list(struct lruvec *lruvec, struct scan_control *sc)
+{
+ int gen, file, zone;
+ long old_and_young[2] = {}; /* [0]: generations below max_seq, [1]: the max_seq generation */
+ struct mm_walk_args args = {}; /* zeroed on-stack args handed to walk_mm_list() */
+ int spread = READ_ONCE(lru_gen_spread);
+ int swappiness = get_swappiness(lruvec);
+ struct lrugen *lrugen = &lruvec->evictable;
+ DEFINE_MAX_SEQ();
+ DEFINE_MIN_SEQ();
+
+ lru_add_drain();
+
+ for (file = !swappiness; file < ANON_AND_FILE; file++) { /* anon is left out if !swappiness */
+ unsigned long seq;
+
+ for (seq = min_seq[file]; seq <= max_seq; seq++) {
+ gen = lru_gen_from_seq(seq);
+
+ for (zone = 0; zone < MAX_NR_ZONES; zone++) /* every zone, not just up to reclaim_idx */
+ old_and_young[seq == max_seq] +=
+ READ_ONCE(lrugen->sizes[gen][file][zone]);
+ }
+ }
+
+ old_and_young[0] = max(old_and_young[0], 0L); /* guard against negative sums */
+ old_and_young[1] = max(old_and_young[1], 0L);
+
+ if (old_and_young[0] + old_and_young[1] < SWAP_CLUSTER_MAX) /* less than one batch in total */
+ return;
+
+ /* try to spread pages out across spread+1 generations */
+ if (old_and_young[0] >= old_and_young[1] * spread &&
+ min_nr_gens(max_seq, min_seq, swappiness) > max(spread, MIN_NR_GENS))
+ return;
+
+ walk_mm_list(lruvec, max_seq, sc, swappiness, &args); /* the return value is not used here */
+}
+
+static void age_lru_gens(struct pglist_data *pgdat, struct scan_control *sc)
+{
+ struct mem_cgroup *memcg;
+
+ VM_BUG_ON(!current_is_kswapd()); /* only kswapd may call this */
+
+ memcg = mem_cgroup_iter(NULL, NULL, NULL); /* first memcg of the walk */
+ do {
+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
+ struct lrugen *lrugen = &lruvec->evictable;
+
+ if (!mem_cgroup_below_min(memcg) && /* leave protected memcgs alone */
+ (!mem_cgroup_below_low(memcg) || sc->memcg_low_reclaim))
+ try_walk_mm_list(lruvec, sc);
+
+ if (!mem_cgroup_disabled())
+ atomic_add_unless(&lrugen->priority, 1, DEF_PRIORITY); /* +1 unless already DEF_PRIORITY */
+
+ cond_resched();
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+}
+
/******************************************************************************
* state change
******************************************************************************/
--
2.31.1.295.g9ea45b61b8-goog

View File

@ -0,0 +1,575 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 31B6EC43470
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:41 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 0EBEA613B6
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:41 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S1345183AbhDMG54 (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
Tue, 13 Apr 2021 02:57:56 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44232 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S1345118AbhDMG5V (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Tue, 13 Apr 2021 02:57:21 -0400
Received: from mail-qt1-x84a.google.com (mail-qt1-x84a.google.com [IPv6:2607:f8b0:4864:20::84a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 1C4E9C061756
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:57:02 -0700 (PDT)
Received: by mail-qt1-x84a.google.com with SMTP id n21so671176qtv.12
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:57:02 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=bmixlp7YQskn8XLZNyskyxhbwQtBt0A28uS5+zjhVpk=;
b=oTGv6qg5bh0RzTaKM94g35MK59AI58jsQR7J4vE6o+6XFd35Jv2Zv+kkD/7cK0zRLR
Ck7Cs2RVKnfve+J1zVD+wa928VjcHUKUO3MuA+Cqt34BQiaAdVe26f2184VnzLQ3dvKx
z82OqBG1tTUndbk4EMVoB1ATBCP4BFNxWu8pKBJpk/N+I2MMj2uihIz/YB8QlxmuXlys
RwrXkZxVCCOUoq3encVAfJmCxv6JvxFy63iWYxkmY36qXToBwfkANHFMZAz4lcdJeH/y
xKzfHqA5vpuNdb9vsTsrozNb0UaKCAiSMM4mlUb5dey98HhAeu/oBRqdnxUz2tE+0+pZ
Z0TA==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=bmixlp7YQskn8XLZNyskyxhbwQtBt0A28uS5+zjhVpk=;
b=UkYkM6FO076Fvajq5s8whylzCbb+PpiJnz1vKUeJJXZu6YCbEYmOvaEH6+8Ddzo48Z
TI3guaaJl9qnC428Yf6FHDGXp6NeOwEblCvtmM2G7+umy+SrfwybHn1bw50Lo872DXbJ
gYls4kvFU7JQc7MioauxTlqJLpTYk3NcULfKC0GiHMuK9jrn/IsdHkAmjv1ZmsU5rVoi
eYiTShjU5iY513/VeoflBCVf0ixDD4Cr5lmm93z+i5Ey1yfqM+TVJShH9XlNUFONylgl
TRTw7Ayvc0f+UlyZ1Xa33Rbw0PvwoKpCYxcb1nsFqUtIjWowX+qxSCaJQ1u1t4X2KqnJ
hJ9w==
X-Gm-Message-State: AOAM530ZLR/zJQAB2NNEhfhm5mkXL3qXLlx6Z2Tl7QIoprpbg2sjKICU
bChNTP+0Q6f93KyJAtViluogruaRpm8=
X-Google-Smtp-Source: ABdhPJxZXtgEcQpB3hP9KaxSvf/XzXOIauyaS8KaFnbmO18XK2qWP28shfFcib9xRh5nN+wBlrRX+XvdCw8=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9])
(user=yuzhao job=sendgmr) by 2002:ad4:4894:: with SMTP id bv20mr10806518qvb.34.1618297021214;
Mon, 12 Apr 2021 23:57:01 -0700 (PDT)
Date: Tue, 13 Apr 2021 00:56:31 -0600
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>
Message-Id: <20210413065633.2782273-15-yuzhao@google.com>
Mime-Version: 1.0
References: <20210413065633.2782273-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog
Subject: [PATCH v2 14/16] mm: multigenerational lru: user interface
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Benjamin Manes <ben.manes@gmail.com>,
Dave Chinner <david@fromorbit.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>,
Miaohe Lin <linmiaohe@huawei.com>,
Michael Larabel <michael@michaellarabel.com>,
Michal Hocko <mhocko@suse.com>,
Michel Lespinasse <michel@lespinasse.org>,
Rik van Riel <riel@surriel.com>,
Roman Gushchin <guro@fb.com>,
Rong Chen <rong.a.chen@intel.com>,
SeongJae Park <sjpark@amazon.de>,
Tim Chen <tim.c.chen@linux.intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, lkp@lists.01.org,
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210413065633.2782273-15-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Add a sysfs file /sys/kernel/mm/lru_gen/enabled so users can enable
and disable the multigenerational lru at runtime.
Add a sysfs file /sys/kernel/mm/lru_gen/spread so users can spread
pages out across multiple generations. More generations make the
background aging more aggressive.
Add a debugfs file /sys/kernel/debug/lru_gen so users can monitor the
multigenerational lru and trigger the aging and the eviction. This
file has the following output:
memcg memcg_id memcg_path
node node_id
min_gen birth_time anon_size file_size
...
max_gen birth_time anon_size file_size
Given a memcg and a node, "min_gen" is the oldest generation (number)
and "max_gen" is the youngest. Birth time is in milliseconds. The
sizes of anon and file types are in pages.
This file takes the following input:
+ memcg_id node_id gen [swappiness]
- memcg_id node_id gen [swappiness] [nr_to_reclaim]
The first command line accounts referenced pages to generation
"max_gen" and creates the next generation "max_gen"+1. In this case,
"gen" should be equal to "max_gen". A swap file and a non-zero
"swappiness" are required to scan anon type. If swapping is not
desired, set vm.swappiness to 0. The second command line evicts
generations less than or equal to "gen". In this case, "gen" should be
less than "max_gen"-1 as "max_gen" and "max_gen"-1 are active
generations and therefore protected from the eviction. Use
"nr_to_reclaim" to limit the number of pages to be evicted. Multiple
command lines are supported, as is concatenation with the delimiters ","
and ";".
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
mm/vmscan.c | 405 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 405 insertions(+)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 01c475386379..284e32d897cf 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -51,6 +51,8 @@
#include <linux/psi.h>
#include <linux/memory.h>
#include <linux/pagewalk.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -6248,6 +6250,403 @@ static int __meminit __maybe_unused lru_gen_online_mem(struct notifier_block *se
return NOTIFY_DONE;
}
+/******************************************************************************
+ * sysfs interface
+ ******************************************************************************/
+/*
+ * sysfs show handler for the "spread" attribute: prints the current value
+ * of lru_gen_spread as a decimal number followed by a newline.
+ */
+static ssize_t show_lru_gen_spread(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", READ_ONCE(lru_gen_spread));
+}
+
+/*
+ * sysfs store handler for the "spread" attribute.
+ *
+ * Accepts a decimal integer in the range [0, MAX_NR_GENS). Negative values
+ * are rejected too: the value is a number of generations, and an accepted
+ * negative number would be stored and reported back by the show handler.
+ *
+ * Returns @len on success, or -EINVAL when the input is not a valid integer
+ * or is out of range.
+ */
+static ssize_t store_lru_gen_spread(struct kobject *kobj, struct kobj_attribute *attr,
+				    const char *buf, size_t len)
+{
+	int spread;
+
+	if (kstrtoint(buf, 10, &spread) || spread < 0 || spread >= MAX_NR_GENS)
+		return -EINVAL;
+
+	/* pairs with the READ_ONCE() readers of lru_gen_spread */
+	WRITE_ONCE(lru_gen_spread, spread);
+
+	return len;
+}
+/* "spread" attribute, mode 0644, backed by the show/store handlers above. */
+static struct kobj_attribute lru_gen_spread_attr = __ATTR(
+ spread, 0644, show_lru_gen_spread, store_lru_gen_spread
+);
+/*
+ * sysfs show handler for the "enabled" attribute: prints the value returned
+ * by lru_gen_enabled() followed by a newline, bounded by PAGE_SIZE.
+ *
+ * NOTE(review): "%ld" expects a long; lru_gen_enabled() is defined outside
+ * this hunk, so confirm that its return type matches the format.
+ */
+static ssize_t show_lru_gen_enabled(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%ld\n", lru_gen_enabled());
+}
+/*
+ * sysfs store handler for the "enabled" attribute.
+ *
+ * Parses a decimal integer and forwards it to lru_gen_set_state(). Returns
+ * @len on success, or -EINVAL when the input is not a valid integer.
+ *
+ * NOTE(review): the parsed value is not range-checked here; presumably
+ * lru_gen_set_state() treats its first argument as a boolean - confirm.
+ */
+static ssize_t store_lru_gen_enabled(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ int enable;
+
+ if (kstrtoint(buf, 10, &enable))
+ return -EINVAL;
+
+ lru_gen_set_state(enable, true, false);
+
+ return len;
+}
+/* "enabled" attribute, mode 0644, backed by the show/store handlers above. */
+static struct kobj_attribute lru_gen_enabled_attr = __ATTR(
+ enabled, 0644, show_lru_gen_enabled, store_lru_gen_enabled
+);
+/* NULL-terminated list of the attributes exposed in the "lru_gen" group. */
+static struct attribute *lru_gen_attrs[] = {
+ &lru_gen_spread_attr.attr,
+ &lru_gen_enabled_attr.attr,
+ NULL
+};
+/* Attribute group named "lru_gen"; registered on mm_kobj by init_lru_gen(). */
+static struct attribute_group lru_gen_attr_group = {
+ .name = "lru_gen",
+ .attrs = lru_gen_attrs,
+};
+
+/******************************************************************************
+ * debugfs interface
+ ******************************************************************************/
+/*
+ * seq_file ->start: position the iterator at entry *pos.
+ *
+ * Entries are enumerated memcg by memcg and, within a memcg, over every
+ * node in the N_MEMORY state. A zeroed PATH_MAX buffer is allocated into
+ * m->private first; lru_gen_seq_show() uses it for the cgroup path and
+ * lru_gen_seq_stop() frees it.
+ *
+ * Returns the lruvec at that position, NULL when *pos is past the last
+ * entry, or ERR_PTR(-ENOMEM) when the buffer cannot be allocated. On a
+ * successful return the memcg iteration is left unfinished;
+ * lru_gen_seq_stop() ends it with mem_cgroup_iter_break().
+ */
+static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos)
+{
+ struct mem_cgroup *memcg;
+ loff_t nr_to_skip = *pos;
+
+ m->private = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (!m->private)
+ return ERR_PTR(-ENOMEM);
+
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ do {
+ int nid;
+
+ for_each_node_state(nid, N_MEMORY) {
+ if (!nr_to_skip--) /* skipped *pos entries: this is the one */
+ return mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+ }
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
+
+ return NULL;
+}
+/*
+ * seq_file ->stop: when @v is a valid lruvec (neither NULL nor an error
+ * pointer), break off the memcg iteration for its memcg; then free the
+ * buffer allocated by lru_gen_seq_start() and clear m->private.
+ */
+static void lru_gen_seq_stop(struct seq_file *m, void *v)
+{
+ if (!IS_ERR_OR_NULL(v))
+ mem_cgroup_iter_break(NULL, lruvec_memcg(v));
+
+ kfree(m->private);
+ m->private = NULL;
+}
+/*
+ * seq_file ->next: advance from lruvec @v to the following entry and
+ * increment *pos.
+ *
+ * Moves to the next memory node of the same memcg; once the nodes are
+ * exhausted (next_memory_node() returned MAX_NUMNODES) it moves to the next
+ * memcg and restarts at first_memory_node. Returns NULL after the last
+ * memcg.
+ */
+static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ int nid = lruvec_pgdat(v)->node_id;
+ struct mem_cgroup *memcg = lruvec_memcg(v);
+
+ ++*pos;
+
+ nid = next_memory_node(nid);
+ if (nid == MAX_NUMNODES) {
+ memcg = mem_cgroup_iter(NULL, memcg, NULL);
+ if (!memcg)
+ return NULL;
+
+ nid = first_memory_node;
+ }
+
+ return mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+}
+/*
+ * Print the extra per-generation statistics for sequence number @seq.
+ *
+ * One line is printed per tier, with three columns for each of the two
+ * page types:
+ *   - seq == max_seq: avg_refaulted ("R") and avg_total ("T"); the third
+ *     column is always 0.
+ *   - seq == min_seq[file], or NR_STAT_GENS > 1: refaulted ("r"), evicted
+ *     ("e") and activated ("a"); activated is indexed by tier - 1, so
+ *     tier 0 prints 0 for it.
+ *   - otherwise: zeros.
+ *
+ * A final line prints the NR_MM_STATS mm-walk counters of this node: with
+ * upper-case codes when seq == max_seq and NR_STAT_GENS == 1, with the
+ * codes as-is when seq != max_seq and NR_STAT_GENS > 1, zeros otherwise.
+ */
+static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
+ unsigned long max_seq, unsigned long *min_seq,
+ unsigned long seq)
+{
+ int i;
+ int file, tier;
+ int sid = sid_from_seq_or_gen(seq);
+ struct lrugen *lrugen = &lruvec->evictable;
+ int nid = lruvec_pgdat(lruvec)->node_id;
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
+
+ for (tier = 0; tier < MAX_NR_TIERS; tier++) {
+ seq_printf(m, " %10d", tier);
+ for (file = 0; file < ANON_AND_FILE; file++) {
+ unsigned long n[3] = {};
+
+ if (seq == max_seq) {
+ n[0] = READ_ONCE(lrugen->avg_refaulted[file][tier]);
+ n[1] = READ_ONCE(lrugen->avg_total[file][tier]);
+
+ seq_printf(m, " %10luR %10luT %10lu ", n[0], n[1], n[2]);
+ } else if (seq == min_seq[file] || NR_STAT_GENS > 1) {
+ n[0] = atomic_long_read(&lrugen->refaulted[sid][file][tier]);
+ n[1] = atomic_long_read(&lrugen->evicted[sid][file][tier]);
+ if (tier) /* activated[] has no slot for tier 0 */
+ n[2] = READ_ONCE(lrugen->activated[sid][file][tier - 1]);
+
+ seq_printf(m, " %10lur %10lue %10lua", n[0], n[1], n[2]);
+ } else
+ seq_puts(m, " 0 0 0 ");
+ }
+ seq_putc(m, '\n');
+ }
+
+ seq_puts(m, " ");
+ for (i = 0; i < NR_MM_STATS; i++) {
+ if (seq == max_seq && NR_STAT_GENS == 1)
+ seq_printf(m, " %10lu%c", READ_ONCE(mm_list->nodes[nid].stats[sid][i]),
+ toupper(MM_STAT_CODES[i]));
+ else if (seq != max_seq && NR_STAT_GENS > 1)
+ seq_printf(m, " %10lu%c", READ_ONCE(mm_list->nodes[nid].stats[sid][i]),
+ MM_STAT_CODES[i]);
+ else
+ seq_puts(m, " 0 ");
+ }
+ seq_putc(m, '\n');
+}
+/*
+ * seq_file ->show: print one (memcg, node) entry; @v is its lruvec.
+ *
+ * The "memcg" header line (id and cgroup path) is printed only for the
+ * first memory node of a memcg, followed by a "node" line with the node id
+ * and the lruvec's priority counter. Then one line per generation gives the
+ * sequence number, the milliseconds elapsed since that generation's
+ * timestamp, and the page count of each type summed over all zones
+ * (negative sums are shown as 0; " -0 " marks a type whose min_seq is
+ * above this generation).
+ *
+ * "full" mode is selected when the file was opened through file_operations
+ * that have no ->write (lru_gen_ro_fops). It starts at max_seq -
+ * MAX_NR_GENS + 1 (or 0 if max_seq is smaller than MAX_NR_GENS) instead of
+ * the smaller min_seq, and adds lru_gen_seq_show_full() after each
+ * generation.
+ *
+ * Always returns 0.
+ */
+static int lru_gen_seq_show(struct seq_file *m, void *v)
+{
+ unsigned long seq;
+ bool full = !debugfs_real_fops(m->file)->write;
+ struct lruvec *lruvec = v;
+ struct lrugen *lrugen = &lruvec->evictable;
+ int nid = lruvec_pgdat(lruvec)->node_id;
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
+ DEFINE_MAX_SEQ();
+ DEFINE_MIN_SEQ();
+
+ if (nid == first_memory_node) {
+#ifdef CONFIG_MEMCG
+ if (memcg)
+ cgroup_path(memcg->css.cgroup, m->private, PATH_MAX);
+#endif
+ seq_printf(m, "memcg %5hu %s\n",
+ mem_cgroup_id(memcg), (char *)m->private); /* buffer was zeroed by ->start */
+ }
+
+ seq_printf(m, " node %5d %10d\n", nid, atomic_read(&lrugen->priority));
+
+ seq = full ? (max_seq < MAX_NR_GENS ? 0 : max_seq - MAX_NR_GENS + 1) :
+ min(min_seq[0], min_seq[1]);
+
+ for (; seq <= max_seq; seq++) {
+ int gen, file, zone;
+ unsigned int msecs;
+
+ gen = lru_gen_from_seq(seq);
+ msecs = jiffies_to_msecs(jiffies - READ_ONCE(lrugen->timestamps[gen]));
+
+ seq_printf(m, " %10lu %10u", seq, msecs);
+
+ for (file = 0; file < ANON_AND_FILE; file++) {
+ long size = 0;
+
+ if (seq < min_seq[file]) { /* generation older than this type's min_seq */
+ seq_puts(m, " -0 ");
+ continue;
+ }
+
+ for (zone = 0; zone < MAX_NR_ZONES; zone++)
+ size += READ_ONCE(lrugen->sizes[gen][file][zone]);
+
+ seq_printf(m, " %10lu ", max(size, 0L));
+ }
+
+ seq_putc(m, '\n');
+
+ if (full)
+ lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq);
+ }
+
+ return 0;
+}
+/* seq_file iterator shared by the "lru_gen" and "lru_gen_full" debugfs files. */
+static const struct seq_operations lru_gen_seq_ops = {
+ .start = lru_gen_seq_start,
+ .stop = lru_gen_seq_stop,
+ .next = lru_gen_seq_next,
+ .show = lru_gen_seq_show,
+};
+/*
+ * Handle the '+' debugfs command for @lruvec.
+ *
+ * walk_mm_list() is called only when @seq equals the current max_seq.
+ * Returns -EINVAL when @seq is greater than max_seq and 0 otherwise, so a
+ * @seq smaller than max_seq is accepted but does nothing.
+ */
+static int advance_max_seq(struct lruvec *lruvec, unsigned long seq, int swappiness)
+{
+ struct mm_walk_args args = {};
+ struct scan_control sc = {
+ .target_mem_cgroup = lruvec_memcg(lruvec),
+ };
+ DEFINE_MAX_SEQ();
+
+ if (seq == max_seq)
+ walk_mm_list(lruvec, max_seq, &sc, swappiness, &args);
+
+ return seq > max_seq ? -EINVAL : 0;
+}
+/*
+ * Handle the '-' debugfs command for @lruvec: evict generations up to and
+ * including @seq, with @nr_to_reclaim passed on as sc.nr_to_reclaim.
+ *
+ * @seq must be smaller than max_seq - 1, otherwise -EINVAL is returned.
+ * evict_lru_gen_pages() is then called repeatedly until @seq is below the
+ * relevant min_seq values or the call returns zero; both cases return 0.
+ * Type 0 is considered only when @swappiness is non-zero, and type 1 only
+ * when @swappiness is below 200. Returns -EINTR when a signal becomes
+ * pending before either condition is met.
+ */
+static int advance_min_seq(struct lruvec *lruvec, unsigned long seq, int swappiness,
+ unsigned long nr_to_reclaim)
+{
+ struct blk_plug plug;
+ int err = -EINTR;
+ long nr_to_scan = LONG_MAX;
+ struct scan_control sc = {
+ .nr_to_reclaim = nr_to_reclaim,
+ .target_mem_cgroup = lruvec_memcg(lruvec),
+ .may_writepage = 1,
+ .may_unmap = 1,
+ .may_swap = 1,
+ .reclaim_idx = MAX_NR_ZONES - 1,
+ .gfp_mask = GFP_KERNEL,
+ };
+ DEFINE_MAX_SEQ();
+
+ if (seq >= max_seq - 1)
+ return -EINVAL;
+
+ blk_start_plug(&plug);
+
+ while (!signal_pending(current)) {
+ DEFINE_MIN_SEQ(); /* re-read on every pass */
+
+ if (seq < min(min_seq[!swappiness], min_seq[swappiness < 200]) ||
+ !evict_lru_gen_pages(lruvec, &sc, swappiness, &nr_to_scan)) {
+ err = 0;
+ break;
+ }
+
+ cond_resched();
+ }
+
+ blk_finish_plug(&plug);
+
+ return err;
+}
+/*
+ * Validate one parsed debugfs command and dispatch it.
+ *
+ * @cmd:           '+' (advance_max_seq) or '-' (advance_min_seq)
+ * @memcg_id:      id of the target memcg
+ * @nid:           target node; must be a valid node in the N_MEMORY state
+ * @seq:           generation sequence number the command refers to
+ * @swappiness:    -1 to use get_swappiness() of the lruvec, else 0..200
+ * @nr_to_reclaim: only used by '-'
+ *
+ * When memcgs are enabled the memcg is looked up by id under RCU and a css
+ * reference is taken; the reference is dropped at "done". Returns the
+ * result of the dispatched handler, or -EINVAL when the memcg cannot be
+ * found, the ids do not match, @nid or @swappiness is invalid, or @cmd is
+ * neither '+' nor '-'.
+ *
+ * NOTE(review): "done" can be reached with memcg == NULL; confirm that
+ * mem_cgroup_put() accepts NULL.
+ */
+static int advance_seq(char cmd, int memcg_id, int nid, unsigned long seq,
+ int swappiness, unsigned long nr_to_reclaim)
+{
+ struct lruvec *lruvec;
+ int err = -EINVAL;
+ struct mem_cgroup *memcg = NULL;
+
+ if (!mem_cgroup_disabled()) {
+ rcu_read_lock();
+ memcg = mem_cgroup_from_id(memcg_id);
+#ifdef CONFIG_MEMCG
+ if (memcg && !css_tryget(&memcg->css))
+ memcg = NULL;
+#endif
+ rcu_read_unlock();
+
+ if (!memcg)
+ goto done;
+ }
+ if (memcg_id != mem_cgroup_id(memcg))
+ goto done;
+
+ if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY))
+ goto done;
+
+ lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+
+ if (swappiness == -1)
+ swappiness = get_swappiness(lruvec);
+ else if (swappiness > 200U) /* unsigned compare: also rejects the other negative values */
+ goto done;
+
+ switch (cmd) {
+ case '+':
+ err = advance_max_seq(lruvec, seq, swappiness);
+ break;
+ case '-':
+ err = advance_min_seq(lruvec, seq, swappiness, nr_to_reclaim);
+ break;
+ }
+done:
+ mem_cgroup_put(memcg);
+
+ return err;
+}
+/*
+ * ->write handler of the "lru_gen" debugfs file.
+ *
+ * Copies the @len bytes at @src into a NUL-terminated kernel buffer and
+ * splits it on ',', ';' and newline. Each non-empty piece is parsed as
+ *
+ *     cmd memcg_id node_id seq [swappiness] [nr_to_reclaim]
+ *
+ * and handed to advance_seq(). The first four fields are mandatory and the
+ * whole piece must be consumed by the parse; otherwise -EINVAL is returned.
+ * Processing stops at the first failing command.
+ *
+ * Returns @len on success, -ENOMEM or -EFAULT when the input cannot be
+ * copied, or the error of the first failing command.
+ *
+ * NOTE(review): "%u" is used to fill the int variables memcg_id, nid and
+ * swappiness.
+ */
+static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
+ size_t len, loff_t *pos)
+{
+ void *buf;
+ char *cur, *next;
+ int err = 0;
+
+ buf = kvmalloc(len + 1, GFP_USER);
+ if (!buf)
+ return -ENOMEM;
+
+ if (copy_from_user(buf, src, len)) {
+ kvfree(buf);
+ return -EFAULT;
+ }
+
+ next = buf;
+ next[len] = '\0';
+
+ while ((cur = strsep(&next, ",;\n"))) {
+ int n;
+ int end;
+ char cmd;
+ int memcg_id;
+ int nid;
+ unsigned long seq;
+ int swappiness = -1; /* -1: advance_seq() falls back to get_swappiness() */
+ unsigned long nr_to_reclaim = -1; /* i.e. ULONG_MAX when the field is omitted */
+
+ cur = skip_spaces(cur);
+ if (!*cur)
+ continue;
+
+ n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid,
+ &seq, &end, &swappiness, &end, &nr_to_reclaim, &end);
+ if (n < 4 || cur[end]) { /* too few fields, or unparsed text is left */
+ err = -EINVAL;
+ break;
+ }
+
+ err = advance_seq(cmd, memcg_id, nid, seq, swappiness, nr_to_reclaim);
+ if (err)
+ break;
+ }
+
+ kvfree(buf);
+
+ return err ? : len;
+}
+/* ->open for both debugfs files: attach the lru_gen_seq_ops iterator. */
+static int lru_gen_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &lru_gen_seq_ops);
+}
+/* File operations of the read-write "lru_gen" debugfs file. */
+static const struct file_operations lru_gen_rw_fops = {
+ .open = lru_gen_seq_open,
+ .read = seq_read,
+ .write = lru_gen_seq_write,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+/* Read-only variant for "lru_gen_full"; the missing ->write is what lru_gen_seq_show() tests to select full output. */
+static const struct file_operations lru_gen_ro_fops = {
+ .open = lru_gen_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
/******************************************************************************
* initialization
******************************************************************************/
@@ -6291,6 +6690,12 @@ static int __init init_lru_gen(void)
if (hotplug_memory_notifier(lru_gen_online_mem, 0))
pr_err("lru_gen: failed to subscribe hotplug notifications\n");
+ if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
+ pr_err("lru_gen: failed to create sysfs group\n");
+
+ debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops);
+ debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops);
+
return 0;
};
/*
--
2.31.1.295.g9ea45b61b8-goog

View File

@ -0,0 +1,175 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 922C4C43461
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:45 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 7572660FDB
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:45 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S1345185AbhDMG6D (ORCPT
<rfc822;linux-kernel@archiver.kernel.org>);
Tue, 13 Apr 2021 02:58:03 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44240 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S1345121AbhDMG5X (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Tue, 13 Apr 2021 02:57:23 -0400
Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 90316C061574
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:57:03 -0700 (PDT)
Received: by mail-yb1-xb4a.google.com with SMTP id c4so2057580ybp.6
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:57:03 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=OWomzY5O6nIPdL3TO8CK9fbu3idsDsiJdhlQbcNCNmk=;
b=hLHfxFzp5QFiDV0NCweRKZIoXrgJbYlQcW+yuS+vLMPcNKKc255Fg3tjNqfooV/OLd
U6CQ3iwK8H6zMls3pFdMBN0NLbmWj6RWEYNi/DCM+PrHNrSzMnt6S2Lg4zq0wvg3486H
+sx4x6j4kxGh5x9L9qgA+TxXylPtgpu5ds2+dsX0pD8ntrVyPxV7AvsnWB6UiW1V9ZVk
/LsyUFz5OtLMbBTake9P8xyrPjX9eTcGBEel6+oOeQ/dZObXKYPRK8qTg6fk2FWETrnD
Zbg2sgYJYwkCg4UC1pmuVjLWdyS1iObkTDP9YTfrBRXxxrrkE/8ced456rnZvUMSg1he
l4YQ==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=OWomzY5O6nIPdL3TO8CK9fbu3idsDsiJdhlQbcNCNmk=;
b=V7wMyHi072dce6ZnPpEv7/vgyxfGH4iYzC8xiwylgcN9u4SyLFR8AsWrgIpv2mVFrC
H9+fkRd2whFAERf06443LAgIA7SIiztKoG2b9INedj5rird9Kes1pDEafZP04/dNwIll
hJeAUb9N1qmeVv6vZIZsKpWDp0D/wa5gCBze6PfyzFRL82n1sUxPv6wP/l9ClegByA3J
8il8uC4X+iRjk3XACwZG+JrS7i4d2Q+qkj3ANVNNGNcDhaHbgsucUpMzpVDJleKoVoBL
Luvyo5PCSA38KyflkQS+SzfwNoU60rrlTa6oBMVzyUgoPqp3RNtFIp4yyJUcill3qvqi
5ymw==
X-Gm-Message-State: AOAM532nNDpt3iSLmHBos2xzSSPUScQwSS+AZ2hM1blhHygr52zHuQkq
triAdzH/rSQIePQ4klFd5q1eM3rRWnU=
X-Google-Smtp-Source: ABdhPJzyLPRGqf29+Ytj/xVq/duL5XVOMgJinIYyL+dmRy0rCrFAsDcush6F7fQT1oukQxSVakciHbYtiFU=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9])
(user=yuzhao job=sendgmr) by 2002:a25:cc90:: with SMTP id l138mr2006126ybf.150.1618297022801;
Mon, 12 Apr 2021 23:57:02 -0700 (PDT)
Date: Tue, 13 Apr 2021 00:56:32 -0600
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>
Message-Id: <20210413065633.2782273-16-yuzhao@google.com>
Mime-Version: 1.0
References: <20210413065633.2782273-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog
Subject: [PATCH v2 15/16] mm: multigenerational lru: Kconfig
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Benjamin Manes <ben.manes@gmail.com>,
Dave Chinner <david@fromorbit.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>,
Miaohe Lin <linmiaohe@huawei.com>,
Michael Larabel <michael@michaellarabel.com>,
Michal Hocko <mhocko@suse.com>,
Michel Lespinasse <michel@lespinasse.org>,
Rik van Riel <riel@surriel.com>,
Roman Gushchin <guro@fb.com>,
Rong Chen <rong.a.chen@intel.com>,
SeongJae Park <sjpark@amazon.de>,
Tim Chen <tim.c.chen@linux.intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, lkp@lists.01.org,
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210413065633.2782273-16-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Add configuration options for the multigenerational lru.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
mm/Kconfig | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 55 insertions(+)
diff --git a/mm/Kconfig b/mm/Kconfig
index 24c045b24b95..0be1c6c90cc0 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -872,4 +872,59 @@ config MAPPING_DIRTY_HELPERS
config KMAP_LOCAL
bool
+config LRU_GEN
+ bool "Multigenerational LRU"
+ depends on MMU
+ help
+ A high performance LRU implementation to heavily overcommit workloads
+ that are not IO bound. See Documentation/vm/multigen_lru.rst for
+ details.
+
+ Warning: do not enable this option unless you plan to use it because
+ it introduces a small per-process and per-memcg and per-node memory
+ overhead.
+
+config NR_LRU_GENS
+ int "Max number of generations"
+ depends on LRU_GEN
+ range 4 31
+ default 7
+ help
+ This will use order_base_2(N+1) spare bits from page flags.
+
+ Warning: do not use numbers larger than necessary because each
+ generation introduces a small per-node and per-memcg memory overhead.
+
+config TIERS_PER_GEN
+ int "Number of tiers per generation"
+ depends on LRU_GEN
+ range 2 5
+ default 4
+ help
+ This will use N-2 spare bits from page flags.
+
+ Higher values generally offer better protection to active pages under
+ heavy buffered I/O workloads.
+
+config LRU_GEN_ENABLED
+ bool "Turn on by default"
+ depends on LRU_GEN
+ help
+ The default value of /sys/kernel/mm/lru_gen/enabled is 0. This option
+ changes it to 1.
+
+ Warning: the default value is the fast path. See
+ Documentation/static-keys.txt for details.
+
+config LRU_GEN_STATS
+ bool "Full stats for debugging"
+ depends on LRU_GEN
+ help
+ This option keeps full stats for each generation, which can be read
+ from /sys/kernel/debug/lru_gen_full.
+
+ Warning: do not enable this option unless you plan to use it because
+ it introduces an additional small per-process and per-memcg and
+ per-node memory overhead.
+
endmenu
--
2.31.1.295.g9ea45b61b8-goog

View File

@ -0,0 +1,322 @@
From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: <linux-kernel-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
aws-us-west-2-korg-lkml-1.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
version=3.4.0
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
by smtp.lore.kernel.org (Postfix) with ESMTP id 8D664C433B4
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:53 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
by mail.kernel.org (Postfix) with ESMTP id 5CED260FDB
for <linux-kernel@archiver.kernel.org>; Tue, 13 Apr 2021 06:57:53 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S244503AbhDMG6L (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
Tue, 13 Apr 2021 02:58:11 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44250 "EHLO
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S1345123AbhDMG5Y (ORCPT
<rfc822;linux-kernel@vger.kernel.org>);
Tue, 13 Apr 2021 02:57:24 -0400
Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49])
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 14C06C061756
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:57:05 -0700 (PDT)
Received: by mail-yb1-xb49.google.com with SMTP id p75so9209574ybc.8
for <linux-kernel@vger.kernel.org>; Mon, 12 Apr 2021 23:57:05 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=google.com; s=20161025;
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
:cc;
bh=fZsS4S+ppDN6vse6LQilTb+995ZpejDyoXEkWEzhPiI=;
b=JPzEmLg8IXqkikE/b+k7FNKSdKIPd2lLmXlP9sfI87JvOkw09qdZ+KRrlaAD+a9Dhn
005sbjcbFZ0lFEPYPSKaDUzlN3hBr3DSo7pYAg76+SLl3Ga5vXEbxhKRzSwelQO0SjpX
rhHL0KytAzNOPmRXNi0zkAQkCW4EAqyrBAkMJuC7dTB6jIRG6ER1dzInKps5oaOL1wQs
HLIiBt2/Ahnea89fcjAFJPIS7nNG2lwTqqUVTkoanckNkavhBDYk0VsP07i7LdiYi9zN
+LOuJNV+snejmLdfr2/3+aMXbxqjF2clhWnkNv/9X/ng5LI35tZxiwJOcncdT6c0vONU
rPQA==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20161025;
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
:references:subject:from:to:cc;
bh=fZsS4S+ppDN6vse6LQilTb+995ZpejDyoXEkWEzhPiI=;
b=Mmy7jkv8AlhXjPNjblEwvM3ZtDGk7NKvJ6rsLmF6f0BWgbZq1tIB6pdyHgFU312oCj
y4lT+2OfaNXkHdc1m9GGWuWIiWBODWDms6SOZyoSt3DzZKzcdOzZvjUSS2YPZRhtMBP8
dB9FKMTZmwSiNzB4tdOneaAVzDRY5bshb8bACVfCaWFqtKUYRJ7IUedFh3omjJHSY8FV
6STGtMN3VWQZjRvtH7TufrAvCfWEWJ4oYHPhHmGG2DIS+7aQ6CbYgjel6Xiw7E9VkAg2
JoiFRDcRNv+ByQW+uYw+Z96cYJm5wf4hkkC+/iCib2vWT1vXRgZ7CRYsjyRwZmHJd2Jy
fKJA==
X-Gm-Message-State: AOAM532ohDzhQEIUgvNgG4R8COEdtptVwp/WFnYFKQYURGql6xBpawoF
Y2GA+8fymXJP5OJ1UDw0RBDHBeXkM1Q=
X-Google-Smtp-Source: ABdhPJzHOTHYLMuXC88wBZEF39dm7Sun3+0TVIBRLg85pDR3z2FX1I51OcfzuM68n03ioC4rVU3FQw4etPM=
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9])
(user=yuzhao job=sendgmr) by 2002:a25:e00f:: with SMTP id x15mr25695207ybg.85.1618297024186;
Mon, 12 Apr 2021 23:57:04 -0700 (PDT)
Date: Tue, 13 Apr 2021 00:56:33 -0600
In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com>
Message-Id: <20210413065633.2782273-17-yuzhao@google.com>
Mime-Version: 1.0
References: <20210413065633.2782273-1-yuzhao@google.com>
X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog
Subject: [PATCH v2 16/16] mm: multigenerational lru: documentation
From: Yu Zhao <yuzhao@google.com>
To: linux-mm@kvack.org
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
Benjamin Manes <ben.manes@gmail.com>,
Dave Chinner <david@fromorbit.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
Johannes Weiner <hannes@cmpxchg.org>,
Jonathan Corbet <corbet@lwn.net>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Matthew Wilcox <willy@infradead.org>,
Mel Gorman <mgorman@suse.de>,
Miaohe Lin <linmiaohe@huawei.com>,
Michael Larabel <michael@michaellarabel.com>,
Michal Hocko <mhocko@suse.com>,
Michel Lespinasse <michel@lespinasse.org>,
Rik van Riel <riel@surriel.com>,
Roman Gushchin <guro@fb.com>,
Rong Chen <rong.a.chen@intel.com>,
SeongJae Park <sjpark@amazon.de>,
Tim Chen <tim.c.chen@linux.intel.com>,
Vlastimil Babka <vbabka@suse.cz>,
Yang Shi <shy828301@gmail.com>,
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
linux-kernel@vger.kernel.org, lkp@lists.01.org,
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
Content-Type: text/plain; charset="UTF-8"
Precedence: bulk
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org
Archived-At: <https://lore.kernel.org/lkml/20210413065633.2782273-17-yuzhao@google.com/>
List-Archive: <https://lore.kernel.org/lkml/>
List-Post: <mailto:linux-kernel@vger.kernel.org>
Add Documentation/vm/multigen_lru.rst.
Signed-off-by: Yu Zhao <yuzhao@google.com>
---
Documentation/vm/index.rst | 1 +
Documentation/vm/multigen_lru.rst | 192 ++++++++++++++++++++++++++++++
2 files changed, 193 insertions(+)
create mode 100644 Documentation/vm/multigen_lru.rst
diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst
index eff5fbd492d0..c353b3f55924 100644
--- a/Documentation/vm/index.rst
+++ b/Documentation/vm/index.rst
@@ -17,6 +17,7 @@ various features of the Linux memory management
swap_numa
zswap
+ multigen_lru
Kernel developers MM documentation
==================================
diff --git a/Documentation/vm/multigen_lru.rst b/Documentation/vm/multigen_lru.rst
new file mode 100644
index 000000000000..cf772aeca317
--- /dev/null
+++ b/Documentation/vm/multigen_lru.rst
@@ -0,0 +1,192 @@
+=====================
+Multigenerational LRU
+=====================
+
+Quick Start
+===========
+Build Options
+-------------
+:Required: Set ``CONFIG_LRU_GEN=y``.
+
+:Optional: Change ``CONFIG_NR_LRU_GENS`` to a number ``X`` to support
+ a maximum of ``X`` generations.
+
+:Optional: Change ``CONFIG_TIERS_PER_GEN`` to a number ``Y`` to support
+ a maximum of ``Y`` tiers per generation.
+
+:Optional: Set ``CONFIG_LRU_GEN_ENABLED=y`` to turn the feature on by
+ default.
+
+Runtime Options
+---------------
+:Required: Write ``1`` to ``/sys/kernel/mm/lru_gen/enable`` if the
+ feature was not turned on by default.
+
+:Optional: Change ``/sys/kernel/mm/lru_gen/spread`` to a number ``N``
+ to spread pages out across ``N+1`` generations. ``N`` should be less
+ than ``X``. Larger values make the background aging more aggressive.
+
+:Optional: Read ``/sys/kernel/debug/lru_gen`` to verify the feature.
+ This file has the following output:
+
+::
+
+ memcg memcg_id memcg_path
+ node node_id
+ min_gen birth_time anon_size file_size
+ ...
+ max_gen birth_time anon_size file_size
+
+Given a memcg and a node, ``min_gen`` is the oldest generation
+(number) and ``max_gen`` is the youngest. Birth time is in
+milliseconds. The sizes of anon and file types are in pages.
+
+Recipes
+-------
+:Android on ARMv8.1+: ``X=4``, ``N=0``
+
+:Android on pre-ARMv8.1 CPUs: Not recommended due to the lack of
+ ``ARM64_HW_AFDBM``
+
+:Laptops running Chrome on x86_64: ``X=7``, ``N=2``
+
+:Working set estimation: Write ``+ memcg_id node_id gen [swappiness]``
+ to ``/sys/kernel/debug/lru_gen`` to account referenced pages to
+ generation ``max_gen`` and create the next generation ``max_gen+1``.
+ ``gen`` should be equal to ``max_gen``. A swap file and a non-zero
+ ``swappiness`` are required to scan anon type. If swapping is not
+ desired, set ``vm.swappiness`` to ``0``.
+
+:Proactive reclaim: Write ``- memcg_id node_id gen [swappiness]
+ [nr_to_reclaim]`` to ``/sys/kernel/debug/lru_gen`` to evict
+ generations less than or equal to ``gen``. ``gen`` should be less
+ than ``max_gen-1`` as ``max_gen`` and ``max_gen-1`` are active
+ generations and therefore protected from the eviction. Use
+ ``nr_to_reclaim`` to limit the number of pages to be evicted.
+ Multiple command lines are supported, and so is concatenation with
+ delimiters ``,`` and ``;``.
+
+Framework
+=========
+For each ``lruvec``, evictable pages are divided into multiple
+generations. The youngest generation number is stored in ``max_seq``
+for both anon and file types as they are aged on an equal footing. The
+oldest generation numbers are stored in ``min_seq[2]`` separately for
+anon and file types as clean file pages can be evicted regardless of
+swap and write-back constraints. Generation numbers are truncated into
+``order_base_2(CONFIG_NR_LRU_GENS+1)`` bits in order to fit into
+``page->flags``. The sliding window technique is used to prevent
+truncated generation numbers from overlapping. Each truncated
+generation number is an index to an array of per-type and per-zone
+lists. Evictable pages are added to the per-zone lists indexed by
+``max_seq`` or ``min_seq[2]`` (modulo ``CONFIG_NR_LRU_GENS``),
+depending on whether they are being faulted in.
+
+Each generation is then divided into multiple tiers. Tiers represent
+levels of usage from file descriptors only. Pages accessed N times via
+file descriptors belong to tier order_base_2(N). In contrast to moving
+across generations which requires the lru lock, moving across tiers
+only involves an atomic operation on ``page->flags`` and therefore has
+a negligible cost.
+
+The workflow comprises two conceptually independent functions: the
+aging and the eviction.
+
+Aging
+-----
+The aging produces young generations. Given an ``lruvec``, the aging
+scans page tables for referenced pages of this ``lruvec``. Upon
+finding one, the aging updates its generation number to ``max_seq``.
+After each round of scan, the aging increments ``max_seq``.
+
+The aging maintains either a system-wide ``mm_struct`` list or
+per-memcg ``mm_struct`` lists, and it only scans page tables of
+processes that have been scheduled since the last scan. Since scans
+are differential with respect to referenced pages, the cost is roughly
+proportional to their number.
+
+The aging is due when both of ``min_seq[2]`` reach ``max_seq-1``,
+assuming both anon and file types are reclaimable.
+
+Eviction
+--------
+The eviction consumes old generations. Given an ``lruvec``, the
+eviction scans the pages on the per-zone lists indexed by either of
+``min_seq[2]``. It first tries to select a type based on the values of
+``min_seq[2]``. When anon and file types are both available from the
+same generation, it selects the one that has a lower refault rate.
+
+During a scan, the eviction sorts pages according to their generation
+numbers, if the aging has found them referenced. It also moves pages
+from the tiers that have higher refault rates than tier 0 to the next
+generation.
+
+When it finds all the per-zone lists of a selected type are empty, the
+eviction increments ``min_seq[2]`` indexed by this selected type.
+
+Rationale
+=========
+Limitations of Current Implementation
+-------------------------------------
+Notion of Active/Inactive
+~~~~~~~~~~~~~~~~~~~~~~~~~
+For servers equipped with hundreds of gigabytes of memory, the
+granularity of the active/inactive is too coarse to be useful for job
+scheduling. False active/inactive rates are relatively high, and thus
+the assumed savings may not materialize.
+
+For phones and laptops, executable pages are frequently evicted
+despite the fact that there are many less recently used anon pages.
+Major faults on executable pages cause ``janks`` (slow UI renderings)
+and negatively impact user experience.
+
+For ``lruvec``\s from different memcgs or nodes, comparisons are
+impossible due to the lack of a common frame of reference.
+
+Incremental Scans via ``rmap``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Each incremental scan picks up where the last scan left off and
+stops after it has found a handful of unreferenced pages. For
+workloads using a large amount of anon memory, incremental scans lose
+the advantage under sustained memory pressure due to high ratios of
+the number of scanned pages to the number of reclaimed pages. On top
+of that, the ``rmap`` has poor memory locality due to its complex data
+structures. The combined effects typically result in a high amount of
+CPU usage in the reclaim path.
+
+Benefits of Multigenerational LRU
+---------------------------------
+Notion of Generation Numbers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The notion of generation numbers introduces a quantitative approach to
+memory overcommit. A larger number of pages can be spread out across
+configurable generations, and thus they have relatively low false
+active/inactive rates. Each generation includes all pages that have
+been referenced since the last generation.
+
+Given an ``lruvec``, scans and the selections between anon and file
+types are all based on generation numbers, which are simple and yet
+effective. For different ``lruvec``\s, comparisons are still possible
+based on birth times of generations.
+
+Differential Scans via Page Tables
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Each differential scan discovers all pages that have been referenced
+since the last scan. Specifically, it walks the ``mm_struct`` list
+associated with an ``lruvec`` to scan page tables of processes that
+have been scheduled since the last scan. The cost of each differential
+scan is roughly proportional to the number of referenced pages it
+discovers. Unless address spaces are extremely sparse, page tables
+usually have better memory locality than the ``rmap``. The end result
+is generally a significant reduction in CPU usage, for workloads
+using a large amount of anon memory.
+
+To-do List
+==========
+KVM Optimization
+----------------
+Support shadow page table scanning.
+
+NUMA Optimization
+-----------------
+Support NUMA policies and per-node RSS counters.
--
2.31.1.295.g9ea45b61b8-goog

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,30 @@
From 298c18c5bdc5f1ff302e6c83d642a7ce6307c921 Mon Sep 17 00:00:00 2001
From: Martijn Braam <martijn@brixit.nl>
Date: Fri, 4 Sep 2020 17:35:39 +0200
Subject: [PATCH] media: gc2145: Added BGGR bayer mode
Not all raw bayer modes from the sensor match up with the ones defined
in v4l, mostly because they're mirrored.
---
drivers/media/i2c/gc2145.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/media/i2c/gc2145.c b/drivers/media/i2c/gc2145.c
index 40a50ee17fd3..bed611045de9 100644
--- a/drivers/media/i2c/gc2145.c
+++ b/drivers/media/i2c/gc2145.c
@@ -187,6 +187,11 @@ static const struct gc2145_pixfmt gc2145_formats[] = {
.colorspace = V4L2_COLORSPACE_SRGB,
.fmt_setup = 0x06,
},
+ {
+ .code = MEDIA_BUS_FMT_SBGGR8_1X8,
+ .colorspace = V4L2_COLORSPACE_RAW,
+ .fmt_setup = 0x17,
+ },
};
static const struct gc2145_pixfmt *gc2145_find_format(u32 code)
--
GitLab

View File

@ -0,0 +1,390 @@
From c2ea6ff2636e4e2bc88244c57197318b3d9d806b Mon Sep 17 00:00:00 2001
From: Martijn Braam <martijn@brixit.nl>
Date: Mon, 28 Sep 2020 14:26:11 +0200
Subject: [PATCH] media: ov5640: Implement autofocus
The autofocus functionality needs a firmware blob loaded into the
internal microcontroller.
V4L2 doesn't have an api to control all autofocus functionality, but
this at least makes it possible to focus on the center of the sensor.
Signed-off-by: Martijn Braam <martijn@brixit.nl>
---
drivers/media/i2c/ov5640.c | 254 +++++++++++++++++++++++++++++++++++++
1 file changed, 254 insertions(+)
diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c
index 16ecde24a192..d90fc35e8503 100644
--- a/drivers/media/i2c/ov5640.c
+++ b/drivers/media/i2c/ov5640.c
@@ -9,6 +9,7 @@
#include <linux/clkdev.h>
#include <linux/ctype.h>
#include <linux/delay.h>
+#include <linux/firmware.h>
#include <linux/device.h>
#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
@@ -31,7 +32,11 @@
#define OV5640_DEFAULT_SLAVE_ID 0x3c
+#define OV5640_REG_SYS_RESET00 0x3000
+#define OV5640_REG_SYS_RESET01 0x3001
#define OV5640_REG_SYS_RESET02 0x3002
+#define OV5640_REG_SYS_CLOCK_ENABLE00 0x3004
+#define OV5640_REG_SYS_CLOCK_ENABLE01 0x3005
#define OV5640_REG_SYS_CLOCK_ENABLE02 0x3006
#define OV5640_REG_SYS_CTRL0 0x3008
#define OV5640_REG_CHIP_ID 0x300a
@@ -39,6 +44,14 @@
#define OV5640_REG_PAD_OUTPUT_ENABLE01 0x3017
#define OV5640_REG_PAD_OUTPUT_ENABLE02 0x3018
#define OV5640_REG_PAD_OUTPUT00 0x3019
+#define OV5640_REG_FW_CMD_MAIN 0x3022
+#define OV5640_REG_FW_CMD_ACK 0x3023
+#define OV5640_REG_FW_CMD_PARA0 0x3024
+#define OV5640_REG_FW_CMD_PARA1 0x3025
+#define OV5640_REG_FW_CMD_PARA2 0x3026
+#define OV5640_REG_FW_CMD_PARA3 0x3027
+#define OV5640_REG_FW_CMD_PARA4 0x3028
+#define OV5640_REG_FW_STATUS 0x3029
#define OV5640_REG_SYSTEM_CONTROL1 0x302e
#define OV5640_REG_SC_PLL_CTRL0 0x3034
#define OV5640_REG_SC_PLL_CTRL1 0x3035
@@ -57,6 +70,7 @@
#define OV5640_REG_AEC_PK_MANUAL 0x3503
#define OV5640_REG_AEC_PK_REAL_GAIN 0x350a
#define OV5640_REG_AEC_PK_VTS 0x350c
+#define OV5640_REG_VCM_CONTROL4 0x3606
#define OV5640_REG_TIMING_DVPHO 0x3808
#define OV5640_REG_TIMING_DVPVO 0x380a
#define OV5640_REG_TIMING_HTS 0x380c
@@ -93,6 +107,20 @@
#define OV5640_REG_SDE_CTRL4 0x5584
#define OV5640_REG_SDE_CTRL5 0x5585
#define OV5640_REG_AVG_READOUT 0x56a1
+#define OV5640_REG_FIRMWARE_BASE 0x8000
+
+#define OV5640_FW_STATUS_S_FIRMWARE 0x7f
+#define OV5640_FW_STATUS_S_STARTUP 0x7e
+#define OV5640_FW_STATUS_S_IDLE 0x70
+#define OV5640_FW_STATUS_S_FOCUSING 0x00
+#define OV5640_FW_STATUS_S_FOCUSED 0x10
+
+#define OV5640_FW_CMD_TRIGGER_FOCUS 0x03
+#define OV5640_FW_CMD_CONTINUOUS_FOCUS 0x04
+#define OV5640_FW_CMD_GET_FOCUS_RESULT 0x07
+#define OV5640_FW_CMD_RELEASE_FOCUS 0x08
+#define OV5640_FW_CMD_ZONE_CONFIG 0x12
+#define OV5640_FW_CMD_DEFAULT_ZONES 0x80
enum ov5640_mode_id {
OV5640_MODE_QCIF_176_144 = 0,
@@ -216,6 +244,12 @@ struct ov5640_ctrls {
struct v4l2_ctrl *auto_gain;
struct v4l2_ctrl *gain;
};
+ struct {
+ struct v4l2_ctrl *focus_auto;
+ struct v4l2_ctrl *af_start;
+ struct v4l2_ctrl *af_stop;
+ struct v4l2_ctrl *af_status;
+ };
struct v4l2_ctrl *brightness;
struct v4l2_ctrl *light_freq;
struct v4l2_ctrl *saturation;
@@ -259,6 +293,8 @@ struct ov5640_dev {
bool pending_mode_change;
bool streaming;
+
+ bool af_initialized;
};
static inline struct ov5640_dev *to_ov5640_dev(struct v4l2_subdev *sd)
@@ -1982,6 +2018,99 @@ static void ov5640_reset(struct ov5640_dev *sensor)
usleep_range(20000, 25000);
}
+/*
+ * Upload an autofocus firmware image to the sensor and wait for it to start.
+ *
+ * The image is written one byte per register write, starting at
+ * OV5640_REG_FIRMWARE_BASE, while the MCU is held in reset. The firmware
+ * command registers are then cleared, the MCU is released, and the status
+ * register is polled until it reports OV5640_FW_STATUS_S_IDLE.
+ *
+ * Returns 0 once the firmware reports idle, the error of the first failing
+ * register access, or -ETIMEDOUT if idle is not reached after 100 polls.
+ */
+static int ov5640_copy_fw_to_device(struct ov5640_dev *sensor,
+				    const struct firmware *fw)
+{
+	static const u16 cmd_regs[] = {
+		OV5640_REG_FW_CMD_MAIN,
+		OV5640_REG_FW_CMD_ACK,
+		OV5640_REG_FW_CMD_PARA0,
+		OV5640_REG_FW_CMD_PARA1,
+		OV5640_REG_FW_CMD_PARA2,
+		OV5640_REG_FW_CMD_PARA3,
+		OV5640_REG_FW_CMD_PARA4,
+	};
+	struct i2c_client *client = sensor->i2c_client;
+	const u8 *data = (const u8 *)fw->data;
+	u8 fw_status;
+	size_t n;
+	int i;
+	int ret;
+
+	/* Putting MCU in reset state */
+	ret = ov5640_write_reg(sensor, OV5640_REG_SYS_RESET00, 0x20);
+	if (ret)
+		return ret;
+
+	/* Write firmware; stop at the first failed write rather than
+	 * starting the MCU on a partially written image.
+	 */
+	for (n = 0; n < fw->size; n++) {
+		ret = ov5640_write_reg(sensor, OV5640_REG_FIRMWARE_BASE + n,
+				       data[n]);
+		if (ret)
+			return ret;
+	}
+
+	/* Reset MCU state: clear the command registers ... */
+	for (n = 0; n < ARRAY_SIZE(cmd_regs); n++) {
+		ret = ov5640_write_reg(sensor, cmd_regs[n], 0x00);
+		if (ret)
+			return ret;
+	}
+
+	/* ... and preset the status register to the "firmware" state. */
+	ret = ov5640_write_reg(sensor, OV5640_REG_FW_STATUS,
+			       OV5640_FW_STATUS_S_FIRMWARE);
+	if (ret)
+		return ret;
+
+	/* Start AF MCU */
+	ret = ov5640_write_reg(sensor, OV5640_REG_SYS_RESET00, 0x00);
+	if (ret)
+		return ret;
+
+	dev_info(&client->dev, "firmware upload success\n");
+
+	/* Wait for firmware to be ready, sleeping 50 ms between polls */
+	for (i = 0; i < 100; i++) {
+		ret = ov5640_read_reg(sensor, OV5640_REG_FW_STATUS, &fw_status);
+		if (ret)
+			return ret;
+
+		if (fw_status == OV5640_FW_STATUS_S_IDLE) {
+			dev_info(&client->dev, "fw started after %d ms\n", i * 50);
+			return 0;
+		}
+		msleep(50);
+	}
+	dev_err(&client->dev, "uploaded firmware didn't start, got to 0x%x\n", fw_status);
+	return -ETIMEDOUT;
+}
+
+/*
+ * Bring up the autofocus subsystem: load "ov5640_af.bin", upload it to the
+ * sensor, enable the AF clock bits and switch the lens focus driver on.
+ *
+ * Does nothing and returns 0 when sensor->af_initialized is already set.
+ * The flag is only set after every step has succeeded, so a failed attempt
+ * is retried on the next call.
+ *
+ * Returns 0 on success, or the error code of the firmware request, the
+ * upload, or the first failing register access.
+ */
+static int ov5640_af_init(struct ov5640_dev *sensor)
+{
+	struct i2c_client *client = sensor->i2c_client;
+	const char *fwname = "ov5640_af.bin";
+	const struct firmware *fw;
+	int ret;
+
+	if (sensor->af_initialized)
+		return 0;
+
+	ret = firmware_request_nowarn(&fw, fwname, &client->dev);
+	if (ret) {
+		dev_warn(&client->dev, "%s: no autofocus firmware available (%s)\n",
+			 __func__, fwname);
+		return ret;
+	}
+
+	/* The image is only needed for the upload; release it either way. */
+	ret = ov5640_copy_fw_to_device(sensor, fw);
+	release_firmware(fw);
+	if (ret)
+		return ret;
+
+	/* Enable AF systems */
+	ret = ov5640_mod_reg(sensor, OV5640_REG_SYS_CLOCK_ENABLE00,
+			     (BIT(6) | BIT(5)), (BIT(6) | BIT(5)));
+	if (ret)
+		return ret;
+	ret = ov5640_mod_reg(sensor, OV5640_REG_SYS_CLOCK_ENABLE01,
+			     BIT(6), BIT(6));
+	if (ret)
+		return ret;
+
+	/* Set lens focus driver on */
+	ret = ov5640_write_reg(sensor, OV5640_REG_VCM_CONTROL4, 0x3f);
+	if (ret)
+		return ret;
+
+	sensor->af_initialized = true;
+
+	return 0;
+}
+
static int ov5640_set_power_on(struct ov5640_dev *sensor)
{
struct i2c_client *client = sensor->i2c_client;
@@ -2003,6 +2132,8 @@ static int ov5640_set_power_on(struct ov5640_dev *sensor)
goto xclk_off;
}
+ sensor->af_initialized = 0;
+
ov5640_reset(sensor);
ov5640_power(sensor, true);
@@ -2392,6 +2523,35 @@ static int ov5640_set_framefmt(struct ov5640_dev *sensor,
is_jpeg ? (BIT(5) | BIT(3)) : 0);
}
+/*
+ * Issue a command to the autofocus firmware and wait for it to be
+ * acknowledged.
+ *
+ * Writes 0x01 to the ACK register, writes @command to the main command
+ * register, then polls the ACK register (sleeping 50 ms between reads, at
+ * most 100 reads) until it reads back as zero.
+ *
+ * Returns 0 when the ACK register reads zero, the error of a failing
+ * register access, or -ETIMEDOUT if it never reads zero.
+ */
+static int ov5640_fw_command(struct ov5640_dev *sensor, int command)
+{
+	int attempt;
+	int err;
+	u8 ack;
+
+	err = ov5640_write_reg(sensor, OV5640_REG_FW_CMD_ACK, 0x01);
+	if (!err)
+		err = ov5640_write_reg(sensor, OV5640_REG_FW_CMD_MAIN, command);
+	if (err)
+		return err;
+
+	for (attempt = 0; attempt < 100; attempt++) {
+		err = ov5640_read_reg(sensor, OV5640_REG_FW_CMD_ACK, &ack);
+		/* Done on a read error, or once the ACK register is zero. */
+		if (err || ack == 0)
+			return err;
+
+		msleep(50);
+	}
+
+	return -ETIMEDOUT;
+}
+
+
/*
* Sensor Controls.
*/
@@ -2508,6 +2668,41 @@ static int ov5640_set_ctrl_exposure(struct ov5640_dev *sensor,
return ret;
}
+/*
+ * Run an autofocus firmware command on behalf of a focus control.
+ *
+ * Autofocus is initialized first if needed. OV5640_FW_CMD_RELEASE_FOCUS is
+ * sent on its own; any other command is preceded by a zone-config restart
+ * and selection of the default focus zones.
+ *
+ * Returns 0 on success or the error of the first failing step.
+ */
+static int ov5640_set_ctrl_focus(struct ov5640_dev *sensor, int command)
+{
+	struct device *dev = &sensor->i2c_client->dev;
+	int err;
+
+	err = ov5640_af_init(sensor);
+	if (err) {
+		dev_err(dev, "%s: no autofocus firmware loaded\n", __func__);
+		return err;
+	}
+
+	if (command != OV5640_FW_CMD_RELEASE_FOCUS) {
+		/* Restart zone config, then set the default focus zones */
+		err = ov5640_fw_command(sensor, OV5640_FW_CMD_ZONE_CONFIG);
+		if (!err)
+			err = ov5640_fw_command(sensor,
+						OV5640_FW_CMD_DEFAULT_ZONES);
+		if (err)
+			return err;
+
+		dev_dbg(dev, "%s: Triggering autofocus\n", __func__);
+	} else {
+		dev_dbg(dev, "%s: Releasing autofocus\n", __func__);
+	}
+
+	/* Send the requested command itself (start or release focus) */
+	return ov5640_fw_command(sensor, command);
+}
+
static int ov5640_set_ctrl_gain(struct ov5640_dev *sensor, bool auto_gain)
{
struct ov5640_ctrls *ctrls = &sensor->ctrls;
@@ -2614,6 +2809,32 @@ static int ov5640_set_ctrl_vflip(struct ov5640_dev *sensor, int value)
(BIT(2) | BIT(1)) : 0);
}
+/*
+ * Read the firmware status register and translate it into a
+ * V4L2_AUTO_FOCUS_STATUS_* value.
+ *
+ * The "firmware" and "startup" states map to FAILED, idle to IDLE, focused
+ * to REACHED, and every other value to BUSY. Returns the register read
+ * error instead if the read fails.
+ */
+static int ov5640_get_af_status(struct ov5640_dev *sensor)
+{
+	u8 state;
+	int err;
+
+	err = ov5640_read_reg(sensor, OV5640_REG_FW_STATUS, &state);
+	if (err)
+		return err;
+
+	if (state == OV5640_FW_STATUS_S_FIRMWARE ||
+	    state == OV5640_FW_STATUS_S_STARTUP)
+		return V4L2_AUTO_FOCUS_STATUS_FAILED;
+
+	if (state == OV5640_FW_STATUS_S_IDLE)
+		return V4L2_AUTO_FOCUS_STATUS_IDLE;
+
+	if (state == OV5640_FW_STATUS_S_FOCUSED)
+		return V4L2_AUTO_FOCUS_STATUS_REACHED;
+
+	return V4L2_AUTO_FOCUS_STATUS_BUSY;
+}
+
static int ov5640_g_volatile_ctrl(struct v4l2_ctrl *ctrl)
{
struct v4l2_subdev *sd = ctrl_to_sd(ctrl);
@@ -2635,6 +2856,12 @@ static int ov5640_g_volatile_ctrl(struct v4l2_ctrl *ctrl)
return val;
sensor->ctrls.exposure->val = val;
break;
+ case V4L2_CID_FOCUS_AUTO:
+ val = ov5640_get_af_status(sensor);
+ if (val < 0)
+ return val;
+ sensor->ctrls.af_status->val = val;
+ break;
}
return 0;
@@ -2666,6 +2893,18 @@ static int ov5640_s_ctrl(struct v4l2_ctrl *ctrl)
case V4L2_CID_AUTO_WHITE_BALANCE:
ret = ov5640_set_ctrl_white_balance(sensor, ctrl->val);
break;
+ case V4L2_CID_FOCUS_AUTO:
+ if (ctrl->val)
+ ret = ov5640_set_ctrl_focus(sensor, OV5640_FW_CMD_CONTINUOUS_FOCUS);
+ else
+ ret = ov5640_set_ctrl_focus(sensor, OV5640_FW_CMD_RELEASE_FOCUS);
+ break;
+ case V4L2_CID_AUTO_FOCUS_START:
+ ret = ov5640_set_ctrl_focus(sensor, OV5640_FW_CMD_TRIGGER_FOCUS);
+ break;
+ case V4L2_CID_AUTO_FOCUS_STOP:
+ ret = ov5640_set_ctrl_focus(sensor, OV5640_FW_CMD_RELEASE_FOCUS);
+ break;
case V4L2_CID_HUE:
ret = ov5640_set_ctrl_hue(sensor, ctrl->val);
break;
@@ -2738,6 +2977,20 @@ static int ov5640_init_controls(struct ov5640_dev *sensor)
ctrls->gain = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_GAIN,
0, 1023, 1, 0);
+ /* Autofocus */
+ ctrls->focus_auto = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_FOCUS_AUTO,
+ 0, 1, 1, 0);
+ ctrls->af_start = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_AUTO_FOCUS_START,
+ 0, 1, 1, 0);
+ ctrls->af_stop = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_AUTO_FOCUS_STOP,
+ 0, 1, 1, 0);
+ ctrls->af_status = v4l2_ctrl_new_std(hdl, ops,
+ V4L2_CID_AUTO_FOCUS_STATUS, 0,
+ (V4L2_AUTO_FOCUS_STATUS_BUSY |
+ V4L2_AUTO_FOCUS_STATUS_REACHED |
+ V4L2_AUTO_FOCUS_STATUS_FAILED),
+ 0, V4L2_AUTO_FOCUS_STATUS_IDLE);
+
ctrls->saturation = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_SATURATION,
0, 255, 1, 64);
ctrls->hue = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_HUE,
@@ -2771,6 +3024,7 @@ static int ov5640_init_controls(struct ov5640_dev *sensor)
v4l2_ctrl_auto_cluster(3, &ctrls->auto_wb, 0, false);
v4l2_ctrl_auto_cluster(2, &ctrls->auto_gain, 0, true);
v4l2_ctrl_auto_cluster(2, &ctrls->auto_exp, 1, true);
+ v4l2_ctrl_cluster(4, &ctrls->focus_auto);
sensor->sd.ctrl_handler = hdl;
return 0;
--
GitLab

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More