Added kernel
This commit is contained in:
		@@ -0,0 +1,29 @@
 | 
			
		||||
From c965cb8a004c6cc370b4bf297c61fe5ac8ab0583 Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Martijn Braam <martijn@brixit.nl>
 | 
			
		||||
Date: Wed, 6 Jan 2021 03:11:17 +0100
 | 
			
		||||
Subject: [PATCH] arm64: dts: sunxi: Add mmc aliases for the PineTab
 | 
			
		||||
 | 
			
		||||
The order for the mmc devices changed in the kernel without this change.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Martijn Braam <martijn@brixit.nl>
 | 
			
		||||
---
 | 
			
		||||
 arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts | 3 +++
 | 
			
		||||
 1 file changed, 3 insertions(+)
 | 
			
		||||
 | 
			
		||||
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
 | 
			
		||||
index a87790df94b3..1cf3c3a9ad7f 100644
 | 
			
		||||
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
 | 
			
		||||
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
 | 
			
		||||
@@ -18,6 +18,9 @@ / {
 | 
			
		||||
 	compatible = "pine64,pinetab", "allwinner,sun50i-a64";
 | 
			
		||||
 
 | 
			
		||||
 	aliases {
 | 
			
		||||
+		mmc0 = &mmc0;
 | 
			
		||||
+		mmc1 = &mmc1;
 | 
			
		||||
+		mmc2 = &mmc2;
 | 
			
		||||
 		serial0 = &uart0;
 | 
			
		||||
 		ethernet0 = &rtl8723cs;
 | 
			
		||||
 	};
 | 
			
		||||
-- 
 | 
			
		||||
2.29.2
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										511
									
								
								sys-kernel/pinephone-sources/files/0012-bootsplash.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										511
									
								
								sys-kernel/pinephone-sources/files/0012-bootsplash.patch
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,511 @@
 | 
			
		||||
diff --git a/MAINTAINERS b/MAINTAINERS
 | 
			
		||||
index 7ffac272434e..ddff07cd794c 100644
 | 
			
		||||
--- a/MAINTAINERS
 | 
			
		||||
+++ b/MAINTAINERS
 | 
			
		||||
@@ -2715,6 +2715,7 @@ F:	drivers/video/fbdev/core/bootsplash*.*
 | 
			
		||||
 F:	drivers/video/fbdev/core/dummycon.c
 | 
			
		||||
 F:	include/linux/bootsplash.h
 | 
			
		||||
 F:	include/uapi/linux/bootsplash_file.h
 | 
			
		||||
+F:	tools/bootsplash/*
 | 
			
		||||
 
 | 
			
		||||
 BPF (Safe dynamic programs and tools)
 | 
			
		||||
 M:	Alexei Starovoitov <ast@kernel.org>
 | 
			
		||||
diff --git a/tools/bootsplash/.gitignore b/tools/bootsplash/.gitignore
 | 
			
		||||
new file mode 100644
 | 
			
		||||
index 000000000000..091b99a17567
 | 
			
		||||
--- /dev/null
 | 
			
		||||
+++ b/tools/bootsplash/.gitignore
 | 
			
		||||
@@ -0,0 +1 @@
 | 
			
		||||
+bootsplash-packer
 | 
			
		||||
diff --git a/tools/bootsplash/Makefile b/tools/bootsplash/Makefile
 | 
			
		||||
new file mode 100644
 | 
			
		||||
index 000000000000..0ad8e8a84942
 | 
			
		||||
--- /dev/null
 | 
			
		||||
+++ b/tools/bootsplash/Makefile
 | 
			
		||||
@@ -0,0 +1,9 @@
 | 
			
		||||
+# Builds the bootsplash file packer.
+#
+# CROSS_COMPILE may be set to a toolchain prefix.  The extra include path
+# is presumably where <linux/bootsplash_file.h>, which bootsplash-packer.c
+# includes, is expected to be found.
+CC := $(CROSS_COMPILE)gcc
+CFLAGS := -I../../usr/include
+
+PROGS := bootsplash-packer
+
+# Neither target names a file it creates, so do not let files called
+# "all" or "clean" stop them from running.
+.PHONY: all clean
+
+all: $(PROGS)
+
+clean:
+	rm -fr $(PROGS)
 | 
			
		||||
diff --git a/tools/bootsplash/bootsplash-packer.c b/tools/bootsplash/bootsplash-packer.c
 | 
			
		||||
new file mode 100644
 | 
			
		||||
index 000000000000..ffb6a8b69885
 | 
			
		||||
--- /dev/null
 | 
			
		||||
+++ b/tools/bootsplash/bootsplash-packer.c
 | 
			
		||||
@@ -0,0 +1,471 @@
 | 
			
		||||
+/*
 | 
			
		||||
+ * Kernel based bootsplash.
 | 
			
		||||
+ *
 | 
			
		||||
+ * (Splash file packer tool)
 | 
			
		||||
+ *
 | 
			
		||||
+ * Authors:
 | 
			
		||||
+ * Max Staudt <mstaudt@suse.de>
 | 
			
		||||
+ *
 | 
			
		||||
+ * SPDX-License-Identifier: GPL-2.0
 | 
			
		||||
+ */
 | 
			
		||||
+
 | 
			
		||||
+#include <endian.h>
 | 
			
		||||
+#include <getopt.h>
 | 
			
		||||
+#include <stdint.h>
 | 
			
		||||
+#include <stdio.h>
 | 
			
		||||
+#include <stdlib.h>
 | 
			
		||||
+#include <string.h>
 | 
			
		||||
+
 | 
			
		||||
+#include <linux/bootsplash_file.h>
 | 
			
		||||
+
 | 
			
		||||
+
 | 
			
		||||
+/*
+ * Print the usage text to stdout: the synopsis (using @progname as the
+ * program name), the list of options, and the byte order of the files
+ * this build writes.
+ */
+static void print_help(char *progname)
+{
+	static const char options[] =
+		"\n"
+		"Options, executed in order given:\n"
+		"  -h, --help                   Print this help message\n"
+		"\n"
+		"  --bg_red <u8>                Background color (red part)\n"
+		"  --bg_green <u8>              Background color (green part)\n"
+		"  --bg_blue <u8>               Background color (blue part)\n"
+		"  --bg_reserved <u8>           (do not use)\n"
+		"  --frame_ms <u16>             Minimum milliseconds between animation steps\n"
+		"\n"
+		"  --picture                    Start describing the next picture\n"
+		"  --pic_width <u16>            Picture width in pixels\n"
+		"  --pic_height <u16>           Picture height in pixels\n"
+		"  --pic_position <u8>             Coarse picture placement:\n"
+		"                                  0x00 - Top left\n"
+		"                                  0x01 - Top\n"
+		"                                  0x02 - Top right\n"
+		"                                  0x03 - Right\n"
+		"                                  0x04 - Bottom right\n"
+		"                                  0x05 - Bottom\n"
+		"                                  0x06 - Bottom left\n"
+		"                                  0x07 - Left\n"
+		"\n"
+		"                                Flags:\n"
+		"                                 0x10 - Calculate offset from corner towards center,\n"
+		"                                         rather than from center towards corner\n"
+		"  --pic_position_offset <u16>  Distance from base position in pixels\n"
+		"  --pic_anim_type <u8>         Animation type:\n"
+		"                                 0 - None\n"
+		"                                 1 - Forward loop\n"
+		"  --pic_anim_loop <u8>         Loop point for animation\n"
+		"\n"
+		"  --blob <filename>            Include next data stream\n"
+		"  --blob_type <u16>            Type of data\n"
+		"  --blob_picture_id <u8>       Picture to associate this blob with, starting at 0\n"
+		"                                 (default: number of last --picture)\n"
+		"\n";
+
+	/* The byte order is fixed at build time */
+#if __BYTE_ORDER == __BIG_ENDIAN
+	const char *byte_order = "Big Endian (BE)";
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
+	const char *byte_order = "Little Endian (LE)";
+#else
+#error
+#endif
+
+	printf("Usage: %s [OPTIONS] outfile\n", progname);
+	fputs(options, stdout);
+	printf("This tool will write %s files.\n\n", byte_order);
+}
 | 
			
		||||
+
 | 
			
		||||
+
 | 
			
		||||
+/*
+ * One blob requested with --blob.  main() keeps the entries in a singly
+ * linked list, appending each new one at the tail.
+ */
+struct blob_entry {
+	/* Following entry, NULL for the last one */
+	struct blob_entry *next;
+	/* Name of the file holding the blob data (the --blob argument) */
+	char *fn;
+	/* Header written to the output file in front of the blob data */
+	struct splash_blob_header header;
+};
 | 
			
		||||
+
 | 
			
		||||
+
 | 
			
		||||
+/*
+ * Print the fields of the file header @h to stdout, one per line, with
+ * related fields separated by blank lines.
+ */
+static void dump_file_header(struct splash_file_header *h)
+{
+	printf(" --- File header ---\n"
+	       "\n"
+	       "  version:     %5u\n"
+	       "\n"
+	       "  bg_red:      %5u\n"
+	       "  bg_green:    %5u\n"
+	       "  bg_blue:     %5u\n"
+	       "  bg_reserved: %5u\n"
+	       "\n"
+	       "  num_blobs:   %5u\n"
+	       "  num_pics:    %5u\n"
+	       "\n"
+	       "  frame_ms:    %5u\n"
+	       "\n",
+	       h->version,
+	       h->bg_red, h->bg_green, h->bg_blue, h->bg_reserved,
+	       h->num_blobs, h->num_pics,
+	       h->frame_ms);
+}
 | 
			
		||||
+
 | 
			
		||||
+/*
+ * Print the fields of the picture header @ph to stdout, one per line,
+ * with related fields separated by blank lines.
+ */
+static void dump_pic_header(struct splash_pic_header *ph)
+{
+	printf(" --- Picture header ---\n");
+	printf("\n");
+	printf("  width:           %5u\n", ph->width);
+	printf("  height:          %5u\n", ph->height);
+	printf("\n");
+	printf("  num_blobs:       %5u\n", ph->num_blobs);
+	printf("\n");
+	/*
+	 * position combines a placement value with flag bits (see the usage
+	 * text), so it is shown in hex.  The former "%0x3x" printed the hex
+	 * digits followed by a literal "3x".  The leading space pads the
+	 * value to the 5 columns used by the other fields.
+	 */
+	printf("  position:         0x%02x\n", ph->position);
+	printf("  position_offset: %5u\n", ph->position_offset);
+	printf("\n");
+	printf("  anim_type:       %5u\n", ph->anim_type);
+	printf("  anim_loop:       %5u\n", ph->anim_loop);
+	printf("\n");
+}
 | 
			
		||||
+
 | 
			
		||||
+/*
+ * Print the header fields of blob list entry @b to stdout.
+ */
+static void dump_blob(struct blob_entry *b)
+{
+	printf(" --- Blob header ---\n"
+	       "\n"
+	       "  length:     %7u\n"
+	       "  type:       %7u\n"
+	       "\n"
+	       "  picture_id: %7u\n"
+	       "\n",
+	       b->header.length,
+	       b->header.type,
+	       b->header.picture_id);
+}
 | 
			
		||||
+
 | 
			
		||||
+
 | 
			
		||||
+#define OPT_MAX(var, max) \
 | 
			
		||||
+	do { \
 | 
			
		||||
+		if ((var) > max) { \
 | 
			
		||||
+			fprintf(stderr, "--%s: Invalid value\n", \
 | 
			
		||||
+			long_options[option_index].name); \
 | 
			
		||||
+			break; \
 | 
			
		||||
+		} \
 | 
			
		||||
+	} while (0)
 | 
			
		||||
+
 | 
			
		||||
+/*
+ * Long options accepted by the packer.  Apart from 'h', the values that
+ * getopt_long() returns for them are grouped by what main() configures
+ * with them: 1xxxx file header, 2xxxx picture, 3xxxx blob.
+ */
+static struct option long_options[] = {
+	{ "help",                no_argument,       NULL, 'h'   },
+	{ "bg_red",              required_argument, NULL, 10001 },
+	{ "bg_green",            required_argument, NULL, 10002 },
+	{ "bg_blue",             required_argument, NULL, 10003 },
+	{ "bg_reserved",         required_argument, NULL, 10004 },
+	{ "frame_ms",            required_argument, NULL, 10005 },
+	{ "picture",             no_argument,       NULL, 20000 },
+	{ "pic_width",           required_argument, NULL, 20001 },
+	{ "pic_height",          required_argument, NULL, 20002 },
+	{ "pic_position",        required_argument, NULL, 20003 },
+	{ "pic_position_offset", required_argument, NULL, 20004 },
+	{ "pic_anim_type",       required_argument, NULL, 20005 },
+	{ "pic_anim_loop",       required_argument, NULL, 20006 },
+	{ "blob",                required_argument, NULL, 30000 },
+	{ "blob_type",           required_argument, NULL, 30001 },
+	{ "blob_picture_id",     required_argument, NULL, 30002 },
+	/* Terminator */
+	{ NULL,                  0,                 NULL, 0     }
+};
 | 
			
		||||
+
 | 
			
		||||
+
 | 
			
		||||
+/* Free every entry of the blob list starting at @head. */
+static void free_blobs(struct blob_entry *head)
+{
+	while (head) {
+		struct blob_entry *next = head->next;
+
+		free(head);
+		head = next;
+	}
+}
+
+/*
+ * Parse @arg, the argument of long option @name, as an unsigned integer in
+ * any base strtoul() detects, and store it in @out if it is at most @max.
+ *
+ * Returns 0 on success.  If @arg is not a plain number or is out of range,
+ * an error is printed to stderr, @out is left untouched and -1 is returned.
+ */
+static int parse_opt(const char *name, const char *arg, unsigned long max,
+		     unsigned long *out)
+{
+	char *end;
+	unsigned long val;
+
+	val = strtoul(arg, &end, 0);
+	/* strtoul() silently negates "-N", so refuse any minus sign */
+	if (end == arg || *end != '\0' || strchr(arg, '-') || val > max) {
+		fprintf(stderr, "--%s: Invalid value\n", name);
+		return -1;
+	}
+
+	*out = val;
+	return 0;
+}
+
+/*
+ * Append the first @length bytes of the file named @fn to @of, then pad
+ * the data stream with zero bytes to a multiple of 16 bytes.
+ *
+ * Returns 0 on success, -1 if @fn cannot be opened or a read or write
+ * fails.
+ */
+static int copy_blob_data(const char *fn, uint32_t length, FILE *of)
+{
+	static const char padding[16];
+	char buf[4096];
+	uint32_t left = length;
+	FILE *f;
+
+	f = fopen(fn, "rb");
+	if (!f)
+		return -1;
+
+	while (left) {
+		size_t chunk = left < sizeof(buf) ? left : sizeof(buf);
+
+		if (fread(buf, chunk, 1, f) != 1)
+			goto ERR_FILE_COPY;
+		if (fwrite(buf, chunk, 1, of) != 1)
+			goto ERR_FILE_COPY;
+		left -= chunk;
+	}
+
+	/* Pad data stream to 16 bytes */
+	if (length % 16) {
+		if (fwrite(padding, 16 - (length % 16), 1, of) != 1)
+			goto ERR_FILE_COPY;
+	}
+
+	fclose(f);
+	return 0;
+
+ERR_FILE_COPY:
+	fclose(f);
+	return -1;
+}
+
+/*
+ * Build a splash file from the command line.
+ *
+ * Options are executed in the order given: the bg_ and frame_ms options
+ * fill the file header, --picture starts a new picture which the pic_
+ * options then describe, and --blob adds a data file which the blob_
+ * options then describe.  The last non-option argument names the output
+ * file.  The resulting headers are dumped to stdout and then written to
+ * the output file, followed by the blobs.
+ *
+ * An option with an invalid value is reported on stderr and skipped.  A
+ * blob file whose length cannot be determined is reported and written as
+ * an empty blob.
+ *
+ * Returns EXIT_SUCCESS if the file was written or help was requested with
+ * -h, EXIT_FAILURE on bad usage or when writing fails.
+ */
+int main(int argc, char **argv)
+{
+	FILE *of = NULL;
+	char *ofn = NULL;
+	int c;
+	int option_index = 0;
+	int ret = EXIT_FAILURE;
+
+	unsigned long ul;
+	struct splash_file_header fh = {};
+	/* Zeroed: num_blobs is incremented below and unset fields are written */
+	struct splash_pic_header ph[255] = {};
+	struct blob_entry *blob_first = NULL;
+	struct blob_entry *blob_last = NULL;
+	struct blob_entry *blob_cur = NULL;
+
+	if (argc < 2) {
+		print_help(argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	/* Parse and execute user commands */
+	while ((c = getopt_long(argc, argv, "h",
+				long_options, &option_index)) != -1) {
+		const char *name = long_options[option_index].name;
+
+		/* pic_ options describe the picture started last */
+		if (c >= 20001 && c <= 20006 && !fh.num_pics) {
+			fprintf(stderr, "--%s: No picture selected\n", name);
+			continue;
+		}
+
+		/* blob_ options describe the blob added last */
+		if (c >= 30001 && c <= 30002 && !blob_cur) {
+			fprintf(stderr, "--%s: No blob selected\n", name);
+			continue;
+		}
+
+		switch (c) {
+		case 10001:	/* bg_red */
+			if (!parse_opt(name, optarg, 255, &ul))
+				fh.bg_red = ul;
+			break;
+		case 10002:	/* bg_green */
+			if (!parse_opt(name, optarg, 255, &ul))
+				fh.bg_green = ul;
+			break;
+		case 10003:	/* bg_blue */
+			if (!parse_opt(name, optarg, 255, &ul))
+				fh.bg_blue = ul;
+			break;
+		case 10004:	/* bg_reserved */
+			if (!parse_opt(name, optarg, 255, &ul))
+				fh.bg_reserved = ul;
+			break;
+		case 10005:	/* frame_ms */
+			if (!parse_opt(name, optarg, 65535, &ul))
+				fh.frame_ms = ul;
+			break;
+
+		case 20000:	/* picture */
+			if (fh.num_pics >= 255) {
+				fprintf(stderr, "--%s: Picture array full\n",
+					name);
+				break;
+			}
+
+			fh.num_pics++;
+			break;
+
+		case 20001:	/* pic_width */
+			if (!parse_opt(name, optarg, 65535, &ul))
+				ph[fh.num_pics - 1].width = ul;
+			break;
+
+		case 20002:	/* pic_height */
+			if (!parse_opt(name, optarg, 65535, &ul))
+				ph[fh.num_pics - 1].height = ul;
+			break;
+
+		case 20003:	/* pic_position */
+			if (!parse_opt(name, optarg, 255, &ul))
+				ph[fh.num_pics - 1].position = ul;
+			break;
+
+		case 20004:	/* pic_position_offset, u16 per the usage text */
+			if (!parse_opt(name, optarg, 65535, &ul))
+				ph[fh.num_pics - 1].position_offset = ul;
+			break;
+
+		case 20005:	/* pic_anim_type */
+			if (!parse_opt(name, optarg, 255, &ul))
+				ph[fh.num_pics - 1].anim_type = ul;
+			break;
+
+		case 20006:	/* pic_anim_loop */
+			if (!parse_opt(name, optarg, 255, &ul))
+				ph[fh.num_pics - 1].anim_loop = ul;
+			break;
+
+		case 30000:	/* blob */
+			if (fh.num_blobs >= 65535) {
+				fprintf(stderr, "--%s: Blob array full\n",
+					name);
+				break;
+			}
+
+			blob_cur = calloc(1, sizeof(*blob_cur));
+			if (!blob_cur) {
+				fprintf(stderr, "--%s: Out of memory\n",
+					name);
+				break;
+			}
+
+			blob_cur->fn = optarg;
+			/* Default to the picture described last, if any */
+			if (fh.num_pics)
+				blob_cur->header.picture_id = fh.num_pics - 1;
+
+			if (!blob_first)
+				blob_first = blob_cur;
+			if (blob_last)
+				blob_last->next = blob_cur;
+			blob_last = blob_cur;
+			fh.num_blobs++;
+			break;
+
+		case 30001:	/* blob_type, u16 per the usage text */
+			if (!parse_opt(name, optarg, 65535, &ul))
+				blob_cur->header.type = ul;
+			break;
+
+		case 30002:	/* blob_picture_id */
+			if (!parse_opt(name, optarg, 255, &ul))
+				blob_cur->header.picture_id = ul;
+			break;
+
+		case 'h':
+			print_help(argv[0]);
+			ret = EXIT_SUCCESS;
+			goto EXIT;
+
+		case '?':
+		default:
+			/* Bad usage is a failure, unlike an explicit -h */
+			print_help(argv[0]);
+			goto EXIT;
+		} /* switch (c) */
+	} /* while ((c = getopt_long(...)) != -1) */
+
+	/* The last lone argument is the output file, earlier ones are dropped */
+	while (optind < argc) {
+		ofn = argv[optind];
+		optind++;
+	}
+
+	if (!ofn) {
+		fprintf(stderr, "No output file given\n");
+		goto EXIT;
+	}
+
+	/* Read file lengths */
+	for (blob_cur = blob_first; blob_cur; blob_cur = blob_cur->next) {
+		FILE *f;
+		long pos = -1;
+
+		if (!blob_cur->fn)
+			continue;
+
+		f = fopen(blob_cur->fn, "rb");
+		if (f) {
+			if (!fseek(f, 0, SEEK_END))
+				pos = ftell(f);
+			fclose(f);
+		}
+
+		/* On failure the length stays 0 and no data is copied later */
+		if (pos < 0 || pos > (1 << 30)) {
+			fprintf(stderr,
+				"Error getting file length (or too long): %s\n",
+				blob_cur->fn);
+			continue;
+		}
+
+		blob_cur->header.length = pos;
+	}
+
+	/* Set magic headers */
+#if __BYTE_ORDER == __BIG_ENDIAN
+	memcpy(&fh.id[0], BOOTSPLASH_MAGIC_BE, 16);
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
+	memcpy(&fh.id[0], BOOTSPLASH_MAGIC_LE, 16);
+#else
+#error
+#endif
+	fh.version = BOOTSPLASH_VERSION;
+
+	/* Set blob counts; blobs naming a picture that does not exist are not counted */
+	for (blob_cur = blob_first; blob_cur; blob_cur = blob_cur->next) {
+		if (blob_cur->header.picture_id < fh.num_pics)
+			ph[blob_cur->header.picture_id].num_blobs++;
+	}
+
+	/* Dump structs */
+	dump_file_header(&fh);
+
+	for (ul = 0; ul < fh.num_pics; ul++)
+		dump_pic_header(&ph[ul]);
+
+	for (blob_cur = blob_first; blob_cur; blob_cur = blob_cur->next)
+		dump_blob(blob_cur);
+
+	/* Write to file */
+	printf("Writing splash to file: %s\n", ofn);
+	of = fopen(ofn, "wb");
+	if (!of)
+		goto ERR_WRITING;
+
+	if (fwrite(&fh, sizeof(struct splash_file_header), 1, of) != 1)
+		goto ERR_WRITING;
+
+	for (ul = 0; ul < fh.num_pics; ul++) {
+		if (fwrite(&ph[ul], sizeof(struct splash_pic_header), 1, of)
+		    != 1)
+			goto ERR_WRITING;
+	}
+
+	for (blob_cur = blob_first; blob_cur; blob_cur = blob_cur->next) {
+		if (fwrite(&blob_cur->header,
+			   sizeof(struct splash_blob_header), 1, of) != 1)
+			goto ERR_WRITING;
+
+		/* Header only: the blob is empty or could not be measured */
+		if (!blob_cur->header.length || !blob_cur->fn)
+			continue;
+
+		if (copy_blob_data(blob_cur->fn, blob_cur->header.length, of))
+			goto ERR_WRITING;
+	}
+
+	/* fclose() flushes buffered data, so its failure is a write error */
+	c = fclose(of);
+	of = NULL;
+	if (c)
+		goto ERR_WRITING;
+
+	ret = EXIT_SUCCESS;
+	goto EXIT;
+
+ERR_WRITING:
+	fprintf(stderr, "Error writing splash.\n");
+	fprintf(stderr, "The output file is probably corrupt.\n");
+	if (of)
+		fclose(of);
+
+EXIT:
+	free_blobs(blob_first);
+	return ret;
+}
 | 
			
		||||
@@ -0,0 +1,12 @@
 | 
			
		||||
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
 | 
			
		||||
index 03b83aa91277..dfc6c7d1b0e7 100644
 | 
			
		||||
--- a/drivers/bluetooth/btusb.c
 | 
			
		||||
+++ b/drivers/bluetooth/btusb.c
 | 
			
		||||
@@ -4070,6 +4070,7 @@ static int btusb_setup_qca(struct hci_dev *hdev)
 | 
			
		||||
 	}
 | 
			
		||||
 	if (!info) {
 | 
			
		||||
 		bt_dev_err(hdev, "don't support firmware rome 0x%x", ver_rom);
 | 
			
		||||
+		if (ver_rom & ~0xffffU) return 0;
 | 
			
		||||
 		return -ENODEV;
 | 
			
		||||
 	}
 | 
			
		||||
 
 | 
			
		||||
@@ -0,0 +1,49 @@
 | 
			
		||||
From cb408fb65a08bd45543724c1e9b8f38ae1bebc4a Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Arnaud Ferraris <arnaud.ferraris@gmail.com>
 | 
			
		||||
Date: Tue, 4 Aug 2020 15:12:59 +0200
 | 
			
		||||
Subject: [PATCH 177/183] leds-gpio: make max_brightness configurable
 | 
			
		||||
 | 
			
		||||
---
 | 
			
		||||
 drivers/leds/leds-gpio.c | 4 ++++
 | 
			
		||||
 include/linux/leds.h     | 3 ++-
 | 
			
		||||
 2 files changed, 6 insertions(+), 1 deletion(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
 | 
			
		||||
index 93f5b1b60fde..f8483fab1164 100644
 | 
			
		||||
--- a/drivers/leds/leds-gpio.c
 | 
			
		||||
+++ b/drivers/leds/leds-gpio.c
 | 
			
		||||
@@ -108,6 +108,8 @@ static int create_gpio_led(const struct gpio_led *template,
 | 
			
		||||
 	if (ret < 0)
 | 
			
		||||
 		return ret;
 | 
			
		||||
 
 | 
			
		||||
+	led_dat->cdev.max_brightness = template->max_brightness;
 | 
			
		||||
+
 | 
			
		||||
 	if (template->name) {
 | 
			
		||||
 		led_dat->cdev.name = template->name;
 | 
			
		||||
 		ret = devm_led_classdev_register(parent, &led_dat->cdev);
 | 
			
		||||
@@ -177,6 +179,8 @@ static struct gpio_leds_priv *gpio_leds_create(struct platform_device *pdev)
 | 
			
		||||
 		if (fwnode_property_present(child, "panic-indicator"))
 | 
			
		||||
 			led.panic_indicator = 1;
 | 
			
		||||
 
 | 
			
		||||
+		fwnode_property_read_u32(child, "max-brightness", &led.max_brightness);
 | 
			
		||||
+
 | 
			
		||||
 		ret = create_gpio_led(&led, led_dat, dev, child, NULL);
 | 
			
		||||
 		if (ret < 0) {
 | 
			
		||||
 			fwnode_handle_put(child);
 | 
			
		||||
diff --git a/include/linux/leds.h b/include/linux/leds.h
 | 
			
		||||
index 6a8d6409c993..99a80092114d 100644
 | 
			
		||||
--- a/include/linux/leds.h
 | 
			
		||||
+++ b/include/linux/leds.h
 | 
			
		||||
@@ -513,7 +513,8 @@ typedef int (*gpio_blink_set_t)(struct gpio_desc *desc, int state,
 | 
			
		||||
 struct gpio_led {
 | 
			
		||||
 	const char *name;
 | 
			
		||||
 	const char *default_trigger;
 | 
			
		||||
-	unsigned 	gpio;
 | 
			
		||||
+	unsigned	gpio;
 | 
			
		||||
+	unsigned	max_brightness;
 | 
			
		||||
 	unsigned	active_low : 1;
 | 
			
		||||
 	unsigned	retain_state_suspended : 1;
 | 
			
		||||
 	unsigned	panic_indicator : 1;
 | 
			
		||||
-- 
 | 
			
		||||
2.30.0
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										1526
									
								
								sys-kernel/pinephone-sources/files/5.11.5.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1526
									
								
								sys-kernel/pinephone-sources/files/5.11.5.patch
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -0,0 +1,391 @@
 | 
			
		||||
From mboxrd@z Thu Jan  1 00:00:00 1970
 | 
			
		||||
Return-Path: <linux-kernel-owner@kernel.org>
 | 
			
		||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
 | 
			
		||||
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
 | 
			
		||||
X-Spam-Level: 
 | 
			
		||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
 | 
			
		||||
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
 | 
			
		||||
	INCLUDES_PATCH,MAILING_LIST_MULTI,MENTIONS_GIT_HOSTING,SPF_HELO_NONE,SPF_PASS,
 | 
			
		||||
	USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
 | 
			
		||||
	version=3.4.0
 | 
			
		||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
 | 
			
		||||
	by smtp.lore.kernel.org (Postfix) with ESMTP id BA09AC433ED
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:04 +0000 (UTC)
 | 
			
		||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
 | 
			
		||||
	by mail.kernel.org (Postfix) with ESMTP id 99A326108C
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:04 +0000 (UTC)
 | 
			
		||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
 | 
			
		||||
        id S230359AbhETGzY (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:24 -0400
 | 
			
		||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37854 "EHLO
 | 
			
		||||
        lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
 | 
			
		||||
        with ESMTP id S229534AbhETGzX (ORCPT
 | 
			
		||||
        <rfc822;linux-kernel@vger.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:23 -0400
 | 
			
		||||
Received: from mail-qk1-x74a.google.com (mail-qk1-x74a.google.com [IPv6:2607:f8b0:4864:20::74a])
 | 
			
		||||
        by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2DB47C061574
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:01 -0700 (PDT)
 | 
			
		||||
Received: by mail-qk1-x74a.google.com with SMTP id z2-20020a3765020000b02903a5f51b1c74so684222qkb.7
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:01 -0700 (PDT)
 | 
			
		||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=google.com; s=20161025;
 | 
			
		||||
        h=date:message-id:mime-version:subject:from:to:cc;
 | 
			
		||||
        bh=Y3hJAMwzbf34YQU8QX5BsSV2xoCmy36DYto5ZLIStJc=;
 | 
			
		||||
        b=r/V1aR1KHSQ2RwrGIEEbdDV0RqV+tdHJLBnCnPMLdI4quvTDua13dKOHpxS2Rc7bc4
 | 
			
		||||
         6ON9rpxOpEhBMPLS8798xqa4jQBTINTCKNlIi3TpaV8t/shwlViCb4Y9bZ4ng8VEsXp3
 | 
			
		||||
         H2s3DQbb47Iio7YrOnBahF4qBDJl2fkHL257Ao4wgzgG/ZCK2oy5dcipOFrEpQqPk5vO
 | 
			
		||||
         hhTC4Zr1DE3XI+Y+uTozfI8CoAtllv6qL31gAWcycyeN72teVQa9ilaeTdglxhCO9DVG
 | 
			
		||||
         BFkiZH+21Eo3M8PRz4OztnGgRtMvbgNnuUWZ68bnZkO4wMyL6mX2520HA9NQNkGSXLnP
 | 
			
		||||
         74Zg==
 | 
			
		||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=1e100.net; s=20161025;
 | 
			
		||||
        h=x-gm-message-state:date:message-id:mime-version:subject:from:to:cc;
 | 
			
		||||
        bh=Y3hJAMwzbf34YQU8QX5BsSV2xoCmy36DYto5ZLIStJc=;
 | 
			
		||||
        b=h+lPKp7mQ6QF8fW3fzT7HQgoaLvOfkKtGjwFNvWMOi8UMz94CGWpTgC4tsEX0PenoK
 | 
			
		||||
         snCz9kDMIR35YO9Dlhz1Ci/04htNK9p+rnvGn7ri/Oin5fFeyVQ15qh33Bgut5m3SKR2
 | 
			
		||||
         imeFBLkWsXGtFd23XCBmjIcNrqZA0LxhIwoYCbrVWSq5H29Eo6C9ab0gmJ1oY0DCPOL/
 | 
			
		||||
         Fi8M2neMwLN09EebwZONh8AGuP0XiL0oSnAGDZAhaaAimfHrPBMMYCrxpjnaGxPG2hY0
 | 
			
		||||
         gvju/bIag6Ug8urHdAAGWsdLaNIsdrIKWlaL76FjcULVwdAARKQifiMMTwJ2JU5y5jMG
 | 
			
		||||
         OKRg==
 | 
			
		||||
X-Gm-Message-State: AOAM5322gu+Tvm1pCjTiKdWMNb3cz1Z6+VCfYHkB7vDvNRYItvu08gEA
 | 
			
		||||
        /W/WlY6Lc6/4O5nrreOspbq5n77XobE=
 | 
			
		||||
X-Google-Smtp-Source: ABdhPJy+4EmI1VvFDhlB3errX+0774OdClFY8nQyFqDe9Pqq8FOdLBnXamEbn+N9M1F/HG6sJ6Mw/n7qw/8=
 | 
			
		||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
 | 
			
		||||
 (user=yuzhao job=sendgmr) by 2002:a0c:e4cd:: with SMTP id g13mr3727631qvm.34.1621493640278;
 | 
			
		||||
 Wed, 19 May 2021 23:54:00 -0700 (PDT)
 | 
			
		||||
Date:   Thu, 20 May 2021 00:53:41 -0600
 | 
			
		||||
Message-Id: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
Mime-Version: 1.0
 | 
			
		||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
Subject: [PATCH v3 00/14] Multigenerational LRU Framework
 | 
			
		||||
From:   Yu Zhao <yuzhao@google.com>
 | 
			
		||||
To:     linux-mm@kvack.org
 | 
			
		||||
Cc:     Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
 | 
			
		||||
        Andrew Morton <akpm@linux-foundation.org>,
 | 
			
		||||
        Dave Chinner <david@fromorbit.com>,
 | 
			
		||||
        Dave Hansen <dave.hansen@linux.intel.com>,
 | 
			
		||||
        Donald Carr <sirspudd@gmail.com>,
 | 
			
		||||
        Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
 | 
			
		||||
        Johannes Weiner <hannes@cmpxchg.org>,
 | 
			
		||||
        Jonathan Corbet <corbet@lwn.net>,
 | 
			
		||||
        Joonsoo Kim <iamjoonsoo.kim@lge.com>,
 | 
			
		||||
        Konstantin Kharlamov <hi-angel@yandex.ru>,
 | 
			
		||||
        Marcus Seyfarth <m.seyfarth@gmail.com>,
 | 
			
		||||
        Matthew Wilcox <willy@infradead.org>,
 | 
			
		||||
        Mel Gorman <mgorman@suse.de>,
 | 
			
		||||
        Miaohe Lin <linmiaohe@huawei.com>,
 | 
			
		||||
        Michael Larabel <michael@michaellarabel.com>,
 | 
			
		||||
        Michal Hocko <mhocko@suse.com>,
 | 
			
		||||
        Michel Lespinasse <michel@lespinasse.org>,
 | 
			
		||||
        Rik van Riel <riel@surriel.com>,
 | 
			
		||||
        Roman Gushchin <guro@fb.com>,
 | 
			
		||||
        Tim Chen <tim.c.chen@linux.intel.com>,
 | 
			
		||||
        Vlastimil Babka <vbabka@suse.cz>,
 | 
			
		||||
        Yang Shi <shy828301@gmail.com>,
 | 
			
		||||
        Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
 | 
			
		||||
        linux-kernel@vger.kernel.org, lkp@lists.01.org,
 | 
			
		||||
        page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
 | 
			
		||||
Content-Type: text/plain; charset="UTF-8"
 | 
			
		||||
Precedence: bulk
 | 
			
		||||
List-ID: <linux-kernel.vger.kernel.org>
 | 
			
		||||
X-Mailing-List: linux-kernel@vger.kernel.org
 | 
			
		||||
List-Archive: <https://lore.kernel.org/lkml/>
 | 
			
		||||
 | 
			
		||||
What's new in v3
 | 
			
		||||
================
 | 
			
		||||
1) Fixed a bug reported by the Arch Linux kernel team:
 | 
			
		||||
   https://github.com/zen-kernel/zen-kernel/issues/207
 | 
			
		||||
2) Rebased to v5.13-rc2.
 | 
			
		||||
 | 
			
		||||
Highlights from v2
 | 
			
		||||
==================
 | 
			
		||||
Konstantin Kharlamov <hi-angel@yandex.ru> reported:
 | 
			
		||||
  My success story: I have Archlinux with 8G RAM + zswap + swap. While
 | 
			
		||||
  developing, I have lots of apps opened such as multiple LSP-servers
 | 
			
		||||
  for different langs, chats, two browsers, etc. Usually, my system
 | 
			
		||||
  gets quickly to a point of SWAP-storms, where I have to kill
 | 
			
		||||
  LSP-servers, restart browsers to free memory, etc, otherwise the
 | 
			
		||||
  system lags heavily and is barely usable.
 | 
			
		||||
 
 | 
			
		||||
  1.5 day ago I migrated from 5.11.15 kernel to 5.12 + the LRU
 | 
			
		||||
  patchset, and I started up by opening lots of apps to create memory
 | 
			
		||||
  pressure, and worked for a day like this. Till now I had *not a
 | 
			
		||||
  single SWAP-storm*, and mind you I got 3.4G in SWAP. I was never
 | 
			
		||||
  getting to the point of 3G in SWAP before without a single
 | 
			
		||||
  SWAP-storm.
 | 
			
		||||
 | 
			
		||||
TLDR
 | 
			
		||||
====
 | 
			
		||||
The current page reclaim is too expensive in terms of CPU usage and
 | 
			
		||||
often makes poor choices about what to evict. We would like to offer
 | 
			
		||||
an alternative framework that is performant, versatile and
 | 
			
		||||
straightforward.
 | 
			
		||||
 | 
			
		||||
Repo
 | 
			
		||||
====
 | 
			
		||||
git fetch https://linux-mm.googlesource.com/page-reclaim refs/changes/53/1253/1
 | 
			
		||||
 | 
			
		||||
Problems
 | 
			
		||||
========
 | 
			
		||||
Notion of active/inactive
 | 
			
		||||
-------------------------
 | 
			
		||||
Data centers need to predict whether a job can successfully land on a
 | 
			
		||||
machine without actually impacting the existing jobs. The granularity
 | 
			
		||||
of the active/inactive is too coarse to be useful for job schedulers
 | 
			
		||||
to make such decisions. In addition, data centers need to monitor
 | 
			
		||||
their memory utilization for horizontal scaling. The active/inactive
 | 
			
		||||
cannot give any insight into a pool of machines because aggregating
 | 
			
		||||
them across multiple machines without a common frame of reference
 | 
			
		||||
yields no meaningful results.
 | 
			
		||||
 | 
			
		||||
Phones and laptops need to make good choices about what to evict,
 | 
			
		||||
since they are more sensitive to the major faults and the power
 | 
			
		||||
consumption. Major faults can cause "janks" (slow UI renderings) and
 | 
			
		||||
negatively impact user experience. The selection between anon and file
 | 
			
		||||
types has been suboptimal because direct comparisons between them are
 | 
			
		||||
infeasible based on the notion of active/inactive. On phones and
 | 
			
		||||
laptops, executable pages are frequently evicted despite the fact that
 | 
			
		||||
there are many less recently used anon pages. Conversely, on
 | 
			
		||||
workstations building large projects, anon pages are occasionally
 | 
			
		||||
swapped out while page cache contains many less recently used pages.
 | 
			
		||||
 | 
			
		||||
Fundamentally, the notion of active/inactive has very limited ability
 | 
			
		||||
to measure temporal locality.
 | 
			
		||||
 | 
			
		||||
Incremental scans via rmap
 | 
			
		||||
--------------------------
 | 
			
		||||
Each incremental scan picks up where the last scan left off and
 | 
			
		||||
stops after it has found a handful of unreferenced pages. For
 | 
			
		||||
workloads using a large amount of anon memory, incremental scans lose
 | 
			
		||||
the advantage under sustained memory pressure due to high ratios of
 | 
			
		||||
the number of scanned pages to the number of reclaimed pages. On top
 | 
			
		||||
of this, the rmap has complex data structures. And the combined
 | 
			
		||||
effects typically result in a high amount of CPU usage in the reclaim
 | 
			
		||||
path.
 | 
			
		||||
 | 
			
		||||
Simply put, incremental scans via rmap have no regard for spatial
 | 
			
		||||
locality.
 | 
			
		||||
 | 
			
		||||
Solutions
 | 
			
		||||
=========
 | 
			
		||||
Notion of generation numbers
 | 
			
		||||
----------------------------
 | 
			
		||||
The notion of generation numbers introduces a temporal dimension. Each
 | 
			
		||||
generation is a dot on the timeline and it includes all pages that
 | 
			
		||||
have been referenced since it was created.
 | 
			
		||||
 | 
			
		||||
Given an lruvec, scans of anon and file types and selections between
 | 
			
		||||
them are all based on direct comparisons of generation numbers, which
 | 
			
		||||
are simple and yet effective.
 | 
			
		||||
 | 
			
		||||
A larger number of pages can be spread out across a configurable
 | 
			
		||||
number of generations, which are associated with timestamps and
 | 
			
		||||
therefore aggregatable. This is specifically designed for data centers
 | 
			
		||||
that require working set estimation and proactive reclaim.
 | 
			
		||||
 | 
			
		||||
Differential scans via page tables
 | 
			
		||||
----------------------------------
 | 
			
		||||
Each differential scan discovers all pages that have been referenced
 | 
			
		||||
since the last scan. It walks the mm_struct list associated with an
 | 
			
		||||
lruvec to scan page tables of processes that have been scheduled since
 | 
			
		||||
the last scan. The cost of each differential scan is roughly
 | 
			
		||||
proportional to the number of referenced pages it discovers. Page
 | 
			
		||||
tables usually have good memory locality. The end result is generally
 | 
			
		||||
a significant reduction in CPU usage, for workloads using a large
 | 
			
		||||
amount of anon memory.
 | 
			
		||||
 | 
			
		||||
For workloads that have extremely sparse page tables, it is still
 | 
			
		||||
possible to fall back to incremental scans via rmap.
 | 
			
		||||
 | 
			
		||||
Framework
 | 
			
		||||
=========
 | 
			
		||||
For each lruvec, evictable pages are divided into multiple
 | 
			
		||||
generations. The youngest generation number is stored in
 | 
			
		||||
lrugen->max_seq for both anon and file types as they are aged on an
 | 
			
		||||
equal footing. The oldest generation numbers are stored in
 | 
			
		||||
lrugen->min_seq[2] separately for anon and file types as clean file
 | 
			
		||||
pages can be evicted regardless of may_swap or may_writepage. These
 | 
			
		||||
three variables are monotonically increasing. Generation numbers are
 | 
			
		||||
truncated into order_base_2(MAX_NR_GENS+1) bits in order to fit into
 | 
			
		||||
page->flags. The sliding window technique is used to prevent truncated
 | 
			
		||||
generation numbers from overlapping. Each truncated generation number
 | 
			
		||||
is an index to
 | 
			
		||||
lrugen->lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]. Evictable
 | 
			
		||||
pages are added to the per-zone lists indexed by lrugen->max_seq or
 | 
			
		||||
lrugen->min_seq[2] (modulo MAX_NR_GENS), depending on their types.
 | 
			
		||||
 | 
			
		||||
Each generation is then divided into multiple tiers. Tiers represent
 | 
			
		||||
levels of usage from file descriptors only. Pages accessed N times via
 | 
			
		||||
file descriptors belong to tier order_base_2(N). Each generation
 | 
			
		||||
contains at most MAX_NR_TIERS tiers, and they require additional
 | 
			
		||||
MAX_NR_TIERS-2 bits in page->flags. In contrast to moving across
 | 
			
		||||
generations which requires the lru lock for the list operations,
 | 
			
		||||
moving across tiers only involves an atomic operation on page->flags
 | 
			
		||||
and therefore has a negligible cost. A feedback loop modeled after the
 | 
			
		||||
PID controller monitors the refault rates across all tiers and decides
 | 
			
		||||
when to activate pages from which tiers in the reclaim path.
 | 
			
		||||
 | 
			
		||||
The framework comprises two conceptually independent components: the
 | 
			
		||||
aging and the eviction, which can be invoked separately from user
 | 
			
		||||
space for the purpose of working set estimation and proactive reclaim.
 | 
			
		||||
 | 
			
		||||
Aging
 | 
			
		||||
-----
 | 
			
		||||
The aging produces young generations. Given an lruvec, the aging scans
 | 
			
		||||
page tables for referenced pages of this lruvec. Upon finding one, the
 | 
			
		||||
aging updates its generation number to max_seq. After each round of
 | 
			
		||||
scan, the aging increments max_seq.
 | 
			
		||||
 | 
			
		||||
The aging maintains either a system-wide mm_struct list or per-memcg
 | 
			
		||||
mm_struct lists, and it only scans page tables of processes that have
 | 
			
		||||
been scheduled since the last scan.
 | 
			
		||||
 | 
			
		||||
The aging is due when both of min_seq[2] reach max_seq-1, assuming
 | 
			
		||||
both anon and file types are reclaimable.
 | 
			
		||||
 | 
			
		||||
Eviction
 | 
			
		||||
--------
 | 
			
		||||
The eviction consumes old generations. Given an lruvec, the eviction
 | 
			
		||||
scans the pages on the per-zone lists indexed by either of min_seq[2].
 | 
			
		||||
It first tries to select a type based on the values of min_seq[2].
 | 
			
		||||
When anon and file types are both available from the same generation,
 | 
			
		||||
it selects the one that has a lower refault rate.
 | 
			
		||||
 | 
			
		||||
During a scan, the eviction sorts pages according to their new
 | 
			
		||||
generation numbers, if the aging has found them referenced. It also
 | 
			
		||||
moves pages from the tiers that have higher refault rates than tier 0
 | 
			
		||||
to the next generation.
 | 
			
		||||
 | 
			
		||||
When it finds all the per-zone lists of a selected type are empty, the
 | 
			
		||||
eviction increments min_seq[2] indexed by this selected type.
 | 
			
		||||
 | 
			
		||||
Use cases
 | 
			
		||||
=========
 | 
			
		||||
High anon workloads
 | 
			
		||||
-------------------
 | 
			
		||||
Our real-world benchmark that browses popular websites in multiple
 | 
			
		||||
Chrome tabs demonstrates 51% less CPU usage from kswapd and 52% (full)
 | 
			
		||||
less PSI.
 | 
			
		||||
 | 
			
		||||
Without this patchset, the profile of kswapd looks like:
 | 
			
		||||
  31.03%  page_vma_mapped_walk
 | 
			
		||||
  25.59%  lzo1x_1_do_compress
 | 
			
		||||
   4.63%  do_raw_spin_lock
 | 
			
		||||
   3.89%  vma_interval_tree_iter_next
 | 
			
		||||
   3.33%  vma_interval_tree_subtree_search
 | 
			
		||||
 | 
			
		||||
With this patchset, it looks like:
 | 
			
		||||
  49.36%  lzo1x_1_do_compress
 | 
			
		||||
   4.54%  page_vma_mapped_walk
 | 
			
		||||
   4.45%  memset_erms
 | 
			
		||||
   3.47%  walk_pte_range
 | 
			
		||||
   2.88%  zram_bvec_rw
 | 
			
		||||
 | 
			
		||||
In addition, direct reclaim latency is reduced by 22% at 99th
 | 
			
		||||
percentile and the number of refaults is reduced by 7%. Both metrics
 | 
			
		||||
are important to phones and laptops as they are highly correlated to
 | 
			
		||||
user experience.
 | 
			
		||||
 | 
			
		||||
High page cache workloads
 | 
			
		||||
-------------------------
 | 
			
		||||
Tiers are specifically designed to improve the performance of page
 | 
			
		||||
cache under memory pressure. The fio/io_uring benchmark shows 14%
 | 
			
		||||
increase in IOPS when randomly accessing in buffered I/O mode.
 | 
			
		||||
 | 
			
		||||
Without this patchset, the profile of fio/io_uring looks like:
 | 
			
		||||
  Children  Self   Symbol
 | 
			
		||||
  -----------------------------------
 | 
			
		||||
  12.03%    0.03%  __page_cache_alloc
 | 
			
		||||
   6.53%    0.83%  shrink_active_list
 | 
			
		||||
   2.53%    0.44%  mark_page_accessed
 | 
			
		||||
 | 
			
		||||
With this patchset, it looks like:
 | 
			
		||||
  Children  Self   Symbol
 | 
			
		||||
  -----------------------------------
 | 
			
		||||
  9.45%     0.03%  __page_cache_alloc
 | 
			
		||||
  0.52%     0.46%  mark_page_accessed
 | 
			
		||||
 | 
			
		||||
Working set estimation
 | 
			
		||||
----------------------
 | 
			
		||||
User space can invoke the aging by writing "+ memcg_id node_id gen
 | 
			
		||||
[swappiness]" to /sys/kernel/debug/lru_gen. This debugfs interface
 | 
			
		||||
also provides the birth time and the size of each generation.
 | 
			
		||||
 | 
			
		||||
For example, given a pool of machines, a job scheduler periodically
 | 
			
		||||
invokes the aging to estimate the working set of each machine. And it
 | 
			
		||||
ranks the machines based on the sizes of their working sets and
 | 
			
		||||
selects the most ideal ones to land new jobs.
 | 
			
		||||
 | 
			
		||||
Proactive reclaim
 | 
			
		||||
-----------------
 | 
			
		||||
User space can invoke the eviction by writing "- memcg_id node_id gen
 | 
			
		||||
[swappiness] [nr_to_reclaim]" to /sys/kernel/debug/lru_gen. Multiple
 | 
			
		||||
command lines are supported, as is concatenation with delimiters.
 | 
			
		||||
 | 
			
		||||
For example, a job scheduler can invoke the eviction if it anticipates
 | 
			
		||||
new jobs. The savings from proactive reclaim may provide certain SLA
 | 
			
		||||
when new jobs actually land.
 | 
			
		||||
 | 
			
		||||
Yu Zhao (14):
 | 
			
		||||
  include/linux/memcontrol.h: do not warn in page_memcg_rcu() if
 | 
			
		||||
    !CONFIG_MEMCG
 | 
			
		||||
  include/linux/nodemask.h: define next_memory_node() if !CONFIG_NUMA
 | 
			
		||||
  include/linux/cgroup.h: export cgroup_mutex
 | 
			
		||||
  mm, x86: support the access bit on non-leaf PMD entries
 | 
			
		||||
  mm/vmscan.c: refactor shrink_node()
 | 
			
		||||
  mm/workingset.c: refactor pack_shadow() and unpack_shadow()
 | 
			
		||||
  mm: multigenerational lru: groundwork
 | 
			
		||||
  mm: multigenerational lru: activation
 | 
			
		||||
  mm: multigenerational lru: mm_struct list
 | 
			
		||||
  mm: multigenerational lru: aging
 | 
			
		||||
  mm: multigenerational lru: eviction
 | 
			
		||||
  mm: multigenerational lru: user interface
 | 
			
		||||
  mm: multigenerational lru: Kconfig
 | 
			
		||||
  mm: multigenerational lru: documentation
 | 
			
		||||
 | 
			
		||||
 Documentation/vm/index.rst        |    1 +
 | 
			
		||||
 Documentation/vm/multigen_lru.rst |  143 ++
 | 
			
		||||
 arch/Kconfig                      |    9 +
 | 
			
		||||
 arch/x86/Kconfig                  |    1 +
 | 
			
		||||
 arch/x86/include/asm/pgtable.h    |    2 +-
 | 
			
		||||
 arch/x86/mm/pgtable.c             |    5 +-
 | 
			
		||||
 fs/exec.c                         |    2 +
 | 
			
		||||
 fs/fuse/dev.c                     |    3 +-
 | 
			
		||||
 include/linux/cgroup.h            |   15 +-
 | 
			
		||||
 include/linux/memcontrol.h        |    7 +-
 | 
			
		||||
 include/linux/mm.h                |    2 +
 | 
			
		||||
 include/linux/mm_inline.h         |  234 +++
 | 
			
		||||
 include/linux/mm_types.h          |  107 ++
 | 
			
		||||
 include/linux/mmzone.h            |  117 ++
 | 
			
		||||
 include/linux/nodemask.h          |    1 +
 | 
			
		||||
 include/linux/page-flags-layout.h |   19 +-
 | 
			
		||||
 include/linux/page-flags.h        |    4 +-
 | 
			
		||||
 include/linux/pgtable.h           |    4 +-
 | 
			
		||||
 include/linux/swap.h              |    4 +-
 | 
			
		||||
 kernel/bounds.c                   |    6 +
 | 
			
		||||
 kernel/events/uprobes.c           |    2 +-
 | 
			
		||||
 kernel/exit.c                     |    1 +
 | 
			
		||||
 kernel/fork.c                     |   10 +
 | 
			
		||||
 kernel/kthread.c                  |    1 +
 | 
			
		||||
 kernel/sched/core.c               |    2 +
 | 
			
		||||
 mm/Kconfig                        |   58 +
 | 
			
		||||
 mm/huge_memory.c                  |    5 +-
 | 
			
		||||
 mm/khugepaged.c                   |    2 +-
 | 
			
		||||
 mm/memcontrol.c                   |   28 +
 | 
			
		||||
 mm/memory.c                       |   10 +-
 | 
			
		||||
 mm/migrate.c                      |    2 +-
 | 
			
		||||
 mm/mm_init.c                      |    6 +-
 | 
			
		||||
 mm/mmzone.c                       |    2 +
 | 
			
		||||
 mm/rmap.c                         |    6 +
 | 
			
		||||
 mm/swap.c                         |   22 +-
 | 
			
		||||
 mm/swapfile.c                     |    6 +-
 | 
			
		||||
 mm/userfaultfd.c                  |    2 +-
 | 
			
		||||
 mm/vmscan.c                       | 2638 ++++++++++++++++++++++++++++-
 | 
			
		||||
 mm/workingset.c                   |  169 +-
 | 
			
		||||
 39 files changed, 3498 insertions(+), 160 deletions(-)
 | 
			
		||||
 create mode 100644 Documentation/vm/multigen_lru.rst
 | 
			
		||||
 | 
			
		||||
-- 
 | 
			
		||||
2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,146 @@
 | 
			
		||||
From mboxrd@z Thu Jan  1 00:00:00 1970
 | 
			
		||||
Return-Path: <linux-kernel-owner@kernel.org>
 | 
			
		||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
 | 
			
		||||
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
 | 
			
		||||
X-Spam-Level: 
 | 
			
		||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
 | 
			
		||||
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
 | 
			
		||||
	INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
 | 
			
		||||
	USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=ham autolearn_force=no
 | 
			
		||||
	version=3.4.0
 | 
			
		||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
 | 
			
		||||
	by smtp.lore.kernel.org (Postfix) with ESMTP id D67E4C433B4
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:09 +0000 (UTC)
 | 
			
		||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
 | 
			
		||||
	by mail.kernel.org (Postfix) with ESMTP id BF3A360E0B
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:09 +0000 (UTC)
 | 
			
		||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
 | 
			
		||||
        id S230431AbhETGz2 (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:28 -0400
 | 
			
		||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37864 "EHLO
 | 
			
		||||
        lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
 | 
			
		||||
        with ESMTP id S229534AbhETGzY (ORCPT
 | 
			
		||||
        <rfc822;linux-kernel@vger.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:24 -0400
 | 
			
		||||
Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a])
 | 
			
		||||
        by lindbergh.monkeyblade.net (Postfix) with ESMTPS id DD592C061574
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:02 -0700 (PDT)
 | 
			
		||||
Received: by mail-yb1-xb4a.google.com with SMTP id h67-20020a25d0460000b0290517e5f14ba4so2155434ybg.18
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:02 -0700 (PDT)
 | 
			
		||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=google.com; s=20161025;
 | 
			
		||||
        h=date:in-reply-to:message-id:mime-version:references:subject:from:to
 | 
			
		||||
         :cc;
 | 
			
		||||
        bh=rcGZDNyJ6vYh3nv1NbyctjSVuVLx0bLzs+7ceU1fwm4=;
 | 
			
		||||
        b=W+uE07dbkPRm0EI7rt0odOA402xB5GM5xsOKzyKKjPQdnq9FzvuhBH2EmYJx+e3w2P
 | 
			
		||||
         M+GjA/Y/0N577Zt0vRn1fv9k1GS93aX/OLI3asM1EluD+bF6m15Qua90BDPhuN6RLdFt
 | 
			
		||||
         9XaT7ugKFU1Zb0CN5pODFmCE1L4eWk8Idy1/MbWRtRICoOacDrCOBD3XXG+gene95EAz
 | 
			
		||||
         h6RenUUXrHuOEIq+2ZT1q6P10VKHqSaPsyoiUDDSBllpMLW3kYmkOWBQGnRaPndswvZ6
 | 
			
		||||
         VxYMBaR/6WNfgBuQGLp6vrXdw55euSCrNkjy2sf+vVpzlTPTbCCa8UgSnsOUvdDUidvY
 | 
			
		||||
         K/+A==
 | 
			
		||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=1e100.net; s=20161025;
 | 
			
		||||
        h=x-gm-message-state:date:in-reply-to:message-id:mime-version
 | 
			
		||||
         :references:subject:from:to:cc;
 | 
			
		||||
        bh=rcGZDNyJ6vYh3nv1NbyctjSVuVLx0bLzs+7ceU1fwm4=;
 | 
			
		||||
        b=jzFekar0HP/0GPy5CL4U72iLLcQoeWxnnbnFRr4z4BTc2/hrpionzQD3GXHT9lyHMR
 | 
			
		||||
         /YmLr75qJXGcWb0dXLEPIxzZ7UjTYADt32Jy07ZfWITJ2+jRh2k1W3Wgty9YjwFX9wmd
 | 
			
		||||
         /MWxRyZj/674SBQZu12SYNlqqqh/WZxiziHwdgJEnbvX2lzsMWl2I1dL0e+tJwPr9OlS
 | 
			
		||||
         sG5TsZvw0fciDahseafGyx4m1dmrCykPWBTkpCu+BJmF7Bt4PV/ogCZO0TIxn9ezbYr0
 | 
			
		||||
         AkpMnHQnddjXudvfygGW20ymE3ieIKJU8hRbuF0+DfL6jdFdbczkyxRLk2dp6uiavglX
 | 
			
		||||
         Eajg==
 | 
			
		||||
X-Gm-Message-State: AOAM533a36jLBDzDifmN467aiI0KlSL/85xe4UvoMJPjnzoeRbTl9eck
 | 
			
		||||
        Ndk0rslscVb76VSHIoD/Mmkup4p5pgk=
 | 
			
		||||
X-Google-Smtp-Source: ABdhPJxDZYdk2SRkfTUl/Zu3nhhCt+1mIXIL72HTmroHynNzAjrSh46FnvoimDPO7nUIugXPbacDy3rCHfQ=
 | 
			
		||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
 | 
			
		||||
 (user=yuzhao job=sendgmr) by 2002:a5b:b92:: with SMTP id l18mr5146859ybq.414.1621493641980;
 | 
			
		||||
 Wed, 19 May 2021 23:54:01 -0700 (PDT)
 | 
			
		||||
Date:   Thu, 20 May 2021 00:53:42 -0600
 | 
			
		||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
Message-Id: <20210520065355.2736558-2-yuzhao@google.com>
 | 
			
		||||
Mime-Version: 1.0
 | 
			
		||||
References: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
Subject: [PATCH v3 01/14] include/linux/memcontrol.h: do not warn in
 | 
			
		||||
 page_memcg_rcu() if !CONFIG_MEMCG
 | 
			
		||||
From:   Yu Zhao <yuzhao@google.com>
 | 
			
		||||
To:     linux-mm@kvack.org
 | 
			
		||||
Cc:     Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
 | 
			
		||||
        Andrew Morton <akpm@linux-foundation.org>,
 | 
			
		||||
        Dave Chinner <david@fromorbit.com>,
 | 
			
		||||
        Dave Hansen <dave.hansen@linux.intel.com>,
 | 
			
		||||
        Donald Carr <sirspudd@gmail.com>,
 | 
			
		||||
        Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
 | 
			
		||||
        Johannes Weiner <hannes@cmpxchg.org>,
 | 
			
		||||
        Jonathan Corbet <corbet@lwn.net>,
 | 
			
		||||
        Joonsoo Kim <iamjoonsoo.kim@lge.com>,
 | 
			
		||||
        Konstantin Kharlamov <hi-angel@yandex.ru>,
 | 
			
		||||
        Marcus Seyfarth <m.seyfarth@gmail.com>,
 | 
			
		||||
        Matthew Wilcox <willy@infradead.org>,
 | 
			
		||||
        Mel Gorman <mgorman@suse.de>,
 | 
			
		||||
        Miaohe Lin <linmiaohe@huawei.com>,
 | 
			
		||||
        Michael Larabel <michael@michaellarabel.com>,
 | 
			
		||||
        Michal Hocko <mhocko@suse.com>,
 | 
			
		||||
        Michel Lespinasse <michel@lespinasse.org>,
 | 
			
		||||
        Rik van Riel <riel@surriel.com>,
 | 
			
		||||
        Roman Gushchin <guro@fb.com>,
 | 
			
		||||
        Tim Chen <tim.c.chen@linux.intel.com>,
 | 
			
		||||
        Vlastimil Babka <vbabka@suse.cz>,
 | 
			
		||||
        Yang Shi <shy828301@gmail.com>,
 | 
			
		||||
        Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
 | 
			
		||||
        linux-kernel@vger.kernel.org, lkp@lists.01.org,
 | 
			
		||||
        page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
 | 
			
		||||
        Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
Content-Type: text/plain; charset="UTF-8"
 | 
			
		||||
Precedence: bulk
 | 
			
		||||
List-ID: <linux-kernel.vger.kernel.org>
 | 
			
		||||
X-Mailing-List: linux-kernel@vger.kernel.org
 | 
			
		||||
List-Archive: <https://lore.kernel.org/lkml/>
 | 
			
		||||
 | 
			
		||||
page_memcg_rcu() warns on !rcu_read_lock_held() regardless of
 | 
			
		||||
CONFIG_MEMCG. The following legit code trips the warning when
 | 
			
		||||
!CONFIG_MEMCG, since lock_page_memcg() and unlock_page_memcg() are
 | 
			
		||||
empty for this config.
 | 
			
		||||
 | 
			
		||||
  memcg = lock_page_memcg(page1)
 | 
			
		||||
    (rcu_read_lock() if CONFIG_MEMCG=y)
 | 
			
		||||
 | 
			
		||||
  do something to page1
 | 
			
		||||
 | 
			
		||||
  if (page_memcg_rcu(page2) == memcg)
 | 
			
		||||
    do something to page2 too as it cannot be migrated away from the
 | 
			
		||||
    memcg either.
 | 
			
		||||
 | 
			
		||||
  unlock_page_memcg(page1)
 | 
			
		||||
    (rcu_read_unlock() if CONFIG_MEMCG=y)
 | 
			
		||||
 | 
			
		||||
Locking/unlocking rcu consistently for both configs is rigorous but it
 | 
			
		||||
also forces unnecessary locking upon users who have no interest in
 | 
			
		||||
CONFIG_MEMCG.
 | 
			
		||||
 | 
			
		||||
This patch removes the assertion for !CONFIG_MEMCG, because
 | 
			
		||||
page_memcg_rcu() has a few callers and there are no concerns regarding
 | 
			
		||||
their correctness at the moment.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
 | 
			
		||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
---
 | 
			
		||||
 include/linux/memcontrol.h | 1 -
 | 
			
		||||
 1 file changed, 1 deletion(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
 | 
			
		||||
index c193be760709..6bcac3d91dd1 100644
 | 
			
		||||
--- a/include/linux/memcontrol.h
 | 
			
		||||
+++ b/include/linux/memcontrol.h
 | 
			
		||||
@@ -1131,7 +1131,6 @@ static inline struct mem_cgroup *page_memcg(struct page *page)
 | 
			
		||||
 
 | 
			
		||||
 static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
 | 
			
		||||
 {
 | 
			
		||||
-	WARN_ON_ONCE(!rcu_read_lock_held());
 | 
			
		||||
 	return NULL;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
-- 
 | 
			
		||||
2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,124 @@
 | 
			
		||||
From mboxrd@z Thu Jan  1 00:00:00 1970
 | 
			
		||||
Return-Path: <linux-kernel-owner@kernel.org>
 | 
			
		||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
 | 
			
		||||
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
 | 
			
		||||
X-Spam-Level: 
 | 
			
		||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
 | 
			
		||||
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
 | 
			
		||||
	INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
 | 
			
		||||
	USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
 | 
			
		||||
	version=3.4.0
 | 
			
		||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
 | 
			
		||||
	by smtp.lore.kernel.org (Postfix) with ESMTP id 4FED7C433B4
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:14 +0000 (UTC)
 | 
			
		||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
 | 
			
		||||
	by mail.kernel.org (Postfix) with ESMTP id 33A1260E0B
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:14 +0000 (UTC)
 | 
			
		||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
 | 
			
		||||
        id S229681AbhETGzd (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:33 -0400
 | 
			
		||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37868 "EHLO
 | 
			
		||||
        lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
 | 
			
		||||
        with ESMTP id S230365AbhETGzZ (ORCPT
 | 
			
		||||
        <rfc822;linux-kernel@vger.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:25 -0400
 | 
			
		||||
Received: from mail-qv1-xf4a.google.com (mail-qv1-xf4a.google.com [IPv6:2607:f8b0:4864:20::f4a])
 | 
			
		||||
        by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 6DFF8C061574
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:04 -0700 (PDT)
 | 
			
		||||
Received: by mail-qv1-xf4a.google.com with SMTP id e15-20020a0caa4f0000b02901eedbb09299so10877261qvb.15
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:04 -0700 (PDT)
 | 
			
		||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=google.com; s=20161025;
 | 
			
		||||
        h=date:in-reply-to:message-id:mime-version:references:subject:from:to
 | 
			
		||||
         :cc;
 | 
			
		||||
        bh=Vxs+IsfkjudRR9AMrF0VuRYwRpXVOuThAM5t2BO4Wtw=;
 | 
			
		||||
        b=gEeAhMTYbAQslSO01e0o8YIym/dFnsQfB9lZrmyFhl8uqTqmQEDIIu/28e1BOlyvt9
 | 
			
		||||
         rJm5/8Caqo1KIaunpdBy2LPtOXmfi9ZJDnuwRnb21JoByNrFkCChT4Z5xyeBqut1yHQm
 | 
			
		||||
         /TlMDm6OPoewZgMjaOhRgLuU1i+Q2viLaBK5TcX/f4jp7CkEtCTn5SioWFrXLpHFPgfg
 | 
			
		||||
         kYO7g2IN+CR6iy3EfEzmDy81m8wakeRxZZOx4HjJ7gGFFDfSfK4SyZnaOFS2lmsp1BrE
 | 
			
		||||
         F/LSnBPYHDzzGJCEqa0RfGIu9OFnYG7fyBb1AzMWt5UOwD1Z5Gw0p3PCAJBE1ykqsTQ7
 | 
			
		||||
         ooLQ==
 | 
			
		||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=1e100.net; s=20161025;
 | 
			
		||||
        h=x-gm-message-state:date:in-reply-to:message-id:mime-version
 | 
			
		||||
         :references:subject:from:to:cc;
 | 
			
		||||
        bh=Vxs+IsfkjudRR9AMrF0VuRYwRpXVOuThAM5t2BO4Wtw=;
 | 
			
		||||
        b=WqdLL8JjD4oD11umCae6nAxvQ9zl0qjRO1psbjN4FhRRkc/CQM+QJ7TCbayd5VCaUu
 | 
			
		||||
         rtoZfhuozKEHvBBclv4jQzorRkiOVjwZGIX52HwBCvcXpPAQF69jkqG0WRjQKrUK9laH
 | 
			
		||||
         TcyQDkdTbJztqqLLgoaMke0Pvw3qiJvr4a19wbHUQt+uwR4UXoL2NTQBotaVS1/Ji2zn
 | 
			
		||||
         MhoxUD+f8pgFoCCCn/G5DgDvOIOCN2Ed8dcKb+fMyQn2Lhjc9xLUnlaGN6WdKwBpOq9Z
 | 
			
		||||
         AFd/Ld6gumYWjtW7nBuum/ysfqbF5Au47sGCuLngPSTq/x4OIApMHQLjOikk1jB9ydlN
 | 
			
		||||
         GRzA==
 | 
			
		||||
X-Gm-Message-State: AOAM5302cFNM+/KQLI85uCrC18olBzO35TXBlTOBABeaNnESgoIvYml8
 | 
			
		||||
        1LlliNnJr3ok2uXDGXaBzjDPiIU7ddM=
 | 
			
		||||
X-Google-Smtp-Source: ABdhPJzxHNbGXEeZppZputW4eyMKgrpzCcHkInnC6cFjCLcGtMdRmVBNnbFHvD2nDRL//K+y6INSV9VSvtY=
 | 
			
		||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
 | 
			
		||||
 (user=yuzhao job=sendgmr) by 2002:ad4:4184:: with SMTP id e4mr4097411qvp.13.1621493643517;
 | 
			
		||||
 Wed, 19 May 2021 23:54:03 -0700 (PDT)
 | 
			
		||||
Date:   Thu, 20 May 2021 00:53:43 -0600
 | 
			
		||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
Message-Id: <20210520065355.2736558-3-yuzhao@google.com>
 | 
			
		||||
Mime-Version: 1.0
 | 
			
		||||
References: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
Subject: [PATCH v3 02/14] include/linux/nodemask.h: define next_memory_node()
 | 
			
		||||
 if !CONFIG_NUMA
 | 
			
		||||
From:   Yu Zhao <yuzhao@google.com>
 | 
			
		||||
To:     linux-mm@kvack.org
 | 
			
		||||
Cc:     Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
 | 
			
		||||
        Andrew Morton <akpm@linux-foundation.org>,
 | 
			
		||||
        Dave Chinner <david@fromorbit.com>,
 | 
			
		||||
        Dave Hansen <dave.hansen@linux.intel.com>,
 | 
			
		||||
        Donald Carr <sirspudd@gmail.com>,
 | 
			
		||||
        Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
 | 
			
		||||
        Johannes Weiner <hannes@cmpxchg.org>,
 | 
			
		||||
        Jonathan Corbet <corbet@lwn.net>,
 | 
			
		||||
        Joonsoo Kim <iamjoonsoo.kim@lge.com>,
 | 
			
		||||
        Konstantin Kharlamov <hi-angel@yandex.ru>,
 | 
			
		||||
        Marcus Seyfarth <m.seyfarth@gmail.com>,
 | 
			
		||||
        Matthew Wilcox <willy@infradead.org>,
 | 
			
		||||
        Mel Gorman <mgorman@suse.de>,
 | 
			
		||||
        Miaohe Lin <linmiaohe@huawei.com>,
 | 
			
		||||
        Michael Larabel <michael@michaellarabel.com>,
 | 
			
		||||
        Michal Hocko <mhocko@suse.com>,
 | 
			
		||||
        Michel Lespinasse <michel@lespinasse.org>,
 | 
			
		||||
        Rik van Riel <riel@surriel.com>,
 | 
			
		||||
        Roman Gushchin <guro@fb.com>,
 | 
			
		||||
        Tim Chen <tim.c.chen@linux.intel.com>,
 | 
			
		||||
        Vlastimil Babka <vbabka@suse.cz>,
 | 
			
		||||
        Yang Shi <shy828301@gmail.com>,
 | 
			
		||||
        Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
 | 
			
		||||
        linux-kernel@vger.kernel.org, lkp@lists.01.org,
 | 
			
		||||
        page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
 | 
			
		||||
        Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
Content-Type: text/plain; charset="UTF-8"
 | 
			
		||||
Precedence: bulk
 | 
			
		||||
List-ID: <linux-kernel.vger.kernel.org>
 | 
			
		||||
X-Mailing-List: linux-kernel@vger.kernel.org
 | 
			
		||||
List-Archive: <https://lore.kernel.org/lkml/>
 | 
			
		||||
 | 
			
		||||
Currently next_memory_node only exists when CONFIG_NUMA=y. This patch
 | 
			
		||||
adds the macro for !CONFIG_NUMA.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
 | 
			
		||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
---
 | 
			
		||||
 include/linux/nodemask.h | 1 +
 | 
			
		||||
 1 file changed, 1 insertion(+)
 | 
			
		||||
 | 
			
		||||
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
 | 
			
		||||
index ac398e143c9a..89fe4e3592f9 100644
 | 
			
		||||
--- a/include/linux/nodemask.h
 | 
			
		||||
+++ b/include/linux/nodemask.h
 | 
			
		||||
@@ -486,6 +486,7 @@ static inline int num_node_state(enum node_states state)
 | 
			
		||||
 #define first_online_node	0
 | 
			
		||||
 #define first_memory_node	0
 | 
			
		||||
 #define next_online_node(nid)	(MAX_NUMNODES)
 | 
			
		||||
+#define next_memory_node(nid)	(MAX_NUMNODES)
 | 
			
		||||
 #define nr_node_ids		1U
 | 
			
		||||
 #define nr_online_nodes		1U
 | 
			
		||||
 
 | 
			
		||||
-- 
 | 
			
		||||
2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,150 @@
 | 
			
		||||
From mboxrd@z Thu Jan  1 00:00:00 1970
 | 
			
		||||
Return-Path: <linux-kernel-owner@kernel.org>
 | 
			
		||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
 | 
			
		||||
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
 | 
			
		||||
X-Spam-Level: 
 | 
			
		||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
 | 
			
		||||
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
 | 
			
		||||
	INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
 | 
			
		||||
	USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
 | 
			
		||||
	version=3.4.0
 | 
			
		||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
 | 
			
		||||
	by smtp.lore.kernel.org (Postfix) with ESMTP id 5894CC43460
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:18 +0000 (UTC)
 | 
			
		||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
 | 
			
		||||
	by mail.kernel.org (Postfix) with ESMTP id 3DF1B61355
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:18 +0000 (UTC)
 | 
			
		||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
 | 
			
		||||
        id S230492AbhETGzi (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:38 -0400
 | 
			
		||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37880 "EHLO
 | 
			
		||||
        lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
 | 
			
		||||
        with ESMTP id S230430AbhETGz2 (ORCPT
 | 
			
		||||
        <rfc822;linux-kernel@vger.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:28 -0400
 | 
			
		||||
Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49])
 | 
			
		||||
        by lindbergh.monkeyblade.net (Postfix) with ESMTPS id F2439C061761
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:05 -0700 (PDT)
 | 
			
		||||
Received: by mail-yb1-xb49.google.com with SMTP id p138-20020a2542900000b029051304a381d9so9042617yba.20
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:05 -0700 (PDT)
 | 
			
		||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=google.com; s=20161025;
 | 
			
		||||
        h=date:in-reply-to:message-id:mime-version:references:subject:from:to
 | 
			
		||||
         :cc;
 | 
			
		||||
        bh=u+gVIGEYIPA/2ZnKQ/jn+syjhmo8lEqvUYZxU0dhdmw=;
 | 
			
		||||
        b=ilLDWOeIOTKbkZAyTcceDtOpp2Z6oTFrQhsVyqk7X5l9/6+8NJYYI+dDrFdNy8GHPn
 | 
			
		||||
         TXNwPsD+oRKvYdGx4axZhlkFzOkdcr+xYDHDYwdfV6GJubHW0qUcVuNhCtoKbC5rA7Rc
 | 
			
		||||
         HLlOQqtRJZ/ivTzUig8CQccV040hHCbuz35dLgXbD1dVokwc1cOuKZaTLQpYVLsUP3Bu
 | 
			
		||||
         MGJAAygLFemJO4Lj2rtnjvJG8CDZr9Z0uZhqKEqHkyenPQKZNhlA4Evgi1wYHSSLSqnJ
 | 
			
		||||
         48ySo0abwH067PuNMNMETfFX32LpXeIda/dgmAGMAOCqUYbqyCKHzmjDuutRjkgtmoG5
 | 
			
		||||
         3meg==
 | 
			
		||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=1e100.net; s=20161025;
 | 
			
		||||
        h=x-gm-message-state:date:in-reply-to:message-id:mime-version
 | 
			
		||||
         :references:subject:from:to:cc;
 | 
			
		||||
        bh=u+gVIGEYIPA/2ZnKQ/jn+syjhmo8lEqvUYZxU0dhdmw=;
 | 
			
		||||
        b=kGPHggyxo0QzLlCv4G7r06Qb9zfq6cYJu4n3UotivJGpIGltx02xhr3dmw5myvMMTB
 | 
			
		||||
         +ZbHFgrPBlvHRFOzV44X/BNunb0sZE08NWoL7Ukl2CPGFW5C4ojk1f5ULcVpNMkwMHLM
 | 
			
		||||
         a8+V1TCPtJTZj/8tHL+9HnYHlYz/Bigq1ANURCC14mJjIXuCM5eCU6+JTuizAZzNiIIr
 | 
			
		||||
         u8eXfLqGhtBe68cHudHsJtza4h2srgcHTDTznQxwVhruHz3nU10Sni6vFJNWMxYzEfHy
 | 
			
		||||
         179XSwdgavLRIWc5CxN1biYoS2EkJnVyEf1eYcyMtFUArZKIHQtx6DsDN2wNQ2P6YXD5
 | 
			
		||||
         BUtw==
 | 
			
		||||
X-Gm-Message-State: AOAM533YpaJcNo3c5ia3Q6gdyS41kyWgd490xt0zYDyNkyhH3hi4hj2m
 | 
			
		||||
        uiBbgl6yA1VtsOKBod8/iMtZhtd4aXg=
 | 
			
		||||
X-Google-Smtp-Source: ABdhPJw2fnax4tdT5Wsay5+rPu2/CLppUyd3dEee27l274OvNNlEZeyxb+pNstV/LKb7q5/PkQmoAivSEq8=
 | 
			
		||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
 | 
			
		||||
 (user=yuzhao job=sendgmr) by 2002:a05:6902:513:: with SMTP id
 | 
			
		||||
 x19mr5279236ybs.129.1621493645039; Wed, 19 May 2021 23:54:05 -0700 (PDT)
 | 
			
		||||
Date:   Thu, 20 May 2021 00:53:44 -0600
 | 
			
		||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
Message-Id: <20210520065355.2736558-4-yuzhao@google.com>
 | 
			
		||||
Mime-Version: 1.0
 | 
			
		||||
References: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
Subject: [PATCH v3 03/14] include/linux/cgroup.h: export cgroup_mutex
 | 
			
		||||
From:   Yu Zhao <yuzhao@google.com>
 | 
			
		||||
To:     linux-mm@kvack.org
 | 
			
		||||
Cc:     Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
 | 
			
		||||
        Andrew Morton <akpm@linux-foundation.org>,
 | 
			
		||||
        Dave Chinner <david@fromorbit.com>,
 | 
			
		||||
        Dave Hansen <dave.hansen@linux.intel.com>,
 | 
			
		||||
        Donald Carr <sirspudd@gmail.com>,
 | 
			
		||||
        Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
 | 
			
		||||
        Johannes Weiner <hannes@cmpxchg.org>,
 | 
			
		||||
        Jonathan Corbet <corbet@lwn.net>,
 | 
			
		||||
        Joonsoo Kim <iamjoonsoo.kim@lge.com>,
 | 
			
		||||
        Konstantin Kharlamov <hi-angel@yandex.ru>,
 | 
			
		||||
        Marcus Seyfarth <m.seyfarth@gmail.com>,
 | 
			
		||||
        Matthew Wilcox <willy@infradead.org>,
 | 
			
		||||
        Mel Gorman <mgorman@suse.de>,
 | 
			
		||||
        Miaohe Lin <linmiaohe@huawei.com>,
 | 
			
		||||
        Michael Larabel <michael@michaellarabel.com>,
 | 
			
		||||
        Michal Hocko <mhocko@suse.com>,
 | 
			
		||||
        Michel Lespinasse <michel@lespinasse.org>,
 | 
			
		||||
        Rik van Riel <riel@surriel.com>,
 | 
			
		||||
        Roman Gushchin <guro@fb.com>,
 | 
			
		||||
        Tim Chen <tim.c.chen@linux.intel.com>,
 | 
			
		||||
        Vlastimil Babka <vbabka@suse.cz>,
 | 
			
		||||
        Yang Shi <shy828301@gmail.com>,
 | 
			
		||||
        Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
 | 
			
		||||
        linux-kernel@vger.kernel.org, lkp@lists.01.org,
 | 
			
		||||
        page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
 | 
			
		||||
        Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
Content-Type: text/plain; charset="UTF-8"
 | 
			
		||||
Precedence: bulk
 | 
			
		||||
List-ID: <linux-kernel.vger.kernel.org>
 | 
			
		||||
X-Mailing-List: linux-kernel@vger.kernel.org
 | 
			
		||||
List-Archive: <https://lore.kernel.org/lkml/>
 | 
			
		||||
 | 
			
		||||
cgroup_mutex is needed to synchronize with memcg creations.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
 | 
			
		||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
---
 | 
			
		||||
 include/linux/cgroup.h | 15 ++++++++++++++-
 | 
			
		||||
 1 file changed, 14 insertions(+), 1 deletion(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
 | 
			
		||||
index 4f2f79de083e..bd5744360cfa 100644
 | 
			
		||||
--- a/include/linux/cgroup.h
 | 
			
		||||
+++ b/include/linux/cgroup.h
 | 
			
		||||
@@ -432,6 +432,18 @@ static inline void cgroup_put(struct cgroup *cgrp)
 | 
			
		||||
 	css_put(&cgrp->self);
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+extern struct mutex cgroup_mutex;
 | 
			
		||||
+
 | 
			
		||||
+static inline void cgroup_lock(void)
 | 
			
		||||
+{
 | 
			
		||||
+	mutex_lock(&cgroup_mutex);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static inline void cgroup_unlock(void)
 | 
			
		||||
+{
 | 
			
		||||
+	mutex_unlock(&cgroup_mutex);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 /**
 | 
			
		||||
  * task_css_set_check - obtain a task's css_set with extra access conditions
 | 
			
		||||
  * @task: the task to obtain css_set for
 | 
			
		||||
@@ -446,7 +458,6 @@ static inline void cgroup_put(struct cgroup *cgrp)
 | 
			
		||||
  * as locks used during the cgroup_subsys::attach() methods.
 | 
			
		||||
  */
 | 
			
		||||
 #ifdef CONFIG_PROVE_RCU
 | 
			
		||||
-extern struct mutex cgroup_mutex;
 | 
			
		||||
 extern spinlock_t css_set_lock;
 | 
			
		||||
 #define task_css_set_check(task, __c)					\
 | 
			
		||||
 	rcu_dereference_check((task)->cgroups,				\
 | 
			
		||||
@@ -704,6 +715,8 @@ struct cgroup;
 | 
			
		||||
 static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
 | 
			
		||||
 static inline void css_get(struct cgroup_subsys_state *css) {}
 | 
			
		||||
 static inline void css_put(struct cgroup_subsys_state *css) {}
 | 
			
		||||
+static inline void cgroup_lock(void) {}
 | 
			
		||||
+static inline void cgroup_unlock(void) {}
 | 
			
		||||
 static inline int cgroup_attach_task_all(struct task_struct *from,
 | 
			
		||||
 					 struct task_struct *t) { return 0; }
 | 
			
		||||
 static inline int cgroupstats_build(struct cgroupstats *stats,
 | 
			
		||||
-- 
 | 
			
		||||
2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,214 @@
 | 
			
		||||
From mboxrd@z Thu Jan  1 00:00:00 1970
 | 
			
		||||
Return-Path: <linux-kernel-owner@kernel.org>
 | 
			
		||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
 | 
			
		||||
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
 | 
			
		||||
X-Spam-Level: 
 | 
			
		||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
 | 
			
		||||
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
 | 
			
		||||
	INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
 | 
			
		||||
	USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
 | 
			
		||||
	version=3.4.0
 | 
			
		||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
 | 
			
		||||
	by smtp.lore.kernel.org (Postfix) with ESMTP id 3FBCEC433B4
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:18 +0000 (UTC)
 | 
			
		||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
 | 
			
		||||
	by mail.kernel.org (Postfix) with ESMTP id 2678D6108C
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:18 +0000 (UTC)
 | 
			
		||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
 | 
			
		||||
        id S230467AbhETGzf (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:35 -0400
 | 
			
		||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37890 "EHLO
 | 
			
		||||
        lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
 | 
			
		||||
        with ESMTP id S230435AbhETGz2 (ORCPT
 | 
			
		||||
        <rfc822;linux-kernel@vger.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:28 -0400
 | 
			
		||||
Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49])
 | 
			
		||||
        by lindbergh.monkeyblade.net (Postfix) with ESMTPS id DC741C0613CE
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:07 -0700 (PDT)
 | 
			
		||||
Received: by mail-yb1-xb49.google.com with SMTP id 129-20020a2504870000b0290513326cc5e0so8674080ybe.10
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:07 -0700 (PDT)
 | 
			
		||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=google.com; s=20161025;
 | 
			
		||||
        h=date:in-reply-to:message-id:mime-version:references:subject:from:to
 | 
			
		||||
         :cc;
 | 
			
		||||
        bh=SyTpiiLQc7KnB8Q1E8X3BxXutjquJ4KYbVlpDQgVi/g=;
 | 
			
		||||
        b=F/5vtOLfn2hWqJCcZJILEPNvAi4G3UC/HROV8n8s10GH7JhHnrGHdEho6MiIGVETaO
 | 
			
		||||
         sHn+wn+lopXgJMLEqp5WqaQ769JJNG7YB4Pq15oo9pv+HRPYGP/d500+gP+KrGyChFzI
 | 
			
		||||
         iRtkvAcNwlgumar+mpa5HZRGCb08Jm1ZBJ5134Kg6M2RP3KBMa9LpRBW+jA/uB2ZH6dY
 | 
			
		||||
         SHmfSiGBjz0MLdKbjMO0ZC+E0iCgLKKyI3liy35dgrf7U0uAsmS+Tq+vBabFfUY8cvI2
 | 
			
		||||
         9S4m0Grod6BK2vh7Cxh9tBxuiOnpUkk6GOwodZ5MXTpgU9J25Ztod8Cas20KXVuUUu5L
 | 
			
		||||
         0caQ==
 | 
			
		||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=1e100.net; s=20161025;
 | 
			
		||||
        h=x-gm-message-state:date:in-reply-to:message-id:mime-version
 | 
			
		||||
         :references:subject:from:to:cc;
 | 
			
		||||
        bh=SyTpiiLQc7KnB8Q1E8X3BxXutjquJ4KYbVlpDQgVi/g=;
 | 
			
		||||
        b=I/lOZpe4tWCS0mala3TZsfeszZC2f6ezlF6QsZ4a/ik+ur09NYRA+x5bJGwlW5GDlq
 | 
			
		||||
         UnLRSTXE7lfK71dSWsWOnbdahNawKurDhQQHAWYazRhXZbx8wQr8tVsCQRJt62tjdH10
 | 
			
		||||
         3hySRpK88u8siMlEwsnnjOD9xPsxcVvC9q60ppd6Eg2OjbtByyWxV86qM2x45/Wp9SgS
 | 
			
		||||
         1K0/jwK6L1A8aN+ccrl6RiewC05OjfkXbKE3qp2X4jLzxo0Z5jchuk4yzUI9a9UtqjR2
 | 
			
		||||
         jluXY0tCVPqedtSHAZ1h+oHspBuclW26af/5c7EGck5IwrADYKN8hL9LfA0GmuxthLa8
 | 
			
		||||
         CT+g==
 | 
			
		||||
X-Gm-Message-State: AOAM530i7XGlRCP9jxfMmcb0QrUXw8gBhH02/rSREHScRnCCUJvVu8zh
 | 
			
		||||
        smjHRFK1+f6BrShUrxMXHYEhJlEv03M=
 | 
			
		||||
X-Google-Smtp-Source: ABdhPJxef10azDAzK7UEGotre/hx9MbP+rt8RSM5uDH+7LMog0h8qkdAugFcXq/qcLN78UEvnUxUn/nobaI=
 | 
			
		||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
 | 
			
		||||
 (user=yuzhao job=sendgmr) by 2002:a25:2009:: with SMTP id g9mr4983935ybg.198.1621493646666;
 | 
			
		||||
 Wed, 19 May 2021 23:54:06 -0700 (PDT)
 | 
			
		||||
Date:   Thu, 20 May 2021 00:53:45 -0600
 | 
			
		||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
Message-Id: <20210520065355.2736558-5-yuzhao@google.com>
 | 
			
		||||
Mime-Version: 1.0
 | 
			
		||||
References: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
Subject: [PATCH v3 04/14] mm, x86: support the accessed bit on non-leaf PMD entries
 | 
			
		||||
From:   Yu Zhao <yuzhao@google.com>
 | 
			
		||||
To:     linux-mm@kvack.org
 | 
			
		||||
Cc:     Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
 | 
			
		||||
        Andrew Morton <akpm@linux-foundation.org>,
 | 
			
		||||
        Dave Chinner <david@fromorbit.com>,
 | 
			
		||||
        Dave Hansen <dave.hansen@linux.intel.com>,
 | 
			
		||||
        Donald Carr <sirspudd@gmail.com>,
 | 
			
		||||
        Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
 | 
			
		||||
        Johannes Weiner <hannes@cmpxchg.org>,
 | 
			
		||||
        Jonathan Corbet <corbet@lwn.net>,
 | 
			
		||||
        Joonsoo Kim <iamjoonsoo.kim@lge.com>,
 | 
			
		||||
        Konstantin Kharlamov <hi-angel@yandex.ru>,
 | 
			
		||||
        Marcus Seyfarth <m.seyfarth@gmail.com>,
 | 
			
		||||
        Matthew Wilcox <willy@infradead.org>,
 | 
			
		||||
        Mel Gorman <mgorman@suse.de>,
 | 
			
		||||
        Miaohe Lin <linmiaohe@huawei.com>,
 | 
			
		||||
        Michael Larabel <michael@michaellarabel.com>,
 | 
			
		||||
        Michal Hocko <mhocko@suse.com>,
 | 
			
		||||
        Michel Lespinasse <michel@lespinasse.org>,
 | 
			
		||||
        Rik van Riel <riel@surriel.com>,
 | 
			
		||||
        Roman Gushchin <guro@fb.com>,
 | 
			
		||||
        Tim Chen <tim.c.chen@linux.intel.com>,
 | 
			
		||||
        Vlastimil Babka <vbabka@suse.cz>,
 | 
			
		||||
        Yang Shi <shy828301@gmail.com>,
 | 
			
		||||
        Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
 | 
			
		||||
        linux-kernel@vger.kernel.org, lkp@lists.01.org,
 | 
			
		||||
        page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
 | 
			
		||||
        Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
Content-Type: text/plain; charset="UTF-8"
 | 
			
		||||
Precedence: bulk
 | 
			
		||||
List-ID: <linux-kernel.vger.kernel.org>
 | 
			
		||||
X-Mailing-List: linux-kernel@vger.kernel.org
 | 
			
		||||
List-Archive: <https://lore.kernel.org/lkml/>
 | 
			
		||||
 | 
			
		||||
Some architectures support the accessed bit on non-leaf PMD entries
 | 
			
		||||
(parents) in addition to leaf PTE entries (children) where pages are
 | 
			
		||||
mapped, e.g., x86_64 sets the accessed bit on a parent when using it
 | 
			
		||||
as part of linear-address translation [1]. Page table walkers who are
 | 
			
		||||
interested in the accessed bit on children can take advantage of this:
 | 
			
		||||
they do not need to search the children when the accessed bit is not
 | 
			
		||||
set on a parent, given that they have previously cleared the accessed
 | 
			
		||||
bit on this parent.
 | 
			
		||||
 | 
			
		||||
[1]: Intel 64 and IA-32 Architectures Software Developer's Manual
 | 
			
		||||
     Volume 3 (October 2019), section 4.8
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
 | 
			
		||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
---
 | 
			
		||||
 arch/Kconfig                   | 9 +++++++++
 | 
			
		||||
 arch/x86/Kconfig               | 1 +
 | 
			
		||||
 arch/x86/include/asm/pgtable.h | 2 +-
 | 
			
		||||
 arch/x86/mm/pgtable.c          | 5 ++++-
 | 
			
		||||
 include/linux/pgtable.h        | 4 ++--
 | 
			
		||||
 5 files changed, 17 insertions(+), 4 deletions(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/arch/Kconfig b/arch/Kconfig
 | 
			
		||||
index c45b770d3579..e3812adc69f7 100644
 | 
			
		||||
--- a/arch/Kconfig
 | 
			
		||||
+++ b/arch/Kconfig
 | 
			
		||||
@@ -826,6 +826,15 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE
 | 
			
		||||
 config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 | 
			
		||||
 	bool
 | 
			
		||||
 
 | 
			
		||||
+config HAVE_ARCH_PARENT_PMD_YOUNG
 | 
			
		||||
+	bool
 | 
			
		||||
+	depends on PGTABLE_LEVELS > 2
 | 
			
		||||
+	help
 | 
			
		||||
+	  Architectures that select this are able to set the accessed bit on
 | 
			
		||||
+	  non-leaf PMD entries in addition to leaf PTE entries where pages are
 | 
			
		||||
+	  mapped. For them, page table walkers that clear the accessed bit may
 | 
			
		||||
+	  stop at non-leaf PMD entries if they do not see the accessed bit.
 | 
			
		||||
+
 | 
			
		||||
 config HAVE_ARCH_HUGE_VMAP
 | 
			
		||||
 	bool
 | 
			
		||||
 
 | 
			
		||||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
 | 
			
		||||
index 0045e1b44190..f619055c4537 100644
 | 
			
		||||
--- a/arch/x86/Kconfig
 | 
			
		||||
+++ b/arch/x86/Kconfig
 | 
			
		||||
@@ -170,6 +170,7 @@ config X86
 | 
			
		||||
 	select HAVE_ARCH_TRACEHOOK
 | 
			
		||||
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 | 
			
		||||
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
 | 
			
		||||
+	select HAVE_ARCH_PARENT_PMD_YOUNG	if X86_64
 | 
			
		||||
 	select HAVE_ARCH_USERFAULTFD_WP         if X86_64 && USERFAULTFD
 | 
			
		||||
 	select HAVE_ARCH_USERFAULTFD_MINOR	if X86_64 && USERFAULTFD
 | 
			
		||||
 	select HAVE_ARCH_VMAP_STACK		if X86_64
 | 
			
		||||
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
 | 
			
		||||
index b1099f2d9800..3a24d2af4e9b 100644
 | 
			
		||||
--- a/arch/x86/include/asm/pgtable.h
 | 
			
		||||
+++ b/arch/x86/include/asm/pgtable.h
 | 
			
		||||
@@ -846,7 +846,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 | 
			
		||||
 
 | 
			
		||||
 static inline int pmd_bad(pmd_t pmd)
 | 
			
		||||
 {
 | 
			
		||||
-	return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
 | 
			
		||||
+	return ((pmd_flags(pmd) | _PAGE_ACCESSED) & ~_PAGE_USER) != _KERNPG_TABLE;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
 static inline unsigned long pages_to_mb(unsigned long npg)
 | 
			
		||||
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
 | 
			
		||||
index d27cf69e811d..b968d6bd28b6 100644
 | 
			
		||||
--- a/arch/x86/mm/pgtable.c
 | 
			
		||||
+++ b/arch/x86/mm/pgtable.c
 | 
			
		||||
@@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
 | 
			
		||||
 	return ret;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 | 
			
		||||
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG)
 | 
			
		||||
 int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 | 
			
		||||
 			      unsigned long addr, pmd_t *pmdp)
 | 
			
		||||
 {
 | 
			
		||||
@@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 | 
			
		||||
 
 | 
			
		||||
 	return ret;
 | 
			
		||||
 }
 | 
			
		||||
+#endif
 | 
			
		||||
+
 | 
			
		||||
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 | 
			
		||||
 int pudp_test_and_clear_young(struct vm_area_struct *vma,
 | 
			
		||||
 			      unsigned long addr, pud_t *pudp)
 | 
			
		||||
 {
 | 
			
		||||
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
 | 
			
		||||
index 46b13780c2c8..94ecc1d277a2 100644
 | 
			
		||||
--- a/include/linux/pgtable.h
 | 
			
		||||
+++ b/include/linux/pgtable.h
 | 
			
		||||
@@ -193,7 +193,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 | 
			
		||||
 #endif
 | 
			
		||||
 
 | 
			
		||||
 #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
 | 
			
		||||
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 | 
			
		||||
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG)
 | 
			
		||||
 static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 | 
			
		||||
 					    unsigned long address,
 | 
			
		||||
 					    pmd_t *pmdp)
 | 
			
		||||
@@ -214,7 +214,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 | 
			
		||||
 	BUILD_BUG();
 | 
			
		||||
 	return 0;
 | 
			
		||||
 }
 | 
			
		||||
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 | 
			
		||||
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG */
 | 
			
		||||
 #endif
 | 
			
		||||
 
 | 
			
		||||
 #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 | 
			
		||||
-- 
 | 
			
		||||
2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,323 @@
 | 
			
		||||
From mboxrd@z Thu Jan  1 00:00:00 1970
 | 
			
		||||
Return-Path: <linux-kernel-owner@kernel.org>
 | 
			
		||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
 | 
			
		||||
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
 | 
			
		||||
X-Spam-Level: 
 | 
			
		||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
 | 
			
		||||
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
 | 
			
		||||
	INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
 | 
			
		||||
	USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
 | 
			
		||||
	version=3.4.0
 | 
			
		||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
 | 
			
		||||
	by smtp.lore.kernel.org (Postfix) with ESMTP id 614A3C43461
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:20 +0000 (UTC)
 | 
			
		||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
 | 
			
		||||
	by mail.kernel.org (Postfix) with ESMTP id 44F1D61186
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:20 +0000 (UTC)
 | 
			
		||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
 | 
			
		||||
        id S230499AbhETGzj (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:39 -0400
 | 
			
		||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37910 "EHLO
 | 
			
		||||
        lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
 | 
			
		||||
        with ESMTP id S230452AbhETGzb (ORCPT
 | 
			
		||||
        <rfc822;linux-kernel@vger.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:31 -0400
 | 
			
		||||
Received: from mail-qv1-xf4a.google.com (mail-qv1-xf4a.google.com [IPv6:2607:f8b0:4864:20::f4a])
 | 
			
		||||
        by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 245E9C06138B
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:09 -0700 (PDT)
 | 
			
		||||
Received: by mail-qv1-xf4a.google.com with SMTP id c5-20020a0ca9c50000b02901aede9b5061so12455193qvb.14
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:09 -0700 (PDT)
 | 
			
		||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=google.com; s=20161025;
 | 
			
		||||
        h=date:in-reply-to:message-id:mime-version:references:subject:from:to
 | 
			
		||||
         :cc;
 | 
			
		||||
        bh=Mvah71zeYWGfuEGRbEsLqflL7nXzJ5AdEYR+UovaRYY=;
 | 
			
		||||
        b=EbaEOCfalGO/Os4OKgi4M0ux2tbj/9YV7PKsVCGQdr/8gcQO1wsCl7ywZY/pNC7eXz
 | 
			
		||||
         NoDBi8g1D9jnfogpVvkt+RSkZlQ/wIQfMR8guk0/qk6EZebG/utx01m5VEv0G0jHv0Zr
 | 
			
		||||
         k6d+sXr5o4NS2Kl/7Ur6tOhmyQYo1mJS8W6wy8htCD9qRhKO9rljjjcNNoQFh7jF53I2
 | 
			
		||||
         oqJdy/ZRwC1k/6/iastZquGfCQ1ZDPp9qbDEfPp6RfaePLHAvS2mEcu3b5IlddG8UjMG
 | 
			
		||||
         gnQkyzTi9RZ60CdCTtFo/33uy+SQMY1vKs2glF5gunlSHFA1EaqvtsVi1W2ngxGWiGKh
 | 
			
		||||
         ajyA==
 | 
			
		||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=1e100.net; s=20161025;
 | 
			
		||||
        h=x-gm-message-state:date:in-reply-to:message-id:mime-version
 | 
			
		||||
         :references:subject:from:to:cc;
 | 
			
		||||
        bh=Mvah71zeYWGfuEGRbEsLqflL7nXzJ5AdEYR+UovaRYY=;
 | 
			
		||||
        b=fKmHfVBojnVD7v0HScBBju/h6BeYHqtiGWztHdkYBBMbAblj6OoCIkOdzbET71QBoY
 | 
			
		||||
         R3bKjlFy/3PcckxPJrbFRhSVsTk4faV9uQfJlZuedG4G3O2EY6PeqhlGN722JnrxjpVk
 | 
			
		||||
         og7sFMeaPJCcbmkZyC+jm4xmnS5Ox2CVjXqgw96+ViRsnfjSJ3Vvu0mOk/ab+jfZ3/ZB
 | 
			
		||||
         HW3plDWWuAO8ijU0AazsQOOG3rvfr/szKKmeJs3e5a7HPpChkX9wvZKAnlyPw+6MvtLe
 | 
			
		||||
         ssM2BgmFkGzG2yd8AzvepX/afdU14K4bigWqSjN8IRR7JVDnGdBKYR4N/tH0JI/apOet
 | 
			
		||||
         Rn1g==
 | 
			
		||||
X-Gm-Message-State: AOAM5337ZUQnuxydxDF/VBFRzHtx51o3/N5HDpf9MYMdQrx5kcdbVyhf
 | 
			
		||||
        HEyO2/+GFcfRnTIPxodPADdKQi6qUwA=
 | 
			
		||||
X-Google-Smtp-Source: ABdhPJxyjS6uG8i8Sad50t/5Pf/9RTagtFbxDvAuxBuu8l0odJlhqIGjN9aFII0GYF+uFWIxSdphl51ZKHc=
 | 
			
		||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
 | 
			
		||||
 (user=yuzhao job=sendgmr) by 2002:a0c:d84d:: with SMTP id i13mr3839330qvj.32.1621493648268;
 | 
			
		||||
 Wed, 19 May 2021 23:54:08 -0700 (PDT)
 | 
			
		||||
Date:   Thu, 20 May 2021 00:53:46 -0600
 | 
			
		||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
Message-Id: <20210520065355.2736558-6-yuzhao@google.com>
 | 
			
		||||
Mime-Version: 1.0
 | 
			
		||||
References: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
Subject: [PATCH v3 05/14] mm/vmscan.c: refactor shrink_node()
 | 
			
		||||
From:   Yu Zhao <yuzhao@google.com>
 | 
			
		||||
To:     linux-mm@kvack.org
 | 
			
		||||
Cc:     Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
 | 
			
		||||
        Andrew Morton <akpm@linux-foundation.org>,
 | 
			
		||||
        Dave Chinner <david@fromorbit.com>,
 | 
			
		||||
        Dave Hansen <dave.hansen@linux.intel.com>,
 | 
			
		||||
        Donald Carr <sirspudd@gmail.com>,
 | 
			
		||||
        Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
 | 
			
		||||
        Johannes Weiner <hannes@cmpxchg.org>,
 | 
			
		||||
        Jonathan Corbet <corbet@lwn.net>,
 | 
			
		||||
        Joonsoo Kim <iamjoonsoo.kim@lge.com>,
 | 
			
		||||
        Konstantin Kharlamov <hi-angel@yandex.ru>,
 | 
			
		||||
        Marcus Seyfarth <m.seyfarth@gmail.com>,
 | 
			
		||||
        Matthew Wilcox <willy@infradead.org>,
 | 
			
		||||
        Mel Gorman <mgorman@suse.de>,
 | 
			
		||||
        Miaohe Lin <linmiaohe@huawei.com>,
 | 
			
		||||
        Michael Larabel <michael@michaellarabel.com>,
 | 
			
		||||
        Michal Hocko <mhocko@suse.com>,
 | 
			
		||||
        Michel Lespinasse <michel@lespinasse.org>,
 | 
			
		||||
        Rik van Riel <riel@surriel.com>,
 | 
			
		||||
        Roman Gushchin <guro@fb.com>,
 | 
			
		||||
        Tim Chen <tim.c.chen@linux.intel.com>,
 | 
			
		||||
        Vlastimil Babka <vbabka@suse.cz>,
 | 
			
		||||
        Yang Shi <shy828301@gmail.com>,
 | 
			
		||||
        Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
 | 
			
		||||
        linux-kernel@vger.kernel.org, lkp@lists.01.org,
 | 
			
		||||
        page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
 | 
			
		||||
        Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
Content-Type: text/plain; charset="UTF-8"
 | 
			
		||||
Precedence: bulk
 | 
			
		||||
List-ID: <linux-kernel.vger.kernel.org>
 | 
			
		||||
X-Mailing-List: linux-kernel@vger.kernel.org
 | 
			
		||||
List-Archive: <https://lore.kernel.org/lkml/>
 | 
			
		||||
 | 
			
		||||
Heuristics that determine scan balance between anon and file LRUs are
 | 
			
		||||
rather independent. Move them into a separate function to improve
 | 
			
		||||
readability.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
 | 
			
		||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
---
 | 
			
		||||
 mm/vmscan.c | 186 +++++++++++++++++++++++++++-------------------------
 | 
			
		||||
 1 file changed, 98 insertions(+), 88 deletions(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
 | 
			
		||||
index 5199b9696bab..2339459c97d4 100644
 | 
			
		||||
--- a/mm/vmscan.c
 | 
			
		||||
+++ b/mm/vmscan.c
 | 
			
		||||
@@ -2421,6 +2421,103 @@ enum scan_balance {
 | 
			
		||||
 	SCAN_FILE,
 | 
			
		||||
 };
 | 
			
		||||
 
 | 
			
		||||
+static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
 | 
			
		||||
+{
 | 
			
		||||
+	unsigned long file;
 | 
			
		||||
+	struct lruvec *target_lruvec;
 | 
			
		||||
+
 | 
			
		||||
+	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
 | 
			
		||||
+
 | 
			
		||||
+	/*
 | 
			
		||||
+	 * Determine the scan balance between anon and file LRUs.
 | 
			
		||||
+	 */
 | 
			
		||||
+	spin_lock_irq(&target_lruvec->lru_lock);
 | 
			
		||||
+	sc->anon_cost = target_lruvec->anon_cost;
 | 
			
		||||
+	sc->file_cost = target_lruvec->file_cost;
 | 
			
		||||
+	spin_unlock_irq(&target_lruvec->lru_lock);
 | 
			
		||||
+
 | 
			
		||||
+	/*
 | 
			
		||||
+	 * Target desirable inactive:active list ratios for the anon
 | 
			
		||||
+	 * and file LRU lists.
 | 
			
		||||
+	 */
 | 
			
		||||
+	if (!sc->force_deactivate) {
 | 
			
		||||
+		unsigned long refaults;
 | 
			
		||||
+
 | 
			
		||||
+		refaults = lruvec_page_state(target_lruvec,
 | 
			
		||||
+				WORKINGSET_ACTIVATE_ANON);
 | 
			
		||||
+		if (refaults != target_lruvec->refaults[0] ||
 | 
			
		||||
+			inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
 | 
			
		||||
+			sc->may_deactivate |= DEACTIVATE_ANON;
 | 
			
		||||
+		else
 | 
			
		||||
+			sc->may_deactivate &= ~DEACTIVATE_ANON;
 | 
			
		||||
+
 | 
			
		||||
+		/*
 | 
			
		||||
+		 * When refaults are being observed, it means a new
 | 
			
		||||
+		 * workingset is being established. Deactivate to get
 | 
			
		||||
+		 * rid of any stale active pages quickly.
 | 
			
		||||
+		 */
 | 
			
		||||
+		refaults = lruvec_page_state(target_lruvec,
 | 
			
		||||
+				WORKINGSET_ACTIVATE_FILE);
 | 
			
		||||
+		if (refaults != target_lruvec->refaults[1] ||
 | 
			
		||||
+		    inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
 | 
			
		||||
+			sc->may_deactivate |= DEACTIVATE_FILE;
 | 
			
		||||
+		else
 | 
			
		||||
+			sc->may_deactivate &= ~DEACTIVATE_FILE;
 | 
			
		||||
+	} else
 | 
			
		||||
+		sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
 | 
			
		||||
+
 | 
			
		||||
+	/*
 | 
			
		||||
+	 * If we have plenty of inactive file pages that aren't
 | 
			
		||||
+	 * thrashing, try to reclaim those first before touching
 | 
			
		||||
+	 * anonymous pages.
 | 
			
		||||
+	 */
 | 
			
		||||
+	file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
 | 
			
		||||
+	if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
 | 
			
		||||
+		sc->cache_trim_mode = 1;
 | 
			
		||||
+	else
 | 
			
		||||
+		sc->cache_trim_mode = 0;
 | 
			
		||||
+
 | 
			
		||||
+	/*
 | 
			
		||||
+	 * Prevent the reclaimer from falling into the cache trap: as
 | 
			
		||||
+	 * cache pages start out inactive, every cache fault will tip
 | 
			
		||||
+	 * the scan balance towards the file LRU.  And as the file LRU
 | 
			
		||||
+	 * shrinks, so does the window for rotation from references.
 | 
			
		||||
+	 * This means we have a runaway feedback loop where a tiny
 | 
			
		||||
+	 * thrashing file LRU becomes infinitely more attractive than
 | 
			
		||||
+	 * anon pages.  Try to detect this based on file LRU size.
 | 
			
		||||
+	 */
 | 
			
		||||
+	if (!cgroup_reclaim(sc)) {
 | 
			
		||||
+		unsigned long total_high_wmark = 0;
 | 
			
		||||
+		unsigned long free, anon;
 | 
			
		||||
+		int z;
 | 
			
		||||
+
 | 
			
		||||
+		free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
 | 
			
		||||
+		file = node_page_state(pgdat, NR_ACTIVE_FILE) +
 | 
			
		||||
+			   node_page_state(pgdat, NR_INACTIVE_FILE);
 | 
			
		||||
+
 | 
			
		||||
+		for (z = 0; z < MAX_NR_ZONES; z++) {
 | 
			
		||||
+			struct zone *zone = &pgdat->node_zones[z];
 | 
			
		||||
+
 | 
			
		||||
+			if (!managed_zone(zone))
 | 
			
		||||
+				continue;
 | 
			
		||||
+
 | 
			
		||||
+			total_high_wmark += high_wmark_pages(zone);
 | 
			
		||||
+		}
 | 
			
		||||
+
 | 
			
		||||
+		/*
 | 
			
		||||
+		 * Consider anon: if that's low too, this isn't a
 | 
			
		||||
+		 * runaway file reclaim problem, but rather just
 | 
			
		||||
+		 * extreme pressure. Reclaim as per usual then.
 | 
			
		||||
+		 */
 | 
			
		||||
+		anon = node_page_state(pgdat, NR_INACTIVE_ANON);
 | 
			
		||||
+
 | 
			
		||||
+		sc->file_is_tiny =
 | 
			
		||||
+			file + free <= total_high_wmark &&
 | 
			
		||||
+			!(sc->may_deactivate & DEACTIVATE_ANON) &&
 | 
			
		||||
+			anon >> sc->priority;
 | 
			
		||||
+	}
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 /*
 | 
			
		||||
  * Determine how aggressively the anon and file LRU lists should be
 | 
			
		||||
  * scanned.  The relative value of each set of LRU lists is determined
 | 
			
		||||
@@ -2866,7 +2963,6 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 | 
			
		||||
 	unsigned long nr_reclaimed, nr_scanned;
 | 
			
		||||
 	struct lruvec *target_lruvec;
 | 
			
		||||
 	bool reclaimable = false;
 | 
			
		||||
-	unsigned long file;
 | 
			
		||||
 
 | 
			
		||||
 	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
 | 
			
		||||
 
 | 
			
		||||
@@ -2876,93 +2972,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 | 
			
		||||
 	nr_reclaimed = sc->nr_reclaimed;
 | 
			
		||||
 	nr_scanned = sc->nr_scanned;
 | 
			
		||||
 
 | 
			
		||||
-	/*
 | 
			
		||||
-	 * Determine the scan balance between anon and file LRUs.
 | 
			
		||||
-	 */
 | 
			
		||||
-	spin_lock_irq(&target_lruvec->lru_lock);
 | 
			
		||||
-	sc->anon_cost = target_lruvec->anon_cost;
 | 
			
		||||
-	sc->file_cost = target_lruvec->file_cost;
 | 
			
		||||
-	spin_unlock_irq(&target_lruvec->lru_lock);
 | 
			
		||||
-
 | 
			
		||||
-	/*
 | 
			
		||||
-	 * Target desirable inactive:active list ratios for the anon
 | 
			
		||||
-	 * and file LRU lists.
 | 
			
		||||
-	 */
 | 
			
		||||
-	if (!sc->force_deactivate) {
 | 
			
		||||
-		unsigned long refaults;
 | 
			
		||||
-
 | 
			
		||||
-		refaults = lruvec_page_state(target_lruvec,
 | 
			
		||||
-				WORKINGSET_ACTIVATE_ANON);
 | 
			
		||||
-		if (refaults != target_lruvec->refaults[0] ||
 | 
			
		||||
-			inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
 | 
			
		||||
-			sc->may_deactivate |= DEACTIVATE_ANON;
 | 
			
		||||
-		else
 | 
			
		||||
-			sc->may_deactivate &= ~DEACTIVATE_ANON;
 | 
			
		||||
-
 | 
			
		||||
-		/*
 | 
			
		||||
-		 * When refaults are being observed, it means a new
 | 
			
		||||
-		 * workingset is being established. Deactivate to get
 | 
			
		||||
-		 * rid of any stale active pages quickly.
 | 
			
		||||
-		 */
 | 
			
		||||
-		refaults = lruvec_page_state(target_lruvec,
 | 
			
		||||
-				WORKINGSET_ACTIVATE_FILE);
 | 
			
		||||
-		if (refaults != target_lruvec->refaults[1] ||
 | 
			
		||||
-		    inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
 | 
			
		||||
-			sc->may_deactivate |= DEACTIVATE_FILE;
 | 
			
		||||
-		else
 | 
			
		||||
-			sc->may_deactivate &= ~DEACTIVATE_FILE;
 | 
			
		||||
-	} else
 | 
			
		||||
-		sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
 | 
			
		||||
-
 | 
			
		||||
-	/*
 | 
			
		||||
-	 * If we have plenty of inactive file pages that aren't
 | 
			
		||||
-	 * thrashing, try to reclaim those first before touching
 | 
			
		||||
-	 * anonymous pages.
 | 
			
		||||
-	 */
 | 
			
		||||
-	file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
 | 
			
		||||
-	if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
 | 
			
		||||
-		sc->cache_trim_mode = 1;
 | 
			
		||||
-	else
 | 
			
		||||
-		sc->cache_trim_mode = 0;
 | 
			
		||||
-
 | 
			
		||||
-	/*
 | 
			
		||||
-	 * Prevent the reclaimer from falling into the cache trap: as
 | 
			
		||||
-	 * cache pages start out inactive, every cache fault will tip
 | 
			
		||||
-	 * the scan balance towards the file LRU.  And as the file LRU
 | 
			
		||||
-	 * shrinks, so does the window for rotation from references.
 | 
			
		||||
-	 * This means we have a runaway feedback loop where a tiny
 | 
			
		||||
-	 * thrashing file LRU becomes infinitely more attractive than
 | 
			
		||||
-	 * anon pages.  Try to detect this based on file LRU size.
 | 
			
		||||
-	 */
 | 
			
		||||
-	if (!cgroup_reclaim(sc)) {
 | 
			
		||||
-		unsigned long total_high_wmark = 0;
 | 
			
		||||
-		unsigned long free, anon;
 | 
			
		||||
-		int z;
 | 
			
		||||
-
 | 
			
		||||
-		free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
 | 
			
		||||
-		file = node_page_state(pgdat, NR_ACTIVE_FILE) +
 | 
			
		||||
-			   node_page_state(pgdat, NR_INACTIVE_FILE);
 | 
			
		||||
-
 | 
			
		||||
-		for (z = 0; z < MAX_NR_ZONES; z++) {
 | 
			
		||||
-			struct zone *zone = &pgdat->node_zones[z];
 | 
			
		||||
-			if (!managed_zone(zone))
 | 
			
		||||
-				continue;
 | 
			
		||||
-
 | 
			
		||||
-			total_high_wmark += high_wmark_pages(zone);
 | 
			
		||||
-		}
 | 
			
		||||
-
 | 
			
		||||
-		/*
 | 
			
		||||
-		 * Consider anon: if that's low too, this isn't a
 | 
			
		||||
-		 * runaway file reclaim problem, but rather just
 | 
			
		||||
-		 * extreme pressure. Reclaim as per usual then.
 | 
			
		||||
-		 */
 | 
			
		||||
-		anon = node_page_state(pgdat, NR_INACTIVE_ANON);
 | 
			
		||||
-
 | 
			
		||||
-		sc->file_is_tiny =
 | 
			
		||||
-			file + free <= total_high_wmark &&
 | 
			
		||||
-			!(sc->may_deactivate & DEACTIVATE_ANON) &&
 | 
			
		||||
-			anon >> sc->priority;
 | 
			
		||||
-	}
 | 
			
		||||
+	prepare_scan_count(pgdat, sc);
 | 
			
		||||
 
 | 
			
		||||
 	shrink_node_memcgs(pgdat, sc);
 | 
			
		||||
 
 | 
			
		||||
-- 
 | 
			
		||||
2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,233 @@
 | 
			
		||||
From mboxrd@z Thu Jan  1 00:00:00 1970
 | 
			
		||||
Return-Path: <linux-kernel-owner@kernel.org>
 | 
			
		||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
 | 
			
		||||
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
 | 
			
		||||
X-Spam-Level: 
 | 
			
		||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
 | 
			
		||||
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
 | 
			
		||||
	INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
 | 
			
		||||
	USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
 | 
			
		||||
	version=3.4.0
 | 
			
		||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
 | 
			
		||||
	by smtp.lore.kernel.org (Postfix) with ESMTP id 173FBC433B4
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:22 +0000 (UTC)
 | 
			
		||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
 | 
			
		||||
	by mail.kernel.org (Postfix) with ESMTP id EA57F61261
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:21 +0000 (UTC)
 | 
			
		||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
 | 
			
		||||
        id S230452AbhETGzl (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:41 -0400
 | 
			
		||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37912 "EHLO
 | 
			
		||||
        lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
 | 
			
		||||
        with ESMTP id S230466AbhETGzc (ORCPT
 | 
			
		||||
        <rfc822;linux-kernel@vger.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:32 -0400
 | 
			
		||||
Received: from mail-qk1-x749.google.com (mail-qk1-x749.google.com [IPv6:2607:f8b0:4864:20::749])
 | 
			
		||||
        by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C46CEC061574
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:10 -0700 (PDT)
 | 
			
		||||
Received: by mail-qk1-x749.google.com with SMTP id d201-20020ae9efd20000b02902e9e9d8d9dcso11687575qkg.10
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:10 -0700 (PDT)
 | 
			
		||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=google.com; s=20161025;
 | 
			
		||||
        h=date:in-reply-to:message-id:mime-version:references:subject:from:to
 | 
			
		||||
         :cc;
 | 
			
		||||
        bh=Yl+DjwsBODMfyiL17kQw/9BLrEC9zXFz8Mxu59WzP4Y=;
 | 
			
		||||
        b=Mrzy2M2k9QAzqf4Qlq2wFgC1uycH9/GSOy89uVYR+gUD3oaKMWpOn95M8hs2EzT1FG
 | 
			
		||||
         /N40V1/URD7mP48ZsP72lzG3rMvI0SepioQCu+0asiEBUJUrtY6kEz2CKTJEB4MwAGRU
 | 
			
		||||
         xnH/e/C5szSot199E2rMI+ZUJo/y8pBDfNIzZ7XzQ811Wxr6oM1C4DVA6DHQSWtdqS5J
 | 
			
		||||
         VMMjdjvsXW8hHCzs+5W06EYb73kJeqPHOFZ+XFMWXFrm8l/F2qujro4FMOgux0JB/XLW
 | 
			
		||||
         32qxH7ovQyCHL8Gg6vGigkolgFZhe6oag4JfCx0cj6eFlP+2j2w5ryU0kRvvHVILA9Xq
 | 
			
		||||
         e6Hg==
 | 
			
		||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=1e100.net; s=20161025;
 | 
			
		||||
        h=x-gm-message-state:date:in-reply-to:message-id:mime-version
 | 
			
		||||
         :references:subject:from:to:cc;
 | 
			
		||||
        bh=Yl+DjwsBODMfyiL17kQw/9BLrEC9zXFz8Mxu59WzP4Y=;
 | 
			
		||||
        b=Nel79d/ExWs1HdZZs12GqadGHxgI3W8FnqvRED1IBxJnCtiluti6ndyhp/JXtjDIct
 | 
			
		||||
         YtIRkMNVwmwMk6EFx5QOv0Br5VYQ/72hZsMd8kNj9z/m8CUtpMnJluKVNeyNT+Livkww
 | 
			
		||||
         y2hwgGJiuvWxBIy+ja/GH64SkCJTuttiOxpNFaRxB1STfhM2PjwwwiQG5GTxbqkkn3Dg
 | 
			
		||||
         fKBlHYI17sQ05tZRovJETs0f+1wBQPftjwjm6PJzpZ3ooNcBXdB6hRUrGZ6Pmyf2bRBR
 | 
			
		||||
         BptRYxbaQdSprCABGMW/2ySltaJaFitv6fShejQMxDX8xe+JDYJ0kEn9/3aMVEt+Vy+X
 | 
			
		||||
         rTAw==
 | 
			
		||||
X-Gm-Message-State: AOAM532IBC74aEi91Xqgl2rYw1QINB7mrdZT3v/EvERcHHCbI6v9/2cU
 | 
			
		||||
        Qe2UGq6f4OIKykMrwADvTCo5whMI+DQ=
 | 
			
		||||
X-Google-Smtp-Source: ABdhPJyyAX33aocRYynTtALpNyjv0w+Wa7lDS9awJiNK6me024wMLg+4FL2RHzwNDLwZg9DFBZ+B1LiDSsc=
 | 
			
		||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
 | 
			
		||||
 (user=yuzhao job=sendgmr) by 2002:a05:6214:18d:: with SMTP id
 | 
			
		||||
 q13mr3804726qvr.60.1621493649877; Wed, 19 May 2021 23:54:09 -0700 (PDT)
 | 
			
		||||
Date:   Thu, 20 May 2021 00:53:47 -0600
 | 
			
		||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
Message-Id: <20210520065355.2736558-7-yuzhao@google.com>
 | 
			
		||||
Mime-Version: 1.0
 | 
			
		||||
References: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
Subject: [PATCH v3 06/14] mm/workingset.c: refactor pack_shadow() and unpack_shadow()
 | 
			
		||||
From:   Yu Zhao <yuzhao@google.com>
 | 
			
		||||
To:     linux-mm@kvack.org
 | 
			
		||||
Cc:     Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
 | 
			
		||||
        Andrew Morton <akpm@linux-foundation.org>,
 | 
			
		||||
        Dave Chinner <david@fromorbit.com>,
 | 
			
		||||
        Dave Hansen <dave.hansen@linux.intel.com>,
 | 
			
		||||
        Donald Carr <sirspudd@gmail.com>,
 | 
			
		||||
        Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
 | 
			
		||||
        Johannes Weiner <hannes@cmpxchg.org>,
 | 
			
		||||
        Jonathan Corbet <corbet@lwn.net>,
 | 
			
		||||
        Joonsoo Kim <iamjoonsoo.kim@lge.com>,
 | 
			
		||||
        Konstantin Kharlamov <hi-angel@yandex.ru>,
 | 
			
		||||
        Marcus Seyfarth <m.seyfarth@gmail.com>,
 | 
			
		||||
        Matthew Wilcox <willy@infradead.org>,
 | 
			
		||||
        Mel Gorman <mgorman@suse.de>,
 | 
			
		||||
        Miaohe Lin <linmiaohe@huawei.com>,
 | 
			
		||||
        Michael Larabel <michael@michaellarabel.com>,
 | 
			
		||||
        Michal Hocko <mhocko@suse.com>,
 | 
			
		||||
        Michel Lespinasse <michel@lespinasse.org>,
 | 
			
		||||
        Rik van Riel <riel@surriel.com>,
 | 
			
		||||
        Roman Gushchin <guro@fb.com>,
 | 
			
		||||
        Tim Chen <tim.c.chen@linux.intel.com>,
 | 
			
		||||
        Vlastimil Babka <vbabka@suse.cz>,
 | 
			
		||||
        Yang Shi <shy828301@gmail.com>,
 | 
			
		||||
        Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
 | 
			
		||||
        linux-kernel@vger.kernel.org, lkp@lists.01.org,
 | 
			
		||||
        page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
 | 
			
		||||
        Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
Content-Type: text/plain; charset="UTF-8"
 | 
			
		||||
Precedence: bulk
 | 
			
		||||
List-ID: <linux-kernel.vger.kernel.org>
 | 
			
		||||
X-Mailing-List: linux-kernel@vger.kernel.org
 | 
			
		||||
List-Archive: <https://lore.kernel.org/lkml/>
 | 
			
		||||
 | 
			
		||||
This patch moves the bucket order and PageWorkingset() out of
 | 
			
		||||
pack_shadow() and unpack_shadow(). It has no merits on its own but
 | 
			
		||||
makes the upcoming changes to mm/workingset.c less diffy.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
 | 
			
		||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
---
 | 
			
		||||
 mm/workingset.c | 53 ++++++++++++++++++++-----------------------------
 | 
			
		||||
 1 file changed, 22 insertions(+), 31 deletions(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/mm/workingset.c b/mm/workingset.c
 | 
			
		||||
index b7cdeca5a76d..edb8aed2587e 100644
 | 
			
		||||
--- a/mm/workingset.c
 | 
			
		||||
+++ b/mm/workingset.c
 | 
			
		||||
@@ -168,9 +168,9 @@
 | 
			
		||||
  * refault distance will immediately activate the refaulting page.
 | 
			
		||||
  */
 | 
			
		||||
 
 | 
			
		||||
-#define EVICTION_SHIFT	((BITS_PER_LONG - BITS_PER_XA_VALUE) +	\
 | 
			
		||||
-			 1 + NODES_SHIFT + MEM_CGROUP_ID_SHIFT)
 | 
			
		||||
-#define EVICTION_MASK	(~0UL >> EVICTION_SHIFT)
 | 
			
		||||
+#define EVICTION_SHIFT		(BITS_PER_XA_VALUE - MEM_CGROUP_ID_SHIFT - NODES_SHIFT)
 | 
			
		||||
+#define EVICTION_MASK		(BIT(EVICTION_SHIFT) - 1)
 | 
			
		||||
+#define WORKINGSET_WIDTH	1
 | 
			
		||||
 
 | 
			
		||||
 /*
 | 
			
		||||
  * Eviction timestamps need to be able to cover the full range of
 | 
			
		||||
@@ -182,36 +182,23 @@
 | 
			
		||||
  */
 | 
			
		||||
 static unsigned int bucket_order __read_mostly;
 | 
			
		||||
 
 | 
			
		||||
-static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction,
 | 
			
		||||
-			 bool workingset)
 | 
			
		||||
+static void *pack_shadow(int memcg_id, struct pglist_data *pgdat, unsigned long val)
 | 
			
		||||
 {
 | 
			
		||||
-	eviction >>= bucket_order;
 | 
			
		||||
-	eviction &= EVICTION_MASK;
 | 
			
		||||
-	eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
 | 
			
		||||
-	eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
 | 
			
		||||
-	eviction = (eviction << 1) | workingset;
 | 
			
		||||
+	val = (val << MEM_CGROUP_ID_SHIFT) | memcg_id;
 | 
			
		||||
+	val = (val << NODES_SHIFT) | pgdat->node_id;
 | 
			
		||||
 
 | 
			
		||||
-	return xa_mk_value(eviction);
 | 
			
		||||
+	return xa_mk_value(val);
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
-static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
 | 
			
		||||
-			  unsigned long *evictionp, bool *workingsetp)
 | 
			
		||||
+static unsigned long unpack_shadow(void *shadow, int *memcg_id, struct pglist_data **pgdat)
 | 
			
		||||
 {
 | 
			
		||||
-	unsigned long entry = xa_to_value(shadow);
 | 
			
		||||
-	int memcgid, nid;
 | 
			
		||||
-	bool workingset;
 | 
			
		||||
+	unsigned long val = xa_to_value(shadow);
 | 
			
		||||
 
 | 
			
		||||
-	workingset = entry & 1;
 | 
			
		||||
-	entry >>= 1;
 | 
			
		||||
-	nid = entry & ((1UL << NODES_SHIFT) - 1);
 | 
			
		||||
-	entry >>= NODES_SHIFT;
 | 
			
		||||
-	memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
 | 
			
		||||
-	entry >>= MEM_CGROUP_ID_SHIFT;
 | 
			
		||||
+	*pgdat = NODE_DATA(val & (BIT(NODES_SHIFT) - 1));
 | 
			
		||||
+	val >>= NODES_SHIFT;
 | 
			
		||||
+	*memcg_id = val & (BIT(MEM_CGROUP_ID_SHIFT) - 1);
 | 
			
		||||
 
 | 
			
		||||
-	*memcgidp = memcgid;
 | 
			
		||||
-	*pgdat = NODE_DATA(nid);
 | 
			
		||||
-	*evictionp = entry << bucket_order;
 | 
			
		||||
-	*workingsetp = workingset;
 | 
			
		||||
+	return val >> MEM_CGROUP_ID_SHIFT;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
 /**
 | 
			
		||||
@@ -266,8 +253,10 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
 | 
			
		||||
 	/* XXX: target_memcg can be NULL, go through lruvec */
 | 
			
		||||
 	memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
 | 
			
		||||
 	eviction = atomic_long_read(&lruvec->nonresident_age);
 | 
			
		||||
+	eviction >>= bucket_order;
 | 
			
		||||
+	eviction = (eviction << WORKINGSET_WIDTH) | PageWorkingset(page);
 | 
			
		||||
 	workingset_age_nonresident(lruvec, thp_nr_pages(page));
 | 
			
		||||
-	return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
 | 
			
		||||
+	return pack_shadow(memcgid, pgdat, eviction);
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
 /**
 | 
			
		||||
@@ -294,7 +283,7 @@ void workingset_refault(struct page *page, void *shadow)
 | 
			
		||||
 	bool workingset;
 | 
			
		||||
 	int memcgid;
 | 
			
		||||
 
 | 
			
		||||
-	unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
 | 
			
		||||
+	eviction = unpack_shadow(shadow, &memcgid, &pgdat);
 | 
			
		||||
 
 | 
			
		||||
 	rcu_read_lock();
 | 
			
		||||
 	/*
 | 
			
		||||
@@ -318,6 +307,8 @@ void workingset_refault(struct page *page, void *shadow)
 | 
			
		||||
 		goto out;
 | 
			
		||||
 	eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
 | 
			
		||||
 	refault = atomic_long_read(&eviction_lruvec->nonresident_age);
 | 
			
		||||
+	workingset = eviction & (BIT(WORKINGSET_WIDTH) - 1);
 | 
			
		||||
+	eviction = (eviction >> WORKINGSET_WIDTH) << bucket_order;
 | 
			
		||||
 
 | 
			
		||||
 	/*
 | 
			
		||||
 	 * Calculate the refault distance
 | 
			
		||||
@@ -335,7 +326,7 @@ void workingset_refault(struct page *page, void *shadow)
 | 
			
		||||
 	 * longest time, so the occasional inappropriate activation
 | 
			
		||||
 	 * leading to pressure on the active list is not a problem.
 | 
			
		||||
 	 */
 | 
			
		||||
-	refault_distance = (refault - eviction) & EVICTION_MASK;
 | 
			
		||||
+	refault_distance = (refault - eviction) & (EVICTION_MASK >> WORKINGSET_WIDTH);
 | 
			
		||||
 
 | 
			
		||||
 	/*
 | 
			
		||||
 	 * The activation decision for this page is made at the level
 | 
			
		||||
@@ -593,7 +584,7 @@ static int __init workingset_init(void)
 | 
			
		||||
 	unsigned int max_order;
 | 
			
		||||
 	int ret;
 | 
			
		||||
 
 | 
			
		||||
-	BUILD_BUG_ON(BITS_PER_LONG < EVICTION_SHIFT);
 | 
			
		||||
+	BUILD_BUG_ON(EVICTION_SHIFT < WORKINGSET_WIDTH);
 | 
			
		||||
 	/*
 | 
			
		||||
 	 * Calculate the eviction bucket size to cover the longest
 | 
			
		||||
 	 * actionable refault distance, which is currently half of
 | 
			
		||||
@@ -601,7 +592,7 @@ static int __init workingset_init(void)
 | 
			
		||||
 	 * some more pages at runtime, so keep working with up to
 | 
			
		||||
 	 * double the initial memory by using totalram_pages as-is.
 | 
			
		||||
 	 */
 | 
			
		||||
-	timestamp_bits = BITS_PER_LONG - EVICTION_SHIFT;
 | 
			
		||||
+	timestamp_bits = EVICTION_SHIFT - WORKINGSET_WIDTH;
 | 
			
		||||
 	max_order = fls_long(totalram_pages() - 1);
 | 
			
		||||
 	if (max_order > timestamp_bits)
 | 
			
		||||
 		bucket_order = max_order - timestamp_bits;
 | 
			
		||||
-- 
 | 
			
		||||
2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -0,0 +1,686 @@
 | 
			
		||||
From mboxrd@z Thu Jan  1 00:00:00 1970
 | 
			
		||||
Return-Path: <linux-kernel-owner@kernel.org>
 | 
			
		||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
 | 
			
		||||
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
 | 
			
		||||
X-Spam-Level: 
 | 
			
		||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
 | 
			
		||||
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
 | 
			
		||||
	INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
 | 
			
		||||
	USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
 | 
			
		||||
	version=3.4.0
 | 
			
		||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
 | 
			
		||||
	by smtp.lore.kernel.org (Postfix) with ESMTP id AEEEDC43461
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:26 +0000 (UTC)
 | 
			
		||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
 | 
			
		||||
	by mail.kernel.org (Postfix) with ESMTP id 4D74961186
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:26 +0000 (UTC)
 | 
			
		||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
 | 
			
		||||
        id S231144AbhETGzp (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:45 -0400
 | 
			
		||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37910 "EHLO
 | 
			
		||||
        lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
 | 
			
		||||
        with ESMTP id S230478AbhETGzg (ORCPT
 | 
			
		||||
        <rfc822;linux-kernel@vger.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:36 -0400
 | 
			
		||||
Received: from mail-qv1-xf4a.google.com (mail-qv1-xf4a.google.com [IPv6:2607:f8b0:4864:20::f4a])
 | 
			
		||||
        by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5CD6FC061763
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:14 -0700 (PDT)
 | 
			
		||||
Received: by mail-qv1-xf4a.google.com with SMTP id x2-20020a0cda020000b02901edb4c412fdso12424236qvj.11
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:14 -0700 (PDT)
 | 
			
		||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=google.com; s=20161025;
 | 
			
		||||
        h=date:in-reply-to:message-id:mime-version:references:subject:from:to
 | 
			
		||||
         :cc;
 | 
			
		||||
        bh=Jb580jSe4IcT6fVqPR22jrL3z+VNcMEKM2UgbfL90k4=;
 | 
			
		||||
        b=rTxj5e7tRY5wx29jetDGP8dUly4vBHNX0SBJeZKRsCOEiHaQ+coy05du1f4bT6oCWw
 | 
			
		||||
         rJWrdbUyp5aci9MKmCQ2Z5qPBf7F+zDTL+8wpoufyGbRvdGkfwDkAgQV6LLsi9xZzdyr
 | 
			
		||||
         bpcyHItG1lIReRXOkR0GKWNz8GfEVNO7lE+G6Sc1sHPUEEfw3FF5Vl/Wta1OxKsGQQe4
 | 
			
		||||
         02oeo8STGdqGF0yOczRyqWZ/SBFcNGiPQ7nrGaWA3FguRBAwZ2dOrTrmM5ug10rbOQmf
 | 
			
		||||
         L/m3eja1mOwffFkrgumZ0Sm9KZ5sbKJNbLAjPYQAmAcoXhU/NVnLrMVtxSGppGFwdyOz
 | 
			
		||||
         NMsw==
 | 
			
		||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=1e100.net; s=20161025;
 | 
			
		||||
        h=x-gm-message-state:date:in-reply-to:message-id:mime-version
 | 
			
		||||
         :references:subject:from:to:cc;
 | 
			
		||||
        bh=Jb580jSe4IcT6fVqPR22jrL3z+VNcMEKM2UgbfL90k4=;
 | 
			
		||||
        b=i0Py5qwrQv4OOBWcpJcYxjG5lgHvV4Gq3X3fG5L0aB3lLLnS3mObKdM6XG+uYC1b1G
 | 
			
		||||
         z/Sfx4n/1+/0EPZnFoo80K1ry0Y7SD/W30OUEPR8PValuCLHEHzVeoVK2+TPI8DMEzz1
 | 
			
		||||
         r+jWpxZkah8B613QrPIvvcZSIb0lxcsV6JxpYjFixO/mizct7mrdls35j1Thb7ehgWtO
 | 
			
		||||
         W5aAGiMIxBprDhKHJ2D2Oz85hWRyQYND4jEA68bzh9ybz4cYMVIX3C+9uH+cVIhZ6JZL
 | 
			
		||||
         febwADPME4CsH8gMntK/GWzf5Yu+sdeBYn+6VJKrG/4c7dWi0xgFGWYrgCtSlk8kVgOe
 | 
			
		||||
         bH1w==
 | 
			
		||||
X-Gm-Message-State: AOAM530OUX0GiyYChE/1C1GuJXPP4zDS9QrWZKB+3aIFDiz73ADIQaxu
 | 
			
		||||
        gJxNX12VvCvNdCSId0kuSWl88ETTfcg=
 | 
			
		||||
X-Google-Smtp-Source: ABdhPJz4m/yxkWn5wBamzXd/wEoVvHq3AOPsnc1+c/ewg4oojPM6XcGKJYYybO2Mtsb6BDRPtu5ccAJRcHw=
 | 
			
		||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
 | 
			
		||||
 (user=yuzhao job=sendgmr) by 2002:a0c:edcf:: with SMTP id i15mr4021372qvr.10.1621493653456;
 | 
			
		||||
 Wed, 19 May 2021 23:54:13 -0700 (PDT)
 | 
			
		||||
Date:   Thu, 20 May 2021 00:53:49 -0600
 | 
			
		||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
Message-Id: <20210520065355.2736558-9-yuzhao@google.com>
 | 
			
		||||
Mime-Version: 1.0
 | 
			
		||||
References: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
Subject: [PATCH v3 08/14] mm: multigenerational lru: activation
 | 
			
		||||
From:   Yu Zhao <yuzhao@google.com>
 | 
			
		||||
To:     linux-mm@kvack.org
 | 
			
		||||
Cc:     Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
 | 
			
		||||
        Andrew Morton <akpm@linux-foundation.org>,
 | 
			
		||||
        Dave Chinner <david@fromorbit.com>,
 | 
			
		||||
        Dave Hansen <dave.hansen@linux.intel.com>,
 | 
			
		||||
        Donald Carr <sirspudd@gmail.com>,
 | 
			
		||||
        Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
 | 
			
		||||
        Johannes Weiner <hannes@cmpxchg.org>,
 | 
			
		||||
        Jonathan Corbet <corbet@lwn.net>,
 | 
			
		||||
        Joonsoo Kim <iamjoonsoo.kim@lge.com>,
 | 
			
		||||
        Konstantin Kharlamov <hi-angel@yandex.ru>,
 | 
			
		||||
        Marcus Seyfarth <m.seyfarth@gmail.com>,
 | 
			
		||||
        Matthew Wilcox <willy@infradead.org>,
 | 
			
		||||
        Mel Gorman <mgorman@suse.de>,
 | 
			
		||||
        Miaohe Lin <linmiaohe@huawei.com>,
 | 
			
		||||
        Michael Larabel <michael@michaellarabel.com>,
 | 
			
		||||
        Michal Hocko <mhocko@suse.com>,
 | 
			
		||||
        Michel Lespinasse <michel@lespinasse.org>,
 | 
			
		||||
        Rik van Riel <riel@surriel.com>,
 | 
			
		||||
        Roman Gushchin <guro@fb.com>,
 | 
			
		||||
        Tim Chen <tim.c.chen@linux.intel.com>,
 | 
			
		||||
        Vlastimil Babka <vbabka@suse.cz>,
 | 
			
		||||
        Yang Shi <shy828301@gmail.com>,
 | 
			
		||||
        Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
 | 
			
		||||
        linux-kernel@vger.kernel.org, lkp@lists.01.org,
 | 
			
		||||
        page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
 | 
			
		||||
        Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
Content-Type: text/plain; charset="UTF-8"
 | 
			
		||||
Precedence: bulk
 | 
			
		||||
List-ID: <linux-kernel.vger.kernel.org>
 | 
			
		||||
X-Mailing-List: linux-kernel@vger.kernel.org
 | 
			
		||||
List-Archive: <https://lore.kernel.org/lkml/>
 | 
			
		||||
 | 
			
		||||
For pages accessed multiple times via file descriptors, instead of
 | 
			
		||||
activating them upon the second access, we activate them based on the
 | 
			
		||||
refault rates of their tiers. Each generation contains at most
 | 
			
		||||
MAX_NR_TIERS tiers, and they require additional MAX_NR_TIERS-2 bits in
 | 
			
		||||
page->flags. Pages accessed N times via file descriptors belong to
 | 
			
		||||
tier order_base_2(N). Tier 0 is the base tier and it contains pages
 | 
			
		||||
read ahead, accessed once via file descriptors and accessed only via
 | 
			
		||||
page tables. Pages from the base tier are evicted regardless of the
 | 
			
		||||
refault rate. Pages from upper tiers that have higher refault rates
 | 
			
		||||
than the base tier will be moved to the next generation. A feedback
 | 
			
		||||
loop modeled after the PID controller monitors refault rates across
 | 
			
		||||
all tiers and decides when to activate pages from which upper tiers
 | 
			
		||||
in the reclaim path. The advantages of this model are:
 | 
			
		||||
  1) It has a negligible cost in the buffered IO access path because
 | 
			
		||||
  activations are done optionally in the reclaim path.
 | 
			
		||||
  2) It takes mapped pages into account and avoids overprotecting
 | 
			
		||||
  pages accessed multiple times via file descriptors.
 | 
			
		||||
  3) More tiers offer better protection to pages accessed more than
 | 
			
		||||
  twice when workloads doing intensive buffered IO are under memory
 | 
			
		||||
  pressure.
 | 
			
		||||
 | 
			
		||||
For pages mapped upon page faults, the accessed bit is set during the
 | 
			
		||||
initial faults. Ideally we add them to the per-zone lists indexed by
 | 
			
		||||
max_seq, i.e., the youngest generation, so that eviction will not
 | 
			
		||||
consider them before the aging has scanned them. For anon pages not in
 | 
			
		||||
swap cache, this can be done easily in the page fault path: we rename
 | 
			
		||||
lru_cache_add_inactive_or_unevictable() to lru_cache_add_page_vma()
 | 
			
		||||
and add a new parameter, which is set to true for pages mapped upon
 | 
			
		||||
page faults. For pages in page cache or swap cache, we cannot
 | 
			
		||||
differentiate the page fault path from the read ahead path at the time
 | 
			
		||||
we call lru_cache_add(). So we add them to the per-zone lists indexed by
 | 
			
		||||
min_seq, i.e., the oldest generation, for now.
 | 
			
		||||
 | 
			
		||||
Finally, we need to make sure deactivation works when the
 | 
			
		||||
multigenerational lru is enabled. We cannot use PageActive() because
 | 
			
		||||
it is not set on pages from active generations, in order to spare the
 | 
			
		||||
aging the trouble of clearing it when active generations become
 | 
			
		||||
inactive. So we deactivate pages unconditionally since deactivation is
 | 
			
		||||
not a hot code path worth additional optimizations.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
 | 
			
		||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
---
 | 
			
		||||
 include/linux/mm_inline.h |  40 ++++++++++++++
 | 
			
		||||
 include/linux/swap.h      |   4 +-
 | 
			
		||||
 kernel/events/uprobes.c   |   2 +-
 | 
			
		||||
 mm/huge_memory.c          |   2 +-
 | 
			
		||||
 mm/khugepaged.c           |   2 +-
 | 
			
		||||
 mm/memory.c               |  10 ++--
 | 
			
		||||
 mm/migrate.c              |   2 +-
 | 
			
		||||
 mm/swap.c                 |  22 +++++---
 | 
			
		||||
 mm/swapfile.c             |   2 +-
 | 
			
		||||
 mm/userfaultfd.c          |   2 +-
 | 
			
		||||
 mm/vmscan.c               |  91 ++++++++++++++++++++++++++++++-
 | 
			
		||||
 mm/workingset.c           | 112 ++++++++++++++++++++++++++++++++++++++
 | 
			
		||||
 12 files changed, 269 insertions(+), 22 deletions(-)
 | 
			
		||||
 | 
			
		||||
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
 | 
			
		||||
index ae3e3826dd7f..f3b99f65a652 100644
 | 
			
		||||
--- a/include/linux/mm_inline.h
 | 
			
		||||
+++ b/include/linux/mm_inline.h
 | 
			
		||||
@@ -103,6 +103,12 @@ static inline int lru_gen_from_seq(unsigned long seq)
 | 
			
		||||
 	return seq % MAX_NR_GENS;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+/* Convert the level of usage to a tier. See the comment on MAX_NR_TIERS. */
 | 
			
		||||
+static inline int lru_tier_from_usage(int usage)
 | 
			
		||||
+{
 | 
			
		||||
+	return order_base_2(usage + 1);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 /* Return a proper index regardless whether we keep a full history of stats. */
 | 
			
		||||
 static inline int hist_from_seq_or_gen(int seq_or_gen)
 | 
			
		||||
 {
 | 
			
		||||
@@ -245,6 +251,36 @@ static inline bool lru_gen_deletion(struct page *page, struct lruvec *lruvec)
 | 
			
		||||
 	return true;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+/* Return the level of usage of a page. See the comment on MAX_NR_TIERS. */
 | 
			
		||||
+static inline int page_tier_usage(struct page *page)
 | 
			
		||||
+{
 | 
			
		||||
+	unsigned long flags = READ_ONCE(page->flags);
 | 
			
		||||
+
 | 
			
		||||
+	return flags & BIT(PG_workingset) ?
 | 
			
		||||
+	       ((flags & LRU_USAGE_MASK) >> LRU_USAGE_PGOFF) + 1 : 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+/* Increment the usage counter after a page is accessed via file descriptors. */
 | 
			
		||||
+static inline void page_inc_usage(struct page *page)
 | 
			
		||||
+{
 | 
			
		||||
+	unsigned long usage;
 | 
			
		||||
+	unsigned long old_flags, new_flags;
 | 
			
		||||
+
 | 
			
		||||
+	do {
 | 
			
		||||
+		old_flags = READ_ONCE(page->flags);
 | 
			
		||||
+
 | 
			
		||||
+		if (!(old_flags & BIT(PG_workingset))) {
 | 
			
		||||
+			new_flags = old_flags | BIT(PG_workingset);
 | 
			
		||||
+			continue;
 | 
			
		||||
+		}
 | 
			
		||||
+
 | 
			
		||||
+		usage = (old_flags & LRU_USAGE_MASK) + BIT(LRU_USAGE_PGOFF);
 | 
			
		||||
+
 | 
			
		||||
+		new_flags = (old_flags & ~LRU_USAGE_MASK) | min(usage, LRU_USAGE_MASK);
 | 
			
		||||
+	} while (new_flags != old_flags &&
 | 
			
		||||
+		 cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 #else /* CONFIG_LRU_GEN */
 | 
			
		||||
 
 | 
			
		||||
 static inline bool lru_gen_enabled(void)
 | 
			
		||||
@@ -262,6 +298,10 @@ static inline bool lru_gen_deletion(struct page *page, struct lruvec *lruvec)
 | 
			
		||||
 	return false;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+static inline void page_inc_usage(struct page *page)
 | 
			
		||||
+{
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 #endif /* CONFIG_LRU_GEN */
 | 
			
		||||
 
 | 
			
		||||
 static __always_inline void add_page_to_lru_list(struct page *page,
 | 
			
		||||
diff --git a/include/linux/swap.h b/include/linux/swap.h
 | 
			
		||||
index 144727041e78..30b1f15f5c6e 100644
 | 
			
		||||
--- a/include/linux/swap.h
 | 
			
		||||
+++ b/include/linux/swap.h
 | 
			
		||||
@@ -365,8 +365,8 @@ extern void deactivate_page(struct page *page);
 | 
			
		||||
 extern void mark_page_lazyfree(struct page *page);
 | 
			
		||||
 extern void swap_setup(void);
 | 
			
		||||
 
 | 
			
		||||
-extern void lru_cache_add_inactive_or_unevictable(struct page *page,
 | 
			
		||||
-						struct vm_area_struct *vma);
 | 
			
		||||
+extern void lru_cache_add_page_vma(struct page *page, struct vm_area_struct *vma,
 | 
			
		||||
+				   bool faulting);
 | 
			
		||||
 
 | 
			
		||||
 /* linux/mm/vmscan.c */
 | 
			
		||||
 extern unsigned long zone_reclaimable_pages(struct zone *zone);
 | 
			
		||||
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
 | 
			
		||||
index 6addc9780319..4e93e5602723 100644
 | 
			
		||||
--- a/kernel/events/uprobes.c
 | 
			
		||||
+++ b/kernel/events/uprobes.c
 | 
			
		||||
@@ -184,7 +184,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 | 
			
		||||
 	if (new_page) {
 | 
			
		||||
 		get_page(new_page);
 | 
			
		||||
 		page_add_new_anon_rmap(new_page, vma, addr, false);
 | 
			
		||||
-		lru_cache_add_inactive_or_unevictable(new_page, vma);
 | 
			
		||||
+		lru_cache_add_page_vma(new_page, vma, false);
 | 
			
		||||
 	} else
 | 
			
		||||
 		/* no new page, just dec_mm_counter for old_page */
 | 
			
		||||
 		dec_mm_counter(mm, MM_ANONPAGES);
 | 
			
		||||
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
 | 
			
		||||
index 8ac9093e5a0d..681da4a3cf61 100644
 | 
			
		||||
--- a/mm/huge_memory.c
 | 
			
		||||
+++ b/mm/huge_memory.c
 | 
			
		||||
@@ -636,7 +636,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 | 
			
		||||
 		entry = mk_huge_pmd(page, vma->vm_page_prot);
 | 
			
		||||
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 | 
			
		||||
 		page_add_new_anon_rmap(page, vma, haddr, true);
 | 
			
		||||
-		lru_cache_add_inactive_or_unevictable(page, vma);
 | 
			
		||||
+		lru_cache_add_page_vma(page, vma, true);
 | 
			
		||||
 		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
 | 
			
		||||
 		set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
 | 
			
		||||
 		update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 | 
			
		||||
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
 | 
			
		||||
index 6c0185fdd815..09e5346c2754 100644
 | 
			
		||||
--- a/mm/khugepaged.c
 | 
			
		||||
+++ b/mm/khugepaged.c
 | 
			
		||||
@@ -1198,7 +1198,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 | 
			
		||||
 	spin_lock(pmd_ptl);
 | 
			
		||||
 	BUG_ON(!pmd_none(*pmd));
 | 
			
		||||
 	page_add_new_anon_rmap(new_page, vma, address, true);
 | 
			
		||||
-	lru_cache_add_inactive_or_unevictable(new_page, vma);
 | 
			
		||||
+	lru_cache_add_page_vma(new_page, vma, true);
 | 
			
		||||
 	pgtable_trans_huge_deposit(mm, pmd, pgtable);
 | 
			
		||||
 	set_pmd_at(mm, address, pmd, _pmd);
 | 
			
		||||
 	update_mmu_cache_pmd(vma, address, pmd);
 | 
			
		||||
diff --git a/mm/memory.c b/mm/memory.c
 | 
			
		||||
index 730daa00952b..a76196885f92 100644
 | 
			
		||||
--- a/mm/memory.c
 | 
			
		||||
+++ b/mm/memory.c
 | 
			
		||||
@@ -839,7 +839,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
 | 
			
		||||
 	copy_user_highpage(new_page, page, addr, src_vma);
 | 
			
		||||
 	__SetPageUptodate(new_page);
 | 
			
		||||
 	page_add_new_anon_rmap(new_page, dst_vma, addr, false);
 | 
			
		||||
-	lru_cache_add_inactive_or_unevictable(new_page, dst_vma);
 | 
			
		||||
+	lru_cache_add_page_vma(new_page, dst_vma, false);
 | 
			
		||||
 	rss[mm_counter(new_page)]++;
 | 
			
		||||
 
 | 
			
		||||
 	/* All done, just insert the new page copy in the child */
 | 
			
		||||
@@ -2950,7 +2950,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 | 
			
		||||
 		 */
 | 
			
		||||
 		ptep_clear_flush_notify(vma, vmf->address, vmf->pte);
 | 
			
		||||
 		page_add_new_anon_rmap(new_page, vma, vmf->address, false);
 | 
			
		||||
-		lru_cache_add_inactive_or_unevictable(new_page, vma);
 | 
			
		||||
+		lru_cache_add_page_vma(new_page, vma, true);
 | 
			
		||||
 		/*
 | 
			
		||||
 		 * We call the notify macro here because, when using secondary
 | 
			
		||||
 		 * mmu page tables (such as kvm shadow page tables), we want the
 | 
			
		||||
@@ -3479,7 +3479,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 | 
			
		||||
 	/* ksm created a completely new copy */
 | 
			
		||||
 	if (unlikely(page != swapcache && swapcache)) {
 | 
			
		||||
 		page_add_new_anon_rmap(page, vma, vmf->address, false);
 | 
			
		||||
-		lru_cache_add_inactive_or_unevictable(page, vma);
 | 
			
		||||
+		lru_cache_add_page_vma(page, vma, true);
 | 
			
		||||
 	} else {
 | 
			
		||||
 		do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
 | 
			
		||||
 	}
 | 
			
		||||
@@ -3625,7 +3625,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 | 
			
		||||
 
 | 
			
		||||
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 | 
			
		||||
 	page_add_new_anon_rmap(page, vma, vmf->address, false);
 | 
			
		||||
-	lru_cache_add_inactive_or_unevictable(page, vma);
 | 
			
		||||
+	lru_cache_add_page_vma(page, vma, true);
 | 
			
		||||
 setpte:
 | 
			
		||||
 	set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
 | 
			
		||||
 
 | 
			
		||||
@@ -3793,7 +3793,7 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
 | 
			
		||||
 	if (write && !(vma->vm_flags & VM_SHARED)) {
 | 
			
		||||
 		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 | 
			
		||||
 		page_add_new_anon_rmap(page, vma, addr, false);
 | 
			
		||||
-		lru_cache_add_inactive_or_unevictable(page, vma);
 | 
			
		||||
+		lru_cache_add_page_vma(page, vma, true);
 | 
			
		||||
 	} else {
 | 
			
		||||
 		inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
 | 
			
		||||
 		page_add_file_rmap(page, false);
 | 
			
		||||
diff --git a/mm/migrate.c b/mm/migrate.c
 | 
			
		||||
index b234c3f3acb7..d3307c9eced4 100644
 | 
			
		||||
--- a/mm/migrate.c
 | 
			
		||||
+++ b/mm/migrate.c
 | 
			
		||||
@@ -2967,7 +2967,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 | 
			
		||||
 	inc_mm_counter(mm, MM_ANONPAGES);
 | 
			
		||||
 	page_add_new_anon_rmap(page, vma, addr, false);
 | 
			
		||||
 	if (!is_zone_device_page(page))
 | 
			
		||||
-		lru_cache_add_inactive_or_unevictable(page, vma);
 | 
			
		||||
+		lru_cache_add_page_vma(page, vma, false);
 | 
			
		||||
 	get_page(page);
 | 
			
		||||
 
 | 
			
		||||
 	if (flush) {
 | 
			
		||||
diff --git a/mm/swap.c b/mm/swap.c
 | 
			
		||||
index dfb48cf9c2c9..96ce95eeb2c9 100644
 | 
			
		||||
--- a/mm/swap.c
 | 
			
		||||
+++ b/mm/swap.c
 | 
			
		||||
@@ -433,6 +433,8 @@ void mark_page_accessed(struct page *page)
 | 
			
		||||
 		 * this list is never rotated or maintained, so marking an
 | 
			
		||||
 		 * evictable page accessed has no effect.
 | 
			
		||||
 		 */
 | 
			
		||||
+	} else if (lru_gen_enabled()) {
 | 
			
		||||
+		page_inc_usage(page);
 | 
			
		||||
 	} else if (!PageActive(page)) {
 | 
			
		||||
 		/*
 | 
			
		||||
 		 * If the page is on the LRU, queue it for activation via
 | 
			
		||||
@@ -478,15 +480,14 @@ void lru_cache_add(struct page *page)
 | 
			
		||||
 EXPORT_SYMBOL(lru_cache_add);
 | 
			
		||||
 
 | 
			
		||||
 /**
 | 
			
		||||
- * lru_cache_add_inactive_or_unevictable
 | 
			
		||||
+ * lru_cache_add_page_vma
 | 
			
		||||
  * @page:  the page to be added to LRU
 | 
			
		||||
  * @vma:   vma in which page is mapped for determining reclaimability
 | 
			
		||||
  *
 | 
			
		||||
- * Place @page on the inactive or unevictable LRU list, depending on its
 | 
			
		||||
- * evictability.
 | 
			
		||||
+ * Place @page on an LRU list, depending on its evictability.
 | 
			
		||||
  */
 | 
			
		||||
-void lru_cache_add_inactive_or_unevictable(struct page *page,
 | 
			
		||||
-					 struct vm_area_struct *vma)
 | 
			
		||||
+void lru_cache_add_page_vma(struct page *page, struct vm_area_struct *vma,
 | 
			
		||||
+			    bool faulting)
 | 
			
		||||
 {
 | 
			
		||||
 	bool unevictable;
 | 
			
		||||
 
 | 
			
		||||
@@ -503,6 +504,11 @@ void lru_cache_add_inactive_or_unevictable(struct page *page,
 | 
			
		||||
 		__mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
 | 
			
		||||
 		count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
 | 
			
		||||
 	}
 | 
			
		||||
+
 | 
			
		||||
+	/* tell the multigenerational lru that the page is being faulted in */
 | 
			
		||||
+	if (lru_gen_enabled() && !unevictable && faulting)
 | 
			
		||||
+		SetPageActive(page);
 | 
			
		||||
+
 | 
			
		||||
 	lru_cache_add(page);
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
@@ -529,7 +535,7 @@ void lru_cache_add_inactive_or_unevictable(struct page *page,
 | 
			
		||||
  */
 | 
			
		||||
 static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
 | 
			
		||||
 {
 | 
			
		||||
-	bool active = PageActive(page);
 | 
			
		||||
+	bool active = PageActive(page) || lru_gen_enabled();
 | 
			
		||||
 	int nr_pages = thp_nr_pages(page);
 | 
			
		||||
 
 | 
			
		||||
 	if (PageUnevictable(page))
 | 
			
		||||
@@ -569,7 +575,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
 | 
			
		||||
 
 | 
			
		||||
 static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec)
 | 
			
		||||
 {
 | 
			
		||||
-	if (PageActive(page) && !PageUnevictable(page)) {
 | 
			
		||||
+	if (!PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) {
 | 
			
		||||
 		int nr_pages = thp_nr_pages(page);
 | 
			
		||||
 
 | 
			
		||||
 		del_page_from_lru_list(page, lruvec);
 | 
			
		||||
@@ -684,7 +690,7 @@ void deactivate_file_page(struct page *page)
 | 
			
		||||
  */
 | 
			
		||||
 void deactivate_page(struct page *page)
 | 
			
		||||
 {
 | 
			
		||||
-	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
 | 
			
		||||
+	if (PageLRU(page) && !PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) {
 | 
			
		||||
 		struct pagevec *pvec;
 | 
			
		||||
 
 | 
			
		||||
 		local_lock(&lru_pvecs.lock);
 | 
			
		||||
diff --git a/mm/swapfile.c b/mm/swapfile.c
 | 
			
		||||
index 3598b668f533..549e94318b2f 100644
 | 
			
		||||
--- a/mm/swapfile.c
 | 
			
		||||
+++ b/mm/swapfile.c
 | 
			
		||||
@@ -1936,7 +1936,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 | 
			
		||||
 		page_add_anon_rmap(page, vma, addr, false);
 | 
			
		||||
 	} else { /* ksm created a completely new copy */
 | 
			
		||||
 		page_add_new_anon_rmap(page, vma, addr, false);
 | 
			
		||||
-		lru_cache_add_inactive_or_unevictable(page, vma);
 | 
			
		||||
+		lru_cache_add_page_vma(page, vma, false);
 | 
			
		||||
 	}
 | 
			
		||||
 	swap_free(entry);
 | 
			
		||||
 out:
 | 
			
		||||
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
 | 
			
		||||
index e14b3820c6a8..175d55b4f594 100644
 | 
			
		||||
--- a/mm/userfaultfd.c
 | 
			
		||||
+++ b/mm/userfaultfd.c
 | 
			
		||||
@@ -123,7 +123,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 | 
			
		||||
 
 | 
			
		||||
 	inc_mm_counter(dst_mm, MM_ANONPAGES);
 | 
			
		||||
 	page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
 | 
			
		||||
-	lru_cache_add_inactive_or_unevictable(page, dst_vma);
 | 
			
		||||
+	lru_cache_add_page_vma(page, dst_vma, true);
 | 
			
		||||
 
 | 
			
		||||
 	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
 | 
			
		||||
 
 | 
			
		||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
 | 
			
		||||
index f7bbfc0b1ebd..84d25079092e 100644
 | 
			
		||||
--- a/mm/vmscan.c
 | 
			
		||||
+++ b/mm/vmscan.c
 | 
			
		||||
@@ -1094,9 +1094,11 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 | 
			
		||||
 
 | 
			
		||||
 	if (PageSwapCache(page)) {
 | 
			
		||||
 		swp_entry_t swap = { .val = page_private(page) };
 | 
			
		||||
-		mem_cgroup_swapout(page, swap);
 | 
			
		||||
+
 | 
			
		||||
+		/* get a shadow entry before page_memcg() is cleared */
 | 
			
		||||
 		if (reclaimed && !mapping_exiting(mapping))
 | 
			
		||||
 			shadow = workingset_eviction(page, target_memcg);
 | 
			
		||||
+		mem_cgroup_swapout(page, swap);
 | 
			
		||||
 		__delete_from_swap_cache(page, swap, shadow);
 | 
			
		||||
 		xa_unlock_irqrestore(&mapping->i_pages, flags);
 | 
			
		||||
 		put_swap_page(page, swap);
 | 
			
		||||
@@ -2780,6 +2782,93 @@ static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
 | 
			
		||||
 	       get_nr_gens(lruvec, 1) <= MAX_NR_GENS;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+/******************************************************************************
 | 
			
		||||
+ *                          refault feedback loop
 | 
			
		||||
+ ******************************************************************************/
 | 
			
		||||
+
 | 
			
		||||
+/*
 | 
			
		||||
+ * A feedback loop modeled after the PID controller. Currently supports the
 | 
			
		||||
+ * proportional (P) and the integral (I) terms; the derivative (D) term can be
 | 
			
		||||
+ * added if necessary. The setpoint (SP) is the desired position; the process
 | 
			
		||||
+ * variable (PV) is the measured position. The error is the difference between
 | 
			
		||||
+ * the SP and the PV. A positive error results in a positive control output
 | 
			
		||||
+ * correction, which, in our case, is to allow eviction.
 | 
			
		||||
+ *
 | 
			
		||||
+ * The P term is the current refault rate refaulted/(evicted+activated), which
 | 
			
		||||
+ * has a weight of 1. The I term is the arithmetic mean of the last N refault
 | 
			
		||||
+ * rates, weighted by geometric series 1/2, 1/4, ..., 1/(1<<N).
 | 
			
		||||
+ *
 | 
			
		||||
+ * Our goal is to make sure upper tiers have similar refault rates as the base
 | 
			
		||||
+ * tier. That is, we try to be fair to all tiers by maintaining similar refault
 | 
			
		||||
+ * rates across them.
 | 
			
		||||
+ */
 | 
			
		||||
+struct controller_pos {
 | 
			
		||||
+	unsigned long refaulted;
 | 
			
		||||
+	unsigned long total;
 | 
			
		||||
+	int gain;
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
+static void read_controller_pos(struct controller_pos *pos, struct lruvec *lruvec,
 | 
			
		||||
+				int type, int tier, int gain)
 | 
			
		||||
+{
 | 
			
		||||
+	struct lrugen *lrugen = &lruvec->evictable;
 | 
			
		||||
+	int hist = hist_from_seq_or_gen(lrugen->min_seq[type]);
 | 
			
		||||
+
 | 
			
		||||
+	pos->refaulted = lrugen->avg_refaulted[type][tier] +
 | 
			
		||||
+			 atomic_long_read(&lrugen->refaulted[hist][type][tier]);
 | 
			
		||||
+	pos->total = lrugen->avg_total[type][tier] +
 | 
			
		||||
+		     atomic_long_read(&lrugen->evicted[hist][type][tier]);
 | 
			
		||||
+	if (tier)
 | 
			
		||||
+		pos->total += lrugen->activated[hist][type][tier - 1];
 | 
			
		||||
+	pos->gain = gain;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void reset_controller_pos(struct lruvec *lruvec, int gen, int type)
 | 
			
		||||
+{
 | 
			
		||||
+	int tier;
 | 
			
		||||
+	int hist = hist_from_seq_or_gen(gen);
 | 
			
		||||
+	struct lrugen *lrugen = &lruvec->evictable;
 | 
			
		||||
+	bool carryover = gen == lru_gen_from_seq(lrugen->min_seq[type]);
 | 
			
		||||
+
 | 
			
		||||
+	if (!carryover && NR_STAT_GENS == 1)
 | 
			
		||||
+		return;
 | 
			
		||||
+
 | 
			
		||||
+	for (tier = 0; tier < MAX_NR_TIERS; tier++) {
 | 
			
		||||
+		if (carryover) {
 | 
			
		||||
+			unsigned long sum;
 | 
			
		||||
+
 | 
			
		||||
+			sum = lrugen->avg_refaulted[type][tier] +
 | 
			
		||||
+			      atomic_long_read(&lrugen->refaulted[hist][type][tier]);
 | 
			
		||||
+			WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2);
 | 
			
		||||
+
 | 
			
		||||
+			sum = lrugen->avg_total[type][tier] +
 | 
			
		||||
+			      atomic_long_read(&lrugen->evicted[hist][type][tier]);
 | 
			
		||||
+			if (tier)
 | 
			
		||||
+				sum += lrugen->activated[hist][type][tier - 1];
 | 
			
		||||
+			WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2);
 | 
			
		||||
+
 | 
			
		||||
+			if (NR_STAT_GENS > 1)
 | 
			
		||||
+				continue;
 | 
			
		||||
+		}
 | 
			
		||||
+
 | 
			
		||||
+		atomic_long_set(&lrugen->refaulted[hist][type][tier], 0);
 | 
			
		||||
+		atomic_long_set(&lrugen->evicted[hist][type][tier], 0);
 | 
			
		||||
+		if (tier)
 | 
			
		||||
+			WRITE_ONCE(lrugen->activated[hist][type][tier - 1], 0);
 | 
			
		||||
+	}
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static bool positive_ctrl_err(struct controller_pos *sp, struct controller_pos *pv)
 | 
			
		||||
+{
 | 
			
		||||
+	/*
 | 
			
		||||
+	 * Allow eviction if the PV has a limited number of refaulted pages or a
 | 
			
		||||
+	 * lower refault rate than the SP.
 | 
			
		||||
+	 */
 | 
			
		||||
+	return pv->refaulted < SWAP_CLUSTER_MAX ||
 | 
			
		||||
+	       pv->refaulted * max(sp->total, 1UL) * sp->gain <=
 | 
			
		||||
+	       sp->refaulted * max(pv->total, 1UL) * pv->gain;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 /******************************************************************************
 | 
			
		||||
  *                          state change
 | 
			
		||||
  ******************************************************************************/
 | 
			
		||||
diff --git a/mm/workingset.c b/mm/workingset.c
 | 
			
		||||
index edb8aed2587e..3f3f03d51ea7 100644
 | 
			
		||||
--- a/mm/workingset.c
 | 
			
		||||
+++ b/mm/workingset.c
 | 
			
		||||
@@ -201,6 +201,110 @@ static unsigned long unpack_shadow(void *shadow, int *memcg_id, struct pglist_da
 | 
			
		||||
 	return val >> MEM_CGROUP_ID_SHIFT;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+#ifdef CONFIG_LRU_GEN
 | 
			
		||||
+
 | 
			
		||||
+#if LRU_GEN_SHIFT + LRU_USAGE_SHIFT >= EVICTION_SHIFT
 | 
			
		||||
+#error "Please try smaller NODES_SHIFT, NR_LRU_GENS and TIERS_PER_GEN configurations"
 | 
			
		||||
+#endif
 | 
			
		||||
+
 | 
			
		||||
+static void page_set_usage(struct page *page, int usage)
 | 
			
		||||
+{
 | 
			
		||||
+	unsigned long old_flags, new_flags;
 | 
			
		||||
+
 | 
			
		||||
+	VM_BUG_ON(usage > BIT(LRU_USAGE_WIDTH));
 | 
			
		||||
+	/* A usage of zero is not recorded; page->flags is left as is. */
 | 
			
		||||
+	if (!usage)
 | 
			
		||||
+		return;
 | 
			
		||||
+	/* Atomically set LRU_TIER_FLAGS and store usage - 1 in the LRU_USAGE_MASK bits. */
 | 
			
		||||
+	do {
 | 
			
		||||
+		old_flags = READ_ONCE(page->flags);
 | 
			
		||||
+		new_flags = (old_flags & ~LRU_USAGE_MASK) | LRU_TIER_FLAGS |
 | 
			
		||||
+			    ((usage - 1UL) << LRU_USAGE_PGOFF);
 | 
			
		||||
+	} while (new_flags != old_flags &&
 | 
			
		||||
+		 cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+/* Count an eviction and return the token to store in the page's shadow entry. */
 | 
			
		||||
+static void *lru_gen_eviction(struct page *page)
 | 
			
		||||
+{
 | 
			
		||||
+	int hist, tier;
 | 
			
		||||
+	unsigned long token;
 | 
			
		||||
+	unsigned long min_seq;
 | 
			
		||||
+	struct lruvec *lruvec;
 | 
			
		||||
+	struct lrugen *lrugen;
 | 
			
		||||
+	int type = page_is_file_lru(page);
 | 
			
		||||
+	int usage = page_tier_usage(page);
 | 
			
		||||
+	struct mem_cgroup *memcg = page_memcg(page);
 | 
			
		||||
+	struct pglist_data *pgdat = page_pgdat(page);
 | 
			
		||||
+
 | 
			
		||||
+	lruvec = mem_cgroup_lruvec(memcg, pgdat);
 | 
			
		||||
+	lrugen = &lruvec->evictable;
 | 
			
		||||
+	min_seq = READ_ONCE(lrugen->min_seq[type]);
 | 
			
		||||
+	token = (min_seq << LRU_USAGE_SHIFT) | usage;	/* min_seq above the low usage bits */
 | 
			
		||||
+	/* Bump evicted[hist][type][tier] by thp_nr_pages() of this page. */
 | 
			
		||||
+	hist = hist_from_seq_or_gen(min_seq);
 | 
			
		||||
+	tier = lru_tier_from_usage(usage);
 | 
			
		||||
+	atomic_long_add(thp_nr_pages(page), &lrugen->evicted[hist][type][tier]);
 | 
			
		||||
+	/* The shadow entry packs the memcg ID and the node alongside the token. */
 | 
			
		||||
+	return pack_shadow(mem_cgroup_id(memcg), pgdat, token);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+/* Account a refaulted page based on the token stored in its shadow entry. */
 | 
			
		||||
+static void lru_gen_refault(struct page *page, void *shadow)
 | 
			
		||||
+{
 | 
			
		||||
+	int hist, tier, usage;
 | 
			
		||||
+	int memcg_id;
 | 
			
		||||
+	unsigned long token;
 | 
			
		||||
+	unsigned long min_seq;
 | 
			
		||||
+	struct lruvec *lruvec;
 | 
			
		||||
+	struct lrugen *lrugen;
 | 
			
		||||
+	struct pglist_data *pgdat;
 | 
			
		||||
+	struct mem_cgroup *memcg;
 | 
			
		||||
+	int type = page_is_file_lru(page);
 | 
			
		||||
+
 | 
			
		||||
+	token = unpack_shadow(shadow, &memcg_id, &pgdat);
 | 
			
		||||
+	if (page_pgdat(page) != pgdat)	/* node differs from the one in the shadow entry */
 | 
			
		||||
+		return;
 | 
			
		||||
+	/* The memcg ID in the shadow entry must also match the page's current memcg. */
 | 
			
		||||
+	rcu_read_lock();
 | 
			
		||||
+	memcg = page_memcg_rcu(page);
 | 
			
		||||
+	if (mem_cgroup_id(memcg) != memcg_id)
 | 
			
		||||
+		goto unlock;
 | 
			
		||||
+	/* The low LRU_USAGE_SHIFT bits of the token hold the usage; the rest is a sequence number. */
 | 
			
		||||
+	usage = token & (BIT(LRU_USAGE_SHIFT) - 1);
 | 
			
		||||
+	token >>= LRU_USAGE_SHIFT;
 | 
			
		||||
+	/* Bail out unless that sequence number equals the current min_seq, masked the same way. */
 | 
			
		||||
+	lruvec = mem_cgroup_lruvec(memcg, pgdat);
 | 
			
		||||
+	lrugen = &lruvec->evictable;
 | 
			
		||||
+	min_seq = READ_ONCE(lrugen->min_seq[type]);
 | 
			
		||||
+	if (token != (min_seq & (EVICTION_MASK >> LRU_USAGE_SHIFT)))
 | 
			
		||||
+		goto unlock;
 | 
			
		||||
+
 | 
			
		||||
+	page_set_usage(page, usage);
 | 
			
		||||
+	/* Count the refault; a nonzero tier is also counted under WORKINGSET_RESTORE_BASE. */
 | 
			
		||||
+	hist = hist_from_seq_or_gen(min_seq);
 | 
			
		||||
+	tier = lru_tier_from_usage(usage);
 | 
			
		||||
+	atomic_long_add(thp_nr_pages(page), &lrugen->refaulted[hist][type][tier]);
 | 
			
		||||
+	inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + type);
 | 
			
		||||
+	if (tier)
 | 
			
		||||
+		inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type);
 | 
			
		||||
+unlock:
 | 
			
		||||
+	rcu_read_unlock();
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+#else /* CONFIG_LRU_GEN */
 | 
			
		||||
+
 | 
			
		||||
+static void *lru_gen_eviction(struct page *page)
 | 
			
		||||
+{
 | 
			
		||||
+	return NULL;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void lru_gen_refault(struct page *page, void *shadow)
 | 
			
		||||
+{
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+#endif /* CONFIG_LRU_GEN */
 | 
			
		||||
+
 | 
			
		||||
 /**
 | 
			
		||||
  * workingset_age_nonresident - age non-resident entries as LRU ages
 | 
			
		||||
  * @lruvec: the lruvec that was aged
 | 
			
		||||
@@ -249,6 +353,9 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
 | 
			
		||||
 	VM_BUG_ON_PAGE(page_count(page), page);
 | 
			
		||||
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 | 
			
		||||
 
 | 
			
		||||
+	if (lru_gen_enabled())
 | 
			
		||||
+		return lru_gen_eviction(page);
 | 
			
		||||
+
 | 
			
		||||
 	lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
 | 
			
		||||
 	/* XXX: target_memcg can be NULL, go through lruvec */
 | 
			
		||||
 	memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
 | 
			
		||||
@@ -283,6 +390,11 @@ void workingset_refault(struct page *page, void *shadow)
 | 
			
		||||
 	bool workingset;
 | 
			
		||||
 	int memcgid;
 | 
			
		||||
 
 | 
			
		||||
+	if (lru_gen_enabled()) {
 | 
			
		||||
+		lru_gen_refault(page, shadow);
 | 
			
		||||
+		return;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
 	eviction = unpack_shadow(shadow, &memcgid, &pgdat);
 | 
			
		||||
 
 | 
			
		||||
 	rcu_read_lock();
 | 
			
		||||
-- 
 | 
			
		||||
2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,789 @@
 | 
			
		||||
From mboxrd@z Thu Jan  1 00:00:00 1970
 | 
			
		||||
Return-Path: <linux-kernel-owner@kernel.org>
 | 
			
		||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
 | 
			
		||||
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
 | 
			
		||||
X-Spam-Level: 
 | 
			
		||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
 | 
			
		||||
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
 | 
			
		||||
	INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
 | 
			
		||||
	USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
 | 
			
		||||
	version=3.4.0
 | 
			
		||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
 | 
			
		||||
	by smtp.lore.kernel.org (Postfix) with ESMTP id 658D8C433B4
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:28 +0000 (UTC)
 | 
			
		||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
 | 
			
		||||
	by mail.kernel.org (Postfix) with ESMTP id 477CA611BE
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:28 +0000 (UTC)
 | 
			
		||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
 | 
			
		||||
        id S231165AbhETGzr (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:47 -0400
 | 
			
		||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37908 "EHLO
 | 
			
		||||
        lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
 | 
			
		||||
        with ESMTP id S230424AbhETGzh (ORCPT
 | 
			
		||||
        <rfc822;linux-kernel@vger.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:37 -0400
 | 
			
		||||
Received: from mail-qv1-xf4a.google.com (mail-qv1-xf4a.google.com [IPv6:2607:f8b0:4864:20::f4a])
 | 
			
		||||
        by lindbergh.monkeyblade.net (Postfix) with ESMTPS id EAF95C061574
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:15 -0700 (PDT)
 | 
			
		||||
Received: by mail-qv1-xf4a.google.com with SMTP id r11-20020a0cb28b0000b02901c87a178503so12393761qve.22
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:15 -0700 (PDT)
 | 
			
		||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=google.com; s=20161025;
 | 
			
		||||
        h=date:in-reply-to:message-id:mime-version:references:subject:from:to
 | 
			
		||||
         :cc;
 | 
			
		||||
        bh=P78haeNjzr5Qg1JjQymXtCqtqXQumRFjJWFx1f2kmKM=;
 | 
			
		||||
        b=Tjsj7/GeS8mUtREXLxPPRM0sVotzXnOQ/Dq8MvDajXLm9nT1QjyleqN5ONXOxfHJSb
 | 
			
		||||
         gOKQ1YJhBwyuC3HCKJXdOCqgqOmQbjJGjOkM9uXhZa9/9W+Bvnszx1RDX4YRwIqqWgFX
 | 
			
		||||
         flJvQvCE2SODYJwvTs6wKWKKQlvvw9WY05ct8oakXuEPnAOblfqTR+pbk7GoCJo67kNf
 | 
			
		||||
         enTegbyR2yRwGi9N5coUMJM8TYP+BoBWQaHNTVR3nL7a6nEjAg1IrL1w4WaZ+/fsdDdF
 | 
			
		||||
         6FlorKJ31sPCd2wxkCOnn+o98vuymHUDmyr+h9KxZtecLKHCkTsolSRuLiyHQvlzqY3q
 | 
			
		||||
         md3Q==
 | 
			
		||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=1e100.net; s=20161025;
 | 
			
		||||
        h=x-gm-message-state:date:in-reply-to:message-id:mime-version
 | 
			
		||||
         :references:subject:from:to:cc;
 | 
			
		||||
        bh=P78haeNjzr5Qg1JjQymXtCqtqXQumRFjJWFx1f2kmKM=;
 | 
			
		||||
        b=oK3flk/MdWi/bqnKFxC7O7BqH1b1apkGTQgT4OLVuSurUs5o7HcTTMvjXuljN/KmMh
 | 
			
		||||
         /OGEWIkS+BHD6OkEE9W7Q/5GoGXL7Np1sLByjbiNrfCNZHtmEvYLHtP9lYulkcWaLTgA
 | 
			
		||||
         XEr3n9zWofP9Jw0bPM24RW8jqzAlzld2tkrpDSgnfmMEpyzmjuFEURnKsx/ubUbuQ8Vd
 | 
			
		||||
         rkIngqIt1YDBI+x6EZEdq4OpP+8H9TDr8KZBjUVfzpvASnMYn2y9gZX4Obd5/t+wys2m
 | 
			
		||||
         zn5+4aqeR8mtxQVzHwPM48LG5wPqbTtMF0+Mhoba0Enk55ZL29+xKT00ltswnvHNJDj9
 | 
			
		||||
         UduQ==
 | 
			
		||||
X-Gm-Message-State: AOAM5324lhHETXZQ7vXVsQ3UhfF140iLgXV/soebRFc0ECp355pnwH5X
 | 
			
		||||
        pEYaLnlH20Lc9hBvEeYp/HXipMEwsdE=
 | 
			
		||||
X-Google-Smtp-Source: ABdhPJxAkOjDRLTPPi669WBE6Bb6QiyW8Wr0JRRG09c2L2y7UvYt7Th6JQxML99ZXqbjrM7T5yJPx76NwGo=
 | 
			
		||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
 | 
			
		||||
 (user=yuzhao job=sendgmr) by 2002:a05:6214:76b:: with SMTP id
 | 
			
		||||
 f11mr3992753qvz.8.1621493655061; Wed, 19 May 2021 23:54:15 -0700 (PDT)
 | 
			
		||||
Date:   Thu, 20 May 2021 00:53:50 -0600
 | 
			
		||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
Message-Id: <20210520065355.2736558-10-yuzhao@google.com>
 | 
			
		||||
Mime-Version: 1.0
 | 
			
		||||
References: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
Subject: [PATCH v3 09/14] mm: multigenerational lru: mm_struct list
 | 
			
		||||
From:   Yu Zhao <yuzhao@google.com>
 | 
			
		||||
To:     linux-mm@kvack.org
 | 
			
		||||
Cc:     Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
 | 
			
		||||
        Andrew Morton <akpm@linux-foundation.org>,
 | 
			
		||||
        Dave Chinner <david@fromorbit.com>,
 | 
			
		||||
        Dave Hansen <dave.hansen@linux.intel.com>,
 | 
			
		||||
        Donald Carr <sirspudd@gmail.com>,
 | 
			
		||||
        Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
 | 
			
		||||
        Johannes Weiner <hannes@cmpxchg.org>,
 | 
			
		||||
        Jonathan Corbet <corbet@lwn.net>,
 | 
			
		||||
        Joonsoo Kim <iamjoonsoo.kim@lge.com>,
 | 
			
		||||
        Konstantin Kharlamov <hi-angel@yandex.ru>,
 | 
			
		||||
        Marcus Seyfarth <m.seyfarth@gmail.com>,
 | 
			
		||||
        Matthew Wilcox <willy@infradead.org>,
 | 
			
		||||
        Mel Gorman <mgorman@suse.de>,
 | 
			
		||||
        Miaohe Lin <linmiaohe@huawei.com>,
 | 
			
		||||
        Michael Larabel <michael@michaellarabel.com>,
 | 
			
		||||
        Michal Hocko <mhocko@suse.com>,
 | 
			
		||||
        Michel Lespinasse <michel@lespinasse.org>,
 | 
			
		||||
        Rik van Riel <riel@surriel.com>,
 | 
			
		||||
        Roman Gushchin <guro@fb.com>,
 | 
			
		||||
        Tim Chen <tim.c.chen@linux.intel.com>,
 | 
			
		||||
        Vlastimil Babka <vbabka@suse.cz>,
 | 
			
		||||
        Yang Shi <shy828301@gmail.com>,
 | 
			
		||||
        Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
 | 
			
		||||
        linux-kernel@vger.kernel.org, lkp@lists.01.org,
 | 
			
		||||
        page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
 | 
			
		||||
        Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
Content-Type: text/plain; charset="UTF-8"
 | 
			
		||||
Precedence: bulk
 | 
			
		||||
List-ID: <linux-kernel.vger.kernel.org>
 | 
			
		||||
X-Mailing-List: linux-kernel@vger.kernel.org
 | 
			
		||||
List-Archive: <https://lore.kernel.org/lkml/>
 | 
			
		||||
 | 
			
		||||
In order to scan page tables, we add an infrastructure to maintain
 | 
			
		||||
either a system-wide mm_struct list or per-memcg mm_struct lists, and
 | 
			
		||||
track whether an mm_struct is being used or has been used since the
 | 
			
		||||
last scan.
 | 
			
		||||
 | 
			
		||||
Multiple threads can concurrently work on the same mm_struct list, and
 | 
			
		||||
each of them will be given a different mm_struct belonging to a
 | 
			
		||||
process that has been scheduled since the last scan.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
 | 
			
		||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
---
 | 
			
		||||
 fs/exec.c                  |   2 +
 | 
			
		||||
 include/linux/memcontrol.h |   6 +
 | 
			
		||||
 include/linux/mm_types.h   | 107 ++++++++++++
 | 
			
		||||
 kernel/exit.c              |   1 +
 | 
			
		||||
 kernel/fork.c              |  10 ++
 | 
			
		||||
 kernel/kthread.c           |   1 +
 | 
			
		||||
 kernel/sched/core.c        |   2 +
 | 
			
		||||
 mm/memcontrol.c            |  28 ++++
 | 
			
		||||
 mm/vmscan.c                | 324 +++++++++++++++++++++++++++++++++++++
 | 
			
		||||
 9 files changed, 481 insertions(+)
 | 
			
		||||
 | 
			
		||||
diff --git a/fs/exec.c b/fs/exec.c
 | 
			
		||||
index 18594f11c31f..c691d4d7720c 100644
 | 
			
		||||
--- a/fs/exec.c
 | 
			
		||||
+++ b/fs/exec.c
 | 
			
		||||
@@ -1008,6 +1008,7 @@ static int exec_mmap(struct mm_struct *mm)
 | 
			
		||||
 	active_mm = tsk->active_mm;
 | 
			
		||||
 	tsk->active_mm = mm;
 | 
			
		||||
 	tsk->mm = mm;
 | 
			
		||||
+	lru_gen_add_mm(mm);
 | 
			
		||||
 	/*
 | 
			
		||||
 	 * This prevents preemption while active_mm is being loaded and
 | 
			
		||||
 	 * it and mm are being updated, which could cause problems for
 | 
			
		||||
@@ -1018,6 +1019,7 @@ static int exec_mmap(struct mm_struct *mm)
 | 
			
		||||
 	if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
 | 
			
		||||
 		local_irq_enable();
 | 
			
		||||
 	activate_mm(active_mm, mm);
 | 
			
		||||
+	lru_gen_switch_mm(active_mm, mm);
 | 
			
		||||
 	if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
 | 
			
		||||
 		local_irq_enable();
 | 
			
		||||
 	tsk->mm->vmacache_seqnum = 0;
 | 
			
		||||
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
 | 
			
		||||
index 6bcac3d91dd1..60601a997433 100644
 | 
			
		||||
--- a/include/linux/memcontrol.h
 | 
			
		||||
+++ b/include/linux/memcontrol.h
 | 
			
		||||
@@ -230,6 +230,8 @@ struct obj_cgroup {
 | 
			
		||||
 	};
 | 
			
		||||
 };
 | 
			
		||||
 
 | 
			
		||||
+struct lru_gen_mm_list;
 | 
			
		||||
+
 | 
			
		||||
 /*
 | 
			
		||||
  * The memory controller data structure. The memory controller controls both
 | 
			
		||||
  * page cache and RSS per cgroup. We would eventually like to provide
 | 
			
		||||
@@ -349,6 +351,10 @@ struct mem_cgroup {
 | 
			
		||||
 	struct deferred_split deferred_split_queue;
 | 
			
		||||
 #endif
 | 
			
		||||
 
 | 
			
		||||
+#ifdef CONFIG_LRU_GEN
 | 
			
		||||
+	struct lru_gen_mm_list *mm_list;
 | 
			
		||||
+#endif
 | 
			
		||||
+
 | 
			
		||||
 	struct mem_cgroup_per_node *nodeinfo[0];
 | 
			
		||||
 	/* WARNING: nodeinfo must be the last member here */
 | 
			
		||||
 };
 | 
			
		||||
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
 | 
			
		||||
index 5aacc1c10a45..b0f662555eae 100644
 | 
			
		||||
--- a/include/linux/mm_types.h
 | 
			
		||||
+++ b/include/linux/mm_types.h
 | 
			
		||||
@@ -15,6 +15,8 @@
 | 
			
		||||
 #include <linux/page-flags-layout.h>
 | 
			
		||||
 #include <linux/workqueue.h>
 | 
			
		||||
 #include <linux/seqlock.h>
 | 
			
		||||
+#include <linux/nodemask.h>
 | 
			
		||||
+#include <linux/mmdebug.h>
 | 
			
		||||
 
 | 
			
		||||
 #include <asm/mmu.h>
 | 
			
		||||
 
 | 
			
		||||
@@ -561,6 +563,22 @@ struct mm_struct {
 | 
			
		||||
 
 | 
			
		||||
 #ifdef CONFIG_IOMMU_SUPPORT
 | 
			
		||||
 		u32 pasid;
 | 
			
		||||
+#endif
 | 
			
		||||
+#ifdef CONFIG_LRU_GEN
 | 
			
		||||
+		struct {
 | 
			
		||||
+			/* the node of a global or per-memcg mm_struct list */
 | 
			
		||||
+			struct list_head list;
 | 
			
		||||
+#ifdef CONFIG_MEMCG
 | 
			
		||||
+			/* points to the memcg of the owner task above */
 | 
			
		||||
+			struct mem_cgroup *memcg;
 | 
			
		||||
+#endif
 | 
			
		||||
+			/* whether this mm_struct has been used since the last walk */
 | 
			
		||||
+			nodemask_t nodes;
 | 
			
		||||
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 | 
			
		||||
+			/* the number of CPUs using this mm_struct */
 | 
			
		||||
+			atomic_t nr_cpus;
 | 
			
		||||
+#endif
 | 
			
		||||
+		} lrugen;
 | 
			
		||||
 #endif
 | 
			
		||||
 	} __randomize_layout;
 | 
			
		||||
 
 | 
			
		||||
@@ -588,6 +606,95 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 | 
			
		||||
 	return (struct cpumask *)&mm->cpu_bitmap;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+#ifdef CONFIG_LRU_GEN
 | 
			
		||||
+
 | 
			
		||||
+void lru_gen_init_mm(struct mm_struct *mm);
 | 
			
		||||
+void lru_gen_add_mm(struct mm_struct *mm);
 | 
			
		||||
+void lru_gen_del_mm(struct mm_struct *mm);
 | 
			
		||||
+#ifdef CONFIG_MEMCG
 | 
			
		||||
+int lru_gen_alloc_mm_list(struct mem_cgroup *memcg);
 | 
			
		||||
+void lru_gen_free_mm_list(struct mem_cgroup *memcg);
 | 
			
		||||
+void lru_gen_migrate_mm(struct mm_struct *mm);
 | 
			
		||||
+#endif
 | 
			
		||||
+
 | 
			
		||||
+/* Track the usage of each mm_struct so that we can skip inactive ones. */
 | 
			
		||||
+static inline void lru_gen_switch_mm(struct mm_struct *old, struct mm_struct *new)
 | 
			
		||||
+{
 | 
			
		||||
+	/* exclude init_mm, efi_mm, etc., i.e. any mm_struct located in core kernel data */
 | 
			
		||||
+	if (!core_kernel_data((unsigned long)old)) {
 | 
			
		||||
+		VM_BUG_ON(old == &init_mm);
 | 
			
		||||
+		/* switching away from @old: set every bit in its nodes mask and drop its CPU count */
 | 
			
		||||
+		nodes_setall(old->lrugen.nodes);
 | 
			
		||||
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 | 
			
		||||
+		atomic_dec(&old->lrugen.nr_cpus);
 | 
			
		||||
+		VM_BUG_ON_MM(atomic_read(&old->lrugen.nr_cpus) < 0, old);
 | 
			
		||||
+#endif
 | 
			
		||||
+	} else	/* mm_struct in core kernel data: its list pointers must be NULL */
 | 
			
		||||
+		VM_BUG_ON_MM(READ_ONCE(old->lrugen.list.prev) ||
 | 
			
		||||
+			     READ_ONCE(old->lrugen.list.next), old);
 | 
			
		||||
+
 | 
			
		||||
+	if (!core_kernel_data((unsigned long)new)) {
 | 
			
		||||
+		VM_BUG_ON(new == &init_mm);
 | 
			
		||||
+		/* switching to @new: only its CPU count changes, its nodes mask is left alone */
 | 
			
		||||
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 | 
			
		||||
+		atomic_inc(&new->lrugen.nr_cpus);
 | 
			
		||||
+		VM_BUG_ON_MM(atomic_read(&new->lrugen.nr_cpus) < 0, new);
 | 
			
		||||
+#endif
 | 
			
		||||
+	} else
 | 
			
		||||
+		VM_BUG_ON_MM(READ_ONCE(new->lrugen.list.prev) ||
 | 
			
		||||
+			     READ_ONCE(new->lrugen.list.next), new);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+/* Return whether this mm_struct is being used on any CPUs. */
 | 
			
		||||
+static inline bool lru_gen_mm_is_active(struct mm_struct *mm)
 | 
			
		||||
+{
 | 
			
		||||
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 | 
			
		||||
+	return !cpumask_empty(mm_cpumask(mm));	/* active if mm_cpumask() is nonempty */
 | 
			
		||||
+#else /* !CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
 | 
			
		||||
+	return atomic_read(&mm->lrugen.nr_cpus);	/* count maintained by lru_gen_switch_mm() */
 | 
			
		||||
+#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+#else /* CONFIG_LRU_GEN */
 | 
			
		||||
+
 | 
			
		||||
+static inline void lru_gen_init_mm(struct mm_struct *mm)
 | 
			
		||||
+{
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static inline void lru_gen_add_mm(struct mm_struct *mm)
 | 
			
		||||
+{
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static inline void lru_gen_del_mm(struct mm_struct *mm)
 | 
			
		||||
+{
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+#ifdef CONFIG_MEMCG
 | 
			
		||||
+static inline int lru_gen_alloc_mm_list(struct mem_cgroup *memcg)
 | 
			
		||||
+{
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static inline void lru_gen_free_mm_list(struct mem_cgroup *memcg)
 | 
			
		||||
+{
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static inline void lru_gen_migrate_mm(struct mm_struct *mm)
 | 
			
		||||
+{
 | 
			
		||||
+}
 | 
			
		||||
+#endif
 | 
			
		||||
+
 | 
			
		||||
+static inline void lru_gen_switch_mm(struct mm_struct *old, struct mm_struct *new)
 | 
			
		||||
+{
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static inline bool lru_gen_mm_is_active(struct mm_struct *mm)
 | 
			
		||||
+{
 | 
			
		||||
+	return false;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+#endif /* CONFIG_LRU_GEN */
 | 
			
		||||
+
 | 
			
		||||
 struct mmu_gather;
 | 
			
		||||
 extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
 | 
			
		||||
 extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
 | 
			
		||||
diff --git a/kernel/exit.c b/kernel/exit.c
 | 
			
		||||
index fd1c04193e18..b362179852f1 100644
 | 
			
		||||
--- a/kernel/exit.c
 | 
			
		||||
+++ b/kernel/exit.c
 | 
			
		||||
@@ -423,6 +423,7 @@ void mm_update_next_owner(struct mm_struct *mm)
 | 
			
		||||
 		goto retry;
 | 
			
		||||
 	}
 | 
			
		||||
 	WRITE_ONCE(mm->owner, c);
 | 
			
		||||
+	lru_gen_migrate_mm(mm);
 | 
			
		||||
 	task_unlock(c);
 | 
			
		||||
 	put_task_struct(c);
 | 
			
		||||
 }
 | 
			
		||||
diff --git a/kernel/fork.c b/kernel/fork.c
 | 
			
		||||
index dc06afd725cb..2fd7dae9afcb 100644
 | 
			
		||||
--- a/kernel/fork.c
 | 
			
		||||
+++ b/kernel/fork.c
 | 
			
		||||
@@ -669,6 +669,7 @@ static void check_mm(struct mm_struct *mm)
 | 
			
		||||
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 | 
			
		||||
 	VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
 | 
			
		||||
 #endif
 | 
			
		||||
+	VM_BUG_ON_MM(lru_gen_mm_is_active(mm), mm);
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
 #define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
 | 
			
		||||
@@ -1061,6 +1062,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 | 
			
		||||
 		goto fail_nocontext;
 | 
			
		||||
 
 | 
			
		||||
 	mm->user_ns = get_user_ns(user_ns);
 | 
			
		||||
+	lru_gen_init_mm(mm);
 | 
			
		||||
 	return mm;
 | 
			
		||||
 
 | 
			
		||||
 fail_nocontext:
 | 
			
		||||
@@ -1103,6 +1105,7 @@ static inline void __mmput(struct mm_struct *mm)
 | 
			
		||||
 	}
 | 
			
		||||
 	if (mm->binfmt)
 | 
			
		||||
 		module_put(mm->binfmt->module);
 | 
			
		||||
+	lru_gen_del_mm(mm);
 | 
			
		||||
 	mmdrop(mm);
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
@@ -2524,6 +2527,13 @@ pid_t kernel_clone(struct kernel_clone_args *args)
 | 
			
		||||
 		get_task_struct(p);
 | 
			
		||||
 	}
 | 
			
		||||
 
 | 
			
		||||
+	if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) {
 | 
			
		||||
+		/* lock the task to synchronize with memcg migration */
 | 
			
		||||
+		task_lock(p);
 | 
			
		||||
+		lru_gen_add_mm(p->mm);
 | 
			
		||||
+		task_unlock(p);
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
 	wake_up_new_task(p);
 | 
			
		||||
 
 | 
			
		||||
 	/* forking complete and child started to run, tell ptracer */
 | 
			
		||||
diff --git a/kernel/kthread.c b/kernel/kthread.c
 | 
			
		||||
index fe3f2a40d61e..b81e49ed31a7 100644
 | 
			
		||||
--- a/kernel/kthread.c
 | 
			
		||||
+++ b/kernel/kthread.c
 | 
			
		||||
@@ -1325,6 +1325,7 @@ void kthread_use_mm(struct mm_struct *mm)
 | 
			
		||||
 	tsk->mm = mm;
 | 
			
		||||
 	membarrier_update_current_mm(mm);
 | 
			
		||||
 	switch_mm_irqs_off(active_mm, mm, tsk);
 | 
			
		||||
+	lru_gen_switch_mm(active_mm, mm);
 | 
			
		||||
 	local_irq_enable();
 | 
			
		||||
 	task_unlock(tsk);
 | 
			
		||||
 #ifdef finish_arch_post_lock_switch
 | 
			
		||||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
 | 
			
		||||
index 5226cc26a095..2d4b77f173db 100644
 | 
			
		||||
--- a/kernel/sched/core.c
 | 
			
		||||
+++ b/kernel/sched/core.c
 | 
			
		||||
@@ -4323,6 +4323,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 | 
			
		||||
 		 * finish_task_switch()'s mmdrop().
 | 
			
		||||
 		 */
 | 
			
		||||
 		switch_mm_irqs_off(prev->active_mm, next->mm, next);
 | 
			
		||||
+		lru_gen_switch_mm(prev->active_mm, next->mm);
 | 
			
		||||
 
 | 
			
		||||
 		if (!prev->mm) {                        // from kernel
 | 
			
		||||
 			/* will mmdrop() in finish_task_switch(). */
 | 
			
		||||
@@ -7603,6 +7604,7 @@ void idle_task_exit(void)
 | 
			
		||||
 
 | 
			
		||||
 	if (mm != &init_mm) {
 | 
			
		||||
 		switch_mm(mm, &init_mm, current);
 | 
			
		||||
+		lru_gen_switch_mm(mm, &init_mm);
 | 
			
		||||
 		finish_arch_post_lock_switch();
 | 
			
		||||
 	}
 | 
			
		||||
 
 | 
			
		||||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
 | 
			
		||||
index 64ada9e650a5..58b610ffa0e0 100644
 | 
			
		||||
--- a/mm/memcontrol.c
 | 
			
		||||
+++ b/mm/memcontrol.c
 | 
			
		||||
@@ -4981,6 +4981,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 | 
			
		||||
 	for_each_node(node)
 | 
			
		||||
 		free_mem_cgroup_per_node_info(memcg, node);
 | 
			
		||||
 	free_percpu(memcg->vmstats_percpu);
 | 
			
		||||
+	lru_gen_free_mm_list(memcg);
 | 
			
		||||
 	kfree(memcg);
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
@@ -5030,6 +5031,9 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 | 
			
		||||
 		if (alloc_mem_cgroup_per_node_info(memcg, node))
 | 
			
		||||
 			goto fail;
 | 
			
		||||
 
 | 
			
		||||
+	if (lru_gen_alloc_mm_list(memcg))
 | 
			
		||||
+		goto fail;
 | 
			
		||||
+
 | 
			
		||||
 	if (memcg_wb_domain_init(memcg, GFP_KERNEL))
 | 
			
		||||
 		goto fail;
 | 
			
		||||
 
 | 
			
		||||
@@ -5991,6 +5995,29 @@ static void mem_cgroup_move_task(void)
 | 
			
		||||
 }
 | 
			
		||||
 #endif
 | 
			
		||||
 
 | 
			
		||||
+#ifdef CONFIG_LRU_GEN
 | 
			
		||||
+static void mem_cgroup_attach(struct cgroup_taskset *tset)
 | 
			
		||||
+{
 | 
			
		||||
+	struct cgroup_subsys_state *css;
 | 
			
		||||
+	struct task_struct *task = NULL;
 | 
			
		||||
+	/* find the first leader if there is any; the iterator leaves task NULL otherwise */
 | 
			
		||||
+	cgroup_taskset_for_each_leader(task, css, tset)
 | 
			
		||||
+		break;
 | 
			
		||||
+
 | 
			
		||||
+	if (!task)
 | 
			
		||||
+		return;
 | 
			
		||||
+	/* migrate the mm only if this leader has one and is its owner */
 | 
			
		||||
+	task_lock(task);
 | 
			
		||||
+	if (task->mm && task->mm->owner == task)
 | 
			
		||||
+		lru_gen_migrate_mm(task->mm);
 | 
			
		||||
+	task_unlock(task);
 | 
			
		||||
+}
 | 
			
		||||
+#else
 | 
			
		||||
+static void mem_cgroup_attach(struct cgroup_taskset *tset)
 | 
			
		||||
+{
 | 
			
		||||
+}
 | 
			
		||||
+#endif
 | 
			
		||||
+
 | 
			
		||||
 static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
 | 
			
		||||
 {
 | 
			
		||||
 	if (value == PAGE_COUNTER_MAX)
 | 
			
		||||
@@ -6332,6 +6359,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
 | 
			
		||||
 	.css_reset = mem_cgroup_css_reset,
 | 
			
		||||
 	.css_rstat_flush = mem_cgroup_css_rstat_flush,
 | 
			
		||||
 	.can_attach = mem_cgroup_can_attach,
 | 
			
		||||
+	.attach = mem_cgroup_attach,
 | 
			
		||||
 	.cancel_attach = mem_cgroup_cancel_attach,
 | 
			
		||||
 	.post_attach = mem_cgroup_move_task,
 | 
			
		||||
 	.dfl_cftypes = memory_files,
 | 
			
		||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
 | 
			
		||||
index 84d25079092e..d93d2272e475 100644
 | 
			
		||||
--- a/mm/vmscan.c
 | 
			
		||||
+++ b/mm/vmscan.c
 | 
			
		||||
@@ -2869,6 +2869,323 @@ static bool positive_ctrl_err(struct controller_pos *sp, struct controller_pos *
 | 
			
		||||
 	       sp->refaulted * max(pv->total, 1UL) * pv->gain;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+/******************************************************************************
 | 
			
		||||
+ *                          mm_struct list
 | 
			
		||||
+ ******************************************************************************/
 | 
			
		||||
+
 | 
			
		||||
+enum {
 | 
			
		||||
+	MM_SCHED_ACTIVE,	/* running processes */
 | 
			
		||||
+	MM_SCHED_INACTIVE,	/* sleeping processes */
 | 
			
		||||
+	MM_LOCK_CONTENTION,	/* lock contentions */
 | 
			
		||||
+	MM_VMA_INTERVAL,	/* VMAs within the range of each PUD/PMD/PTE */
 | 
			
		||||
+	MM_LEAF_OTHER_NODE,	/* entries not from the node under reclaim */
 | 
			
		||||
+	MM_LEAF_OTHER_MEMCG,	/* entries not from the memcg under reclaim */
 | 
			
		||||
+	MM_LEAF_OLD,		/* old entries */
 | 
			
		||||
+	MM_LEAF_YOUNG,		/* young entries */
 | 
			
		||||
+	MM_LEAF_DIRTY,		/* dirty entries */
 | 
			
		||||
+	MM_LEAF_HOLE,		/* non-present entries */
 | 
			
		||||
+	MM_NONLEAF_OLD,		/* old non-leaf PMD entries */
 | 
			
		||||
+	MM_NONLEAF_YOUNG,	/* young non-leaf PMD entries */
 | 
			
		||||
+	NR_MM_STATS
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
+/* mnemonic codes for the stats above */
 | 
			
		||||
+#define MM_STAT_CODES		"aicvnmoydhlu"
 | 
			
		||||
+
 | 
			
		||||
+struct lru_gen_mm_list {
 | 
			
		||||
+	/* the head of a global or per-memcg mm_struct list */
 | 
			
		||||
+	struct list_head head;
 | 
			
		||||
+	/* protects the list */
 | 
			
		||||
+	spinlock_t lock;
 | 
			
		||||
+	struct {
 | 
			
		||||
+		/* set to max_seq after each round of walk */
 | 
			
		||||
+		unsigned long cur_seq;
 | 
			
		||||
+		/* the next mm on the list to walk */
 | 
			
		||||
+		struct list_head *iter;
 | 
			
		||||
+		/* to wait for the last worker to finish */
 | 
			
		||||
+		struct wait_queue_head wait;
 | 
			
		||||
+		/* the number of concurrent workers */
 | 
			
		||||
+		int nr_workers;
 | 
			
		||||
+		/* stats for debugging */
 | 
			
		||||
+		unsigned long stats[NR_STAT_GENS][NR_MM_STATS];
 | 
			
		||||
+	} nodes[0];
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
+static struct lru_gen_mm_list *global_mm_list;
 | 
			
		||||
+
 | 
			
		||||
+static struct lru_gen_mm_list *alloc_mm_list(void)
 | 
			
		||||
+{
 | 
			
		||||
+	int nid;
 | 
			
		||||
+	struct lru_gen_mm_list *mm_list;
 | 
			
		||||
+
 | 
			
		||||
+	mm_list = kzalloc(struct_size(mm_list, nodes, nr_node_ids), GFP_KERNEL);
 | 
			
		||||
+	if (!mm_list)
 | 
			
		||||
+		return NULL;
 | 
			
		||||
+
 | 
			
		||||
+	INIT_LIST_HEAD(&mm_list->head);
 | 
			
		||||
+	spin_lock_init(&mm_list->lock);
 | 
			
		||||
+
 | 
			
		||||
+	for_each_node(nid) {
 | 
			
		||||
+		mm_list->nodes[nid].cur_seq = MIN_NR_GENS;
 | 
			
		||||
+		mm_list->nodes[nid].iter = &mm_list->head;
 | 
			
		||||
+		init_waitqueue_head(&mm_list->nodes[nid].wait);
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	return mm_list;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg)
 | 
			
		||||
+{
 | 
			
		||||
+#ifdef CONFIG_MEMCG
 | 
			
		||||
+	if (!mem_cgroup_disabled())
 | 
			
		||||
+		return memcg ? memcg->mm_list : root_mem_cgroup->mm_list;
 | 
			
		||||
+#endif
 | 
			
		||||
+	VM_BUG_ON(memcg);
 | 
			
		||||
+
 | 
			
		||||
+	return global_mm_list;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+void lru_gen_init_mm(struct mm_struct *mm)
 | 
			
		||||
+{
 | 
			
		||||
+	INIT_LIST_HEAD(&mm->lrugen.list);
 | 
			
		||||
+#ifdef CONFIG_MEMCG
 | 
			
		||||
+	mm->lrugen.memcg = NULL;
 | 
			
		||||
+#endif
 | 
			
		||||
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 | 
			
		||||
+	atomic_set(&mm->lrugen.nr_cpus, 0);
 | 
			
		||||
+#endif
 | 
			
		||||
+	nodes_clear(mm->lrugen.nodes);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+void lru_gen_add_mm(struct mm_struct *mm)
 | 
			
		||||
+{
 | 
			
		||||
+	struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
 | 
			
		||||
+	struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
 | 
			
		||||
+
 | 
			
		||||
+	VM_BUG_ON_MM(!list_empty(&mm->lrugen.list), mm);
 | 
			
		||||
+#ifdef CONFIG_MEMCG
 | 
			
		||||
+	VM_BUG_ON_MM(mm->lrugen.memcg, mm);
 | 
			
		||||
+	WRITE_ONCE(mm->lrugen.memcg, memcg);
 | 
			
		||||
+#endif
 | 
			
		||||
+	spin_lock(&mm_list->lock);
 | 
			
		||||
+	list_add_tail(&mm->lrugen.list, &mm_list->head);
 | 
			
		||||
+	spin_unlock(&mm_list->lock);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+void lru_gen_del_mm(struct mm_struct *mm)
 | 
			
		||||
+{
 | 
			
		||||
+	int nid;
 | 
			
		||||
+#ifdef CONFIG_MEMCG
 | 
			
		||||
+	struct lru_gen_mm_list *mm_list = get_mm_list(mm->lrugen.memcg);
 | 
			
		||||
+#else
 | 
			
		||||
+	struct lru_gen_mm_list *mm_list = get_mm_list(NULL);
 | 
			
		||||
+#endif
 | 
			
		||||
+
 | 
			
		||||
+	spin_lock(&mm_list->lock);
 | 
			
		||||
+
 | 
			
		||||
+	for_each_node(nid) {
 | 
			
		||||
+		if (mm_list->nodes[nid].iter != &mm->lrugen.list)
 | 
			
		||||
+			continue;
 | 
			
		||||
+
 | 
			
		||||
+		mm_list->nodes[nid].iter = mm_list->nodes[nid].iter->next;
 | 
			
		||||
+		if (mm_list->nodes[nid].iter == &mm_list->head)
 | 
			
		||||
+			WRITE_ONCE(mm_list->nodes[nid].cur_seq,
 | 
			
		||||
+				   mm_list->nodes[nid].cur_seq + 1);
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	list_del_init(&mm->lrugen.list);
 | 
			
		||||
+
 | 
			
		||||
+	spin_unlock(&mm_list->lock);
 | 
			
		||||
+
 | 
			
		||||
+#ifdef CONFIG_MEMCG
 | 
			
		||||
+	mem_cgroup_put(mm->lrugen.memcg);
 | 
			
		||||
+	WRITE_ONCE(mm->lrugen.memcg, NULL);
 | 
			
		||||
+#endif
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+#ifdef CONFIG_MEMCG
 | 
			
		||||
+int lru_gen_alloc_mm_list(struct mem_cgroup *memcg)
 | 
			
		||||
+{
 | 
			
		||||
+	if (mem_cgroup_disabled())
 | 
			
		||||
+		return 0;
 | 
			
		||||
+
 | 
			
		||||
+	memcg->mm_list = alloc_mm_list();
 | 
			
		||||
+
 | 
			
		||||
+	return memcg->mm_list ? 0 : -ENOMEM;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+void lru_gen_free_mm_list(struct mem_cgroup *memcg)
 | 
			
		||||
+{
 | 
			
		||||
+	kfree(memcg->mm_list);
 | 
			
		||||
+	memcg->mm_list = NULL;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+void lru_gen_migrate_mm(struct mm_struct *mm)
 | 
			
		||||
+{
 | 
			
		||||
+	struct mem_cgroup *memcg;
 | 
			
		||||
+
 | 
			
		||||
+	lockdep_assert_held(&mm->owner->alloc_lock);
 | 
			
		||||
+
 | 
			
		||||
+	if (mem_cgroup_disabled())
 | 
			
		||||
+		return;
 | 
			
		||||
+
 | 
			
		||||
+	rcu_read_lock();
 | 
			
		||||
+	memcg = mem_cgroup_from_task(mm->owner);
 | 
			
		||||
+	rcu_read_unlock();
 | 
			
		||||
+	if (memcg == mm->lrugen.memcg)
 | 
			
		||||
+		return;
 | 
			
		||||
+
 | 
			
		||||
+	VM_BUG_ON_MM(!mm->lrugen.memcg, mm);
 | 
			
		||||
+	VM_BUG_ON_MM(list_empty(&mm->lrugen.list), mm);
 | 
			
		||||
+
 | 
			
		||||
+	lru_gen_del_mm(mm);
 | 
			
		||||
+	lru_gen_add_mm(mm);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static bool mm_has_migrated(struct mm_struct *mm, struct mem_cgroup *memcg)
 | 
			
		||||
+{
 | 
			
		||||
+	return READ_ONCE(mm->lrugen.memcg) != memcg;
 | 
			
		||||
+}
 | 
			
		||||
+#else
 | 
			
		||||
+static bool mm_has_migrated(struct mm_struct *mm, struct mem_cgroup *memcg)
 | 
			
		||||
+{
 | 
			
		||||
+	return false;
 | 
			
		||||
+}
 | 
			
		||||
+#endif
 | 
			
		||||
+
 | 
			
		||||
+struct mm_walk_args {
 | 
			
		||||
+	struct mem_cgroup *memcg;
 | 
			
		||||
+	unsigned long max_seq;
 | 
			
		||||
+	unsigned long start_pfn;
 | 
			
		||||
+	unsigned long end_pfn;
 | 
			
		||||
+	unsigned long next_addr;
 | 
			
		||||
+	int node_id;
 | 
			
		||||
+	int swappiness;
 | 
			
		||||
+	int batch_size;
 | 
			
		||||
+	int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
 | 
			
		||||
+	int mm_stats[NR_MM_STATS];
 | 
			
		||||
+	unsigned long bitmap[0];
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
+static int size_of_mm_walk_args(void)
 | 
			
		||||
+{
 | 
			
		||||
+	int size = sizeof(struct mm_walk_args);
 | 
			
		||||
+
 | 
			
		||||
+	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ||
 | 
			
		||||
+	    IS_ENABLED(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG))
 | 
			
		||||
+		size += sizeof(unsigned long) * BITS_TO_LONGS(PTRS_PER_PMD);
 | 
			
		||||
+
 | 
			
		||||
+	return size;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void reset_mm_stats(struct lru_gen_mm_list *mm_list, bool last,
 | 
			
		||||
+			   struct mm_walk_args *args)
 | 
			
		||||
+{
 | 
			
		||||
+	int i;
 | 
			
		||||
+	int nid = args->node_id;
 | 
			
		||||
+	int hist = hist_from_seq_or_gen(args->max_seq);
 | 
			
		||||
+
 | 
			
		||||
+	lockdep_assert_held(&mm_list->lock);
 | 
			
		||||
+
 | 
			
		||||
+	for (i = 0; i < NR_MM_STATS; i++) {
 | 
			
		||||
+		WRITE_ONCE(mm_list->nodes[nid].stats[hist][i],
 | 
			
		||||
+			   mm_list->nodes[nid].stats[hist][i] + args->mm_stats[i]);
 | 
			
		||||
+		args->mm_stats[i] = 0;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	if (!last || NR_STAT_GENS == 1)
 | 
			
		||||
+		return;
 | 
			
		||||
+
 | 
			
		||||
+	hist = hist_from_seq_or_gen(args->max_seq + 1);
 | 
			
		||||
+	for (i = 0; i < NR_MM_STATS; i++)
 | 
			
		||||
+		WRITE_ONCE(mm_list->nodes[nid].stats[hist][i], 0);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static bool should_skip_mm(struct mm_struct *mm, struct mm_walk_args *args)
 | 
			
		||||
+{
 | 
			
		||||
+	int type;
 | 
			
		||||
+	unsigned long size = 0;
 | 
			
		||||
+
 | 
			
		||||
+	if (!lru_gen_mm_is_active(mm) && !node_isset(args->node_id, mm->lrugen.nodes))
 | 
			
		||||
+		return true;
 | 
			
		||||
+
 | 
			
		||||
+	if (mm_is_oom_victim(mm))
 | 
			
		||||
+		return true;
 | 
			
		||||
+
 | 
			
		||||
+	for (type = !args->swappiness; type < ANON_AND_FILE; type++) {
 | 
			
		||||
+		size += type ? get_mm_counter(mm, MM_FILEPAGES) :
 | 
			
		||||
+			       get_mm_counter(mm, MM_ANONPAGES) +
 | 
			
		||||
+			       get_mm_counter(mm, MM_SHMEMPAGES);
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	/* leave the legwork to the rmap if mappings are too sparse */
 | 
			
		||||
+	if (size < max(SWAP_CLUSTER_MAX, mm_pgtables_bytes(mm) / PAGE_SIZE))
 | 
			
		||||
+		return true;
 | 
			
		||||
+
 | 
			
		||||
+	return !mmget_not_zero(mm);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+/* To support multiple workers that concurrently walk an mm_struct list. */
 | 
			
		||||
+static bool get_next_mm(struct mm_walk_args *args, struct mm_struct **iter)
 | 
			
		||||
+{
 | 
			
		||||
+	bool last = true;
 | 
			
		||||
+	struct mm_struct *mm = NULL;
 | 
			
		||||
+	int nid = args->node_id;
 | 
			
		||||
+	struct lru_gen_mm_list *mm_list = get_mm_list(args->memcg);
 | 
			
		||||
+
 | 
			
		||||
+	if (*iter)
 | 
			
		||||
+		mmput_async(*iter);
 | 
			
		||||
+	else if (args->max_seq <= READ_ONCE(mm_list->nodes[nid].cur_seq))
 | 
			
		||||
+		return false;
 | 
			
		||||
+
 | 
			
		||||
+	spin_lock(&mm_list->lock);
 | 
			
		||||
+
 | 
			
		||||
+	VM_BUG_ON(args->max_seq > mm_list->nodes[nid].cur_seq + 1);
 | 
			
		||||
+	VM_BUG_ON(*iter && args->max_seq < mm_list->nodes[nid].cur_seq);
 | 
			
		||||
+	VM_BUG_ON(*iter && !mm_list->nodes[nid].nr_workers);
 | 
			
		||||
+
 | 
			
		||||
+	if (args->max_seq <= mm_list->nodes[nid].cur_seq) {
 | 
			
		||||
+		last = *iter;
 | 
			
		||||
+		goto done;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	if (mm_list->nodes[nid].iter == &mm_list->head) {
 | 
			
		||||
+		VM_BUG_ON(*iter || mm_list->nodes[nid].nr_workers);
 | 
			
		||||
+		mm_list->nodes[nid].iter = mm_list->nodes[nid].iter->next;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	while (!mm && mm_list->nodes[nid].iter != &mm_list->head) {
 | 
			
		||||
+		mm = list_entry(mm_list->nodes[nid].iter, struct mm_struct, lrugen.list);
 | 
			
		||||
+		mm_list->nodes[nid].iter = mm_list->nodes[nid].iter->next;
 | 
			
		||||
+		if (should_skip_mm(mm, args))
 | 
			
		||||
+			mm = NULL;
 | 
			
		||||
+
 | 
			
		||||
+		args->mm_stats[mm ? MM_SCHED_ACTIVE : MM_SCHED_INACTIVE]++;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	if (mm_list->nodes[nid].iter == &mm_list->head)
 | 
			
		||||
+		WRITE_ONCE(mm_list->nodes[nid].cur_seq,
 | 
			
		||||
+			   mm_list->nodes[nid].cur_seq + 1);
 | 
			
		||||
+done:
 | 
			
		||||
+	if (*iter && !mm)
 | 
			
		||||
+		mm_list->nodes[nid].nr_workers--;
 | 
			
		||||
+	if (!*iter && mm)
 | 
			
		||||
+		mm_list->nodes[nid].nr_workers++;
 | 
			
		||||
+
 | 
			
		||||
+	last = last && !mm_list->nodes[nid].nr_workers &&
 | 
			
		||||
+	       mm_list->nodes[nid].iter == &mm_list->head;
 | 
			
		||||
+
 | 
			
		||||
+	reset_mm_stats(mm_list, last, args);
 | 
			
		||||
+
 | 
			
		||||
+	spin_unlock(&mm_list->lock);
 | 
			
		||||
+
 | 
			
		||||
+	*iter = mm;
 | 
			
		||||
+	if (mm)
 | 
			
		||||
+		node_clear(nid, mm->lrugen.nodes);
 | 
			
		||||
+
 | 
			
		||||
+	return last;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 /******************************************************************************
 | 
			
		||||
  *                          state change
 | 
			
		||||
  ******************************************************************************/
 | 
			
		||||
@@ -3096,6 +3413,13 @@ static int __init init_lru_gen(void)
 | 
			
		||||
 {
 | 
			
		||||
 	BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
 | 
			
		||||
 	BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
 | 
			
		||||
+	BUILD_BUG_ON(sizeof(MM_STAT_CODES) != NR_MM_STATS + 1);
 | 
			
		||||
+
 | 
			
		||||
+	if (mem_cgroup_disabled()) {
 | 
			
		||||
+		global_mm_list = alloc_mm_list();
 | 
			
		||||
+		if (WARN_ON_ONCE(!global_mm_list))
 | 
			
		||||
+			return -ENOMEM;
 | 
			
		||||
+	}
 | 
			
		||||
 
 | 
			
		||||
 	if (hotplug_memory_notifier(lru_gen_online_mem, 0))
 | 
			
		||||
 		pr_err("lru_gen: failed to subscribe hotplug notifications\n");
 | 
			
		||||
-- 
 | 
			
		||||
2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -0,0 +1,768 @@
 | 
			
		||||
From mboxrd@z Thu Jan  1 00:00:00 1970
 | 
			
		||||
Return-Path: <linux-kernel-owner@kernel.org>
 | 
			
		||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
 | 
			
		||||
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
 | 
			
		||||
X-Spam-Level: 
 | 
			
		||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
 | 
			
		||||
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
 | 
			
		||||
	INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
 | 
			
		||||
	USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
 | 
			
		||||
	version=3.4.0
 | 
			
		||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
 | 
			
		||||
	by smtp.lore.kernel.org (Postfix) with ESMTP id A01B6C433B4
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:39 +0000 (UTC)
 | 
			
		||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
 | 
			
		||||
	by mail.kernel.org (Postfix) with ESMTP id 7B61B6108C
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:39 +0000 (UTC)
 | 
			
		||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
 | 
			
		||||
        id S231207AbhETGz5 (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:57 -0400
 | 
			
		||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37946 "EHLO
 | 
			
		||||
        lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
 | 
			
		||||
        with ESMTP id S230519AbhETGzl (ORCPT
 | 
			
		||||
        <rfc822;linux-kernel@vger.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:41 -0400
 | 
			
		||||
Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49])
 | 
			
		||||
        by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2E0BFC06175F
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:19 -0700 (PDT)
 | 
			
		||||
Received: by mail-yb1-xb49.google.com with SMTP id o6-20020a5b06460000b02905004326697dso21269948ybq.22
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:19 -0700 (PDT)
 | 
			
		||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=google.com; s=20161025;
 | 
			
		||||
        h=date:in-reply-to:message-id:mime-version:references:subject:from:to
 | 
			
		||||
         :cc;
 | 
			
		||||
        bh=u1VH9oezkgLqdxJ2J45QA+bE6HFSfI1t2pM/Z9SSfcw=;
 | 
			
		||||
        b=ZCwqDV3PtHM/LJ1Jk3mVLSR0meKIBgFwo1J8fy1XCqpRUSN2IaxDKRl6kQ+Kr5x6il
 | 
			
		||||
         ONEGQ71NTF3X5YriYi1HDhha3PmMaPofh1moI1cvhXBQ3BC7QtVM3R2+bDqRzq1heN8I
 | 
			
		||||
         AIXSKXUdwikQDrunmGAxvTK29DMwl/KeHCe+4v24DaVODm4+A+McG4cMpvigEHvQjTyF
 | 
			
		||||
         v8VcycT2kwKRw3j6yPu6tWP+l/IwnXQiY+KsQ1ti1IgPSlH/WyvqWlUCVB7h2C+o5ZS+
 | 
			
		||||
         /wKVmM36EtyVbHuHWwWCJkvkjGaJnzvDjISmaVK9XCh1D8kFXjAL3uXkcExirtkdXQBN
 | 
			
		||||
         na+A==
 | 
			
		||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=1e100.net; s=20161025;
 | 
			
		||||
        h=x-gm-message-state:date:in-reply-to:message-id:mime-version
 | 
			
		||||
         :references:subject:from:to:cc;
 | 
			
		||||
        bh=u1VH9oezkgLqdxJ2J45QA+bE6HFSfI1t2pM/Z9SSfcw=;
 | 
			
		||||
        b=LJDAHspg6fnPnue5XfzSf/BNfnVkyVlvJxf0/6UoekRIfXnLhw2K1izgIrVveHAz3I
 | 
			
		||||
         J/NCoJhs8jZ/aPXP/cQXIGGSdtjJW5eDfEf4zm2qn/9oNaQnLZ7BV6aCDANBoPUCqa/r
 | 
			
		||||
         AJIKGX9sXqevfdwgMdyFFNCF5HROG3lCnszQrQm+Y91p8HixZQRUngPI+mUlfY2VvvbM
 | 
			
		||||
         MK5IMhmus/o35uuc/UPt0oRdz7fAgWg0WMJL5aZJMbFtZ9K8x8KpzSQqTf5tF9cNg3jZ
 | 
			
		||||
         +0F1HWd4vCibOEBYJ0aENh+LFGxZoC+et9tQi5mJM42r+AlcYVzilVzRLzpjG0KKaHNi
 | 
			
		||||
         FKBQ==
 | 
			
		||||
X-Gm-Message-State: AOAM5318NWZEOBXD8F42C3giE31Ee6cXk+kglz/8je4dTQkxkTvmIP2F
 | 
			
		||||
        D7kBlXDwCzr93Jm/4pjbod1wRW/kBWo=
 | 
			
		||||
X-Google-Smtp-Source: ABdhPJxfQJokAdXpqmzcdeUweiPSNNLZPWEWbQ8Rs0Vczp0sf0utxd24KmzGgE8wHfpeqesf2U2Y+MnqF+Q=
 | 
			
		||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
 | 
			
		||||
 (user=yuzhao job=sendgmr) by 2002:a25:b3c3:: with SMTP id x3mr5173887ybf.334.1621493658204;
 | 
			
		||||
 Wed, 19 May 2021 23:54:18 -0700 (PDT)
 | 
			
		||||
Date:   Thu, 20 May 2021 00:53:52 -0600
 | 
			
		||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
Message-Id: <20210520065355.2736558-12-yuzhao@google.com>
 | 
			
		||||
Mime-Version: 1.0
 | 
			
		||||
References: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
Subject: [PATCH v3 11/14] mm: multigenerational lru: eviction
 | 
			
		||||
From:   Yu Zhao <yuzhao@google.com>
 | 
			
		||||
To:     linux-mm@kvack.org
 | 
			
		||||
Cc:     Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
 | 
			
		||||
        Andrew Morton <akpm@linux-foundation.org>,
 | 
			
		||||
        Dave Chinner <david@fromorbit.com>,
 | 
			
		||||
        Dave Hansen <dave.hansen@linux.intel.com>,
 | 
			
		||||
        Donald Carr <sirspudd@gmail.com>,
 | 
			
		||||
        Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
 | 
			
		||||
        Johannes Weiner <hannes@cmpxchg.org>,
 | 
			
		||||
        Jonathan Corbet <corbet@lwn.net>,
 | 
			
		||||
        Joonsoo Kim <iamjoonsoo.kim@lge.com>,
 | 
			
		||||
        Konstantin Kharlamov <hi-angel@yandex.ru>,
 | 
			
		||||
        Marcus Seyfarth <m.seyfarth@gmail.com>,
 | 
			
		||||
        Matthew Wilcox <willy@infradead.org>,
 | 
			
		||||
        Mel Gorman <mgorman@suse.de>,
 | 
			
		||||
        Miaohe Lin <linmiaohe@huawei.com>,
 | 
			
		||||
        Michael Larabel <michael@michaellarabel.com>,
 | 
			
		||||
        Michal Hocko <mhocko@suse.com>,
 | 
			
		||||
        Michel Lespinasse <michel@lespinasse.org>,
 | 
			
		||||
        Rik van Riel <riel@surriel.com>,
 | 
			
		||||
        Roman Gushchin <guro@fb.com>,
 | 
			
		||||
        Tim Chen <tim.c.chen@linux.intel.com>,
 | 
			
		||||
        Vlastimil Babka <vbabka@suse.cz>,
 | 
			
		||||
        Yang Shi <shy828301@gmail.com>,
 | 
			
		||||
        Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
 | 
			
		||||
        linux-kernel@vger.kernel.org, lkp@lists.01.org,
 | 
			
		||||
        page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
 | 
			
		||||
        Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
Content-Type: text/plain; charset="UTF-8"
 | 
			
		||||
Precedence: bulk
 | 
			
		||||
List-ID: <linux-kernel.vger.kernel.org>
 | 
			
		||||
X-Mailing-List: linux-kernel@vger.kernel.org
 | 
			
		||||
List-Archive: <https://lore.kernel.org/lkml/>
 | 
			
		||||
 | 
			
		||||
The eviction consumes old generations. Given an lruvec, the eviction
 | 
			
		||||
scans the pages on the per-zone lists indexed by either of min_seq[2].
 | 
			
		||||
It first tries to select a type based on the values of min_seq[2].
 | 
			
		||||
When anon and file types are both available from the same generation,
 | 
			
		||||
it selects the one that has a lower refault rate.
 | 
			
		||||
 | 
			
		||||
During a scan, the eviction sorts pages according to their new
 | 
			
		||||
generation numbers, if the aging has found them referenced. It also
 | 
			
		||||
moves pages from the tiers that have higher refault rates than tier 0
 | 
			
		||||
to the next generation. When it finds all the per-zone lists of a
 | 
			
		||||
selected type are empty, the eviction increments min_seq[2] indexed by
 | 
			
		||||
this selected type.
 | 
			
		||||
 | 
			
		||||
With the aging and the eviction in place, we can build page reclaim in
 | 
			
		||||
a straightforward manner:
 | 
			
		||||
  1) In order to reduce the latency, direct reclaim only invokes the
 | 
			
		||||
  aging when both of min_seq[2] reach max_seq-1; otherwise it invokes
 | 
			
		||||
  the eviction.
 | 
			
		||||
  2) In order to avoid the aging in the direct reclaim path, kswapd
 | 
			
		||||
  does the background aging. It invokes the aging when either of
 | 
			
		||||
  min_seq[2] reaches max_seq-1; otherwise it invokes the eviction.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
 | 
			
		||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
---
 | 
			
		||||
 include/linux/mmzone.h |   5 +
 | 
			
		||||
 mm/vmscan.c            | 540 +++++++++++++++++++++++++++++++++++++++++
 | 
			
		||||
 2 files changed, 545 insertions(+)
 | 
			
		||||
 | 
			
		||||
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
 | 
			
		||||
index 38de59fcbe54..ded72f44d7e7 100644
 | 
			
		||||
--- a/include/linux/mmzone.h
 | 
			
		||||
+++ b/include/linux/mmzone.h
 | 
			
		||||
@@ -863,6 +863,8 @@ struct deferred_split {
 | 
			
		||||
 };
 | 
			
		||||
 #endif
 | 
			
		||||
 
 | 
			
		||||
+struct mm_walk_args;
 | 
			
		||||
+
 | 
			
		||||
 /*
 | 
			
		||||
  * On NUMA machines, each NUMA node would have a pg_data_t to describe
 | 
			
		||||
  * it's memory layout. On UMA machines there is a single pglist_data which
 | 
			
		||||
@@ -968,6 +970,9 @@ typedef struct pglist_data {
 | 
			
		||||
 
 | 
			
		||||
 	unsigned long		flags;
 | 
			
		||||
 
 | 
			
		||||
+#ifdef CONFIG_LRU_GEN
 | 
			
		||||
+	struct mm_walk_args	*mm_walk_args;
 | 
			
		||||
+#endif
 | 
			
		||||
 	ZONE_PADDING(_pad2_)
 | 
			
		||||
 
 | 
			
		||||
 	/* Per-node vmstats */
 | 
			
		||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
 | 
			
		||||
index 837d5e6a821e..2f86dcc04c56 100644
 | 
			
		||||
--- a/mm/vmscan.c
 | 
			
		||||
+++ b/mm/vmscan.c
 | 
			
		||||
@@ -1311,6 +1311,11 @@ static unsigned int shrink_page_list(struct list_head *page_list,
 | 
			
		||||
 		if (!sc->may_unmap && page_mapped(page))
 | 
			
		||||
 			goto keep_locked;
 | 
			
		||||
 
 | 
			
		||||
+		/* in case the page was found accessed by lru_gen_scan_around() */
 | 
			
		||||
+		if (lru_gen_enabled() && !ignore_references &&
 | 
			
		||||
+		    page_mapped(page) && PageReferenced(page))
 | 
			
		||||
+			goto keep_locked;
 | 
			
		||||
+
 | 
			
		||||
 		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
 | 
			
		||||
 			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
 | 
			
		||||
 
 | 
			
		||||
@@ -2431,6 +2436,9 @@ static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
 | 
			
		||||
 	unsigned long file;
 | 
			
		||||
 	struct lruvec *target_lruvec;
 | 
			
		||||
 
 | 
			
		||||
+	if (lru_gen_enabled())
 | 
			
		||||
+		return;
 | 
			
		||||
+
 | 
			
		||||
 	target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
 | 
			
		||||
 
 | 
			
		||||
 	/*
 | 
			
		||||
@@ -3970,6 +3978,489 @@ void lru_gen_scan_around(struct page_vma_mapped_walk *pvmw)
 | 
			
		||||
 		set_page_dirty(pte_page(pte[i]));
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+/******************************************************************************
 | 
			
		||||
+ *                          the eviction
 | 
			
		||||
+ ******************************************************************************/
 | 
			
		||||
+
 | 
			
		||||
+static bool should_skip_page(struct page *page, struct scan_control *sc)
 | 
			
		||||
+{
 | 
			
		||||
+	if (!sc->may_unmap && page_mapped(page))
 | 
			
		||||
+		return true;
 | 
			
		||||
+
 | 
			
		||||
+	if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) &&
 | 
			
		||||
+	    (PageDirty(page) || (PageAnon(page) && !PageSwapCache(page))))
 | 
			
		||||
+		return true;
 | 
			
		||||
+
 | 
			
		||||
+	if (!get_page_unless_zero(page))
 | 
			
		||||
+		return true;
 | 
			
		||||
+
 | 
			
		||||
+	if (!TestClearPageLRU(page)) {
 | 
			
		||||
+		put_page(page);
 | 
			
		||||
+		return true;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	return false;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static bool sort_page(struct page *page, struct lruvec *lruvec, int tier_to_isolate)
 | 
			
		||||
+{
 | 
			
		||||
+	bool success;
 | 
			
		||||
+	int gen = page_lru_gen(page);
 | 
			
		||||
+	int type = page_is_file_lru(page);
 | 
			
		||||
+	int zone = page_zonenum(page);
 | 
			
		||||
+	int tier = lru_tier_from_usage(page_tier_usage(page));
 | 
			
		||||
+	struct lrugen *lrugen = &lruvec->evictable;
 | 
			
		||||
+
 | 
			
		||||
+	VM_BUG_ON_PAGE(gen == -1, page);
 | 
			
		||||
+	VM_BUG_ON_PAGE(tier_to_isolate < 0, page);
 | 
			
		||||
+
 | 
			
		||||
+	/* a lazy-free page that has been written into? */
 | 
			
		||||
+	if (type && PageDirty(page) && PageAnon(page)) {
 | 
			
		||||
+		success = lru_gen_deletion(page, lruvec);
 | 
			
		||||
+		VM_BUG_ON_PAGE(!success, page);
 | 
			
		||||
+		SetPageSwapBacked(page);
 | 
			
		||||
+		add_page_to_lru_list_tail(page, lruvec);
 | 
			
		||||
+		return true;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	/* page_update_gen() has updated the gen #? */
 | 
			
		||||
+	if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
 | 
			
		||||
+		list_move(&page->lru, &lrugen->lists[gen][type][zone]);
 | 
			
		||||
+		return true;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	/* activate this page if its tier has a higher refault rate */
 | 
			
		||||
+	if (tier_to_isolate < tier) {
 | 
			
		||||
+		int hist = hist_from_seq_or_gen(gen);
 | 
			
		||||
+
 | 
			
		||||
+		page_inc_gen(page, lruvec, false);
 | 
			
		||||
+		WRITE_ONCE(lrugen->activated[hist][type][tier - 1],
 | 
			
		||||
+			   lrugen->activated[hist][type][tier - 1] + thp_nr_pages(page));
 | 
			
		||||
+		inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type);
 | 
			
		||||
+		return true;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	/* mark this page for reclaim if it's pending writeback */
 | 
			
		||||
+	if (PageWriteback(page) || (type && PageDirty(page))) {
 | 
			
		||||
+		page_inc_gen(page, lruvec, true);
 | 
			
		||||
+		return true;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	return false;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void isolate_page(struct page *page, struct lruvec *lruvec)
 | 
			
		||||
+{
 | 
			
		||||
+	bool success;
 | 
			
		||||
+
 | 
			
		||||
+	success = lru_gen_deletion(page, lruvec);
 | 
			
		||||
+	VM_BUG_ON_PAGE(!success, page);
 | 
			
		||||
+
 | 
			
		||||
+	if (PageActive(page)) {
 | 
			
		||||
+		ClearPageActive(page);
 | 
			
		||||
+		/* make sure shrink_page_list() rejects this page */
 | 
			
		||||
+		SetPageReferenced(page);
 | 
			
		||||
+		return;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	/* make sure shrink_page_list() doesn't try to write this page */
 | 
			
		||||
+	ClearPageReclaim(page);
 | 
			
		||||
+	/* make sure shrink_page_list() doesn't reject this page */
 | 
			
		||||
+	ClearPageReferenced(page);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int scan_pages(struct lruvec *lruvec, struct scan_control *sc, long *nr_to_scan,
 | 
			
		||||
+		      int type, int tier, struct list_head *list)
 | 
			
		||||
+{
 | 
			
		||||
+	bool success;
 | 
			
		||||
+	int gen, zone;
 | 
			
		||||
+	enum vm_event_item item;
 | 
			
		||||
+	int sorted = 0;
 | 
			
		||||
+	int scanned = 0;
 | 
			
		||||
+	int isolated = 0;
 | 
			
		||||
+	int batch_size = 0;
 | 
			
		||||
+	struct lrugen *lrugen = &lruvec->evictable;
 | 
			
		||||
+	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 | 
			
		||||
+
 | 
			
		||||
+	VM_BUG_ON(!list_empty(list));
 | 
			
		||||
+
 | 
			
		||||
+	if (get_nr_gens(lruvec, type) == MIN_NR_GENS)
 | 
			
		||||
+		return -ENOENT;
 | 
			
		||||
+
 | 
			
		||||
+	gen = lru_gen_from_seq(lrugen->min_seq[type]);
 | 
			
		||||
+
 | 
			
		||||
+	for (zone = sc->reclaim_idx; zone >= 0; zone--) {
 | 
			
		||||
+		LIST_HEAD(moved);
 | 
			
		||||
+		int skipped = 0;
 | 
			
		||||
+		struct list_head *head = &lrugen->lists[gen][type][zone];
 | 
			
		||||
+
 | 
			
		||||
+		while (!list_empty(head)) {
 | 
			
		||||
+			struct page *page = lru_to_page(head);
 | 
			
		||||
+			int delta = thp_nr_pages(page);
 | 
			
		||||
+
 | 
			
		||||
+			VM_BUG_ON_PAGE(PageTail(page), page);
 | 
			
		||||
+			VM_BUG_ON_PAGE(PageUnevictable(page), page);
 | 
			
		||||
+			VM_BUG_ON_PAGE(PageActive(page), page);
 | 
			
		||||
+			VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page);
 | 
			
		||||
+			VM_BUG_ON_PAGE(page_zonenum(page) != zone, page);
 | 
			
		||||
+
 | 
			
		||||
+			prefetchw_prev_lru_page(page, head, flags);
 | 
			
		||||
+
 | 
			
		||||
+			scanned += delta;
 | 
			
		||||
+
 | 
			
		||||
+			if (sort_page(page, lruvec, tier))
 | 
			
		||||
+				sorted += delta;
 | 
			
		||||
+			else if (should_skip_page(page, sc)) {
 | 
			
		||||
+				list_move(&page->lru, &moved);
 | 
			
		||||
+				skipped += delta;
 | 
			
		||||
+			} else {
 | 
			
		||||
+				isolate_page(page, lruvec);
 | 
			
		||||
+				list_add(&page->lru, list);
 | 
			
		||||
+				isolated += delta;
 | 
			
		||||
+			}
 | 
			
		||||
+
 | 
			
		||||
+			if (scanned >= *nr_to_scan || isolated >= SWAP_CLUSTER_MAX ||
 | 
			
		||||
+			    ++batch_size == MAX_BATCH_SIZE)
 | 
			
		||||
+				break;
 | 
			
		||||
+		}
 | 
			
		||||
+
 | 
			
		||||
+		list_splice(&moved, head);
 | 
			
		||||
+		__count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
 | 
			
		||||
+
 | 
			
		||||
+		if (scanned >= *nr_to_scan || isolated >= SWAP_CLUSTER_MAX ||
 | 
			
		||||
+		    batch_size == MAX_BATCH_SIZE)
 | 
			
		||||
+			break;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	success = try_inc_min_seq(lruvec, type);
 | 
			
		||||
+
 | 
			
		||||
+	item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
 | 
			
		||||
+	if (!cgroup_reclaim(sc)) {
 | 
			
		||||
+		__count_vm_events(item, scanned);
 | 
			
		||||
+		__count_vm_events(PGREFILL, sorted);
 | 
			
		||||
+	}
 | 
			
		||||
+	__count_memcg_events(memcg, item, scanned);
 | 
			
		||||
+	__count_memcg_events(memcg, PGREFILL, sorted);
 | 
			
		||||
+	__count_vm_events(PGSCAN_ANON + type, scanned);
 | 
			
		||||
+
 | 
			
		||||
+	*nr_to_scan -= scanned;
 | 
			
		||||
+
 | 
			
		||||
+	if (*nr_to_scan <= 0 || success || isolated)
 | 
			
		||||
+		return isolated;
 | 
			
		||||
+	/*
 | 
			
		||||
+	 * We may have trouble finding eligible pages due to reclaim_idx,
 | 
			
		||||
+	 * may_unmap and may_writepage. The following check makes sure we won't
 | 
			
		||||
+	 * be stuck if we aren't making enough progress.
 | 
			
		||||
+	 */
 | 
			
		||||
+	return batch_size == MAX_BATCH_SIZE && sorted >= SWAP_CLUSTER_MAX ? 0 : -ENOENT;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int get_tier_to_isolate(struct lruvec *lruvec, int type)
 | 
			
		||||
+{
 | 
			
		||||
+	int tier;
 | 
			
		||||
+	struct controller_pos sp, pv;
 | 
			
		||||
+
 | 
			
		||||
+	/*
 | 
			
		||||
+	 * Ideally we don't want to evict upper tiers that have higher refault
 | 
			
		||||
+	 * rates. However, we need to leave a margin for the fluctuations in
 | 
			
		||||
+	 * refault rates. So we use a larger gain factor to make sure upper
 | 
			
		||||
+	 * tiers are indeed more active. We choose 2 because the lowest upper
 | 
			
		||||
+	 * tier would have twice the refault rate of the base tier, according
 | 
			
		||||
+	 * to their numbers of accesses.
 | 
			
		||||
+	 */
 | 
			
		||||
+	read_controller_pos(&sp, lruvec, type, 0, 1);
 | 
			
		||||
+	for (tier = 1; tier < MAX_NR_TIERS; tier++) {
 | 
			
		||||
+		read_controller_pos(&pv, lruvec, type, tier, 2);
 | 
			
		||||
+		if (!positive_ctrl_err(&sp, &pv))
 | 
			
		||||
+			break;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	return tier - 1;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_to_isolate)
 | 
			
		||||
+{
 | 
			
		||||
+	int type, tier;
 | 
			
		||||
+	struct controller_pos sp, pv;
 | 
			
		||||
+	int gain[ANON_AND_FILE] = { swappiness, 200 - swappiness };
 | 
			
		||||
+
 | 
			
		||||
+	/*
 | 
			
		||||
+	 * Compare the refault rates between the base tiers of anon and file to
 | 
			
		||||
+	 * determine which type to evict. Also need to compare the refault rates
 | 
			
		||||
+	 * of the upper tiers of the selected type with that of the base tier of
 | 
			
		||||
+	 * the other type to determine which tier of the selected type to evict.
 | 
			
		||||
+	 */
 | 
			
		||||
+	read_controller_pos(&sp, lruvec, 0, 0, gain[0]);
 | 
			
		||||
+	read_controller_pos(&pv, lruvec, 1, 0, gain[1]);
 | 
			
		||||
+	type = positive_ctrl_err(&sp, &pv);
 | 
			
		||||
+
 | 
			
		||||
+	read_controller_pos(&sp, lruvec, !type, 0, gain[!type]);
 | 
			
		||||
+	for (tier = 1; tier < MAX_NR_TIERS; tier++) {
 | 
			
		||||
+		read_controller_pos(&pv, lruvec, type, tier, gain[type]);
 | 
			
		||||
+		if (!positive_ctrl_err(&sp, &pv))
 | 
			
		||||
+			break;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	*tier_to_isolate = tier - 1;
 | 
			
		||||
+
 | 
			
		||||
+	return type;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int isolate_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
 | 
			
		||||
+			 long *nr_to_scan, int *type_to_scan, struct list_head *list)
 | 
			
		||||
+{
 | 
			
		||||
+	int i;
 | 
			
		||||
+	int type;
 | 
			
		||||
+	int isolated;
 | 
			
		||||
+	int tier = -1;
 | 
			
		||||
+	DEFINE_MAX_SEQ();
 | 
			
		||||
+	DEFINE_MIN_SEQ();
 | 
			
		||||
+
 | 
			
		||||
+	VM_BUG_ON(!seq_is_valid(lruvec));
 | 
			
		||||
+
 | 
			
		||||
+	if (max_nr_gens(max_seq, min_seq, swappiness) == MIN_NR_GENS)
 | 
			
		||||
+		return 0;
 | 
			
		||||
+	/*
 | 
			
		||||
+	 * Try to select a type based on generations and swappiness, and if that
 | 
			
		||||
+	 * fails, fall back to get_type_to_scan(). When anon and file are both
 | 
			
		||||
+	 * available from the same generation, swappiness 200 is interpreted as
 | 
			
		||||
+	 * anon first and swappiness 1 is interpreted as file first.
 | 
			
		||||
+	 */
 | 
			
		||||
+	type = !swappiness || min_seq[0] > min_seq[1] ||
 | 
			
		||||
+	       (min_seq[0] == min_seq[1] && swappiness != 200 &&
 | 
			
		||||
+		(swappiness == 1 || get_type_to_scan(lruvec, swappiness, &tier)));
 | 
			
		||||
+
 | 
			
		||||
+	if (tier == -1)
 | 
			
		||||
+		tier = get_tier_to_isolate(lruvec, type);
 | 
			
		||||
+
 | 
			
		||||
+	for (i = !swappiness; i < ANON_AND_FILE; i++) {
 | 
			
		||||
+		isolated = scan_pages(lruvec, sc, nr_to_scan, type, tier, list);
 | 
			
		||||
+		if (isolated >= 0)
 | 
			
		||||
+			break;
 | 
			
		||||
+
 | 
			
		||||
+		type = !type;
 | 
			
		||||
+		tier = get_tier_to_isolate(lruvec, type);
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	if (isolated < 0)
 | 
			
		||||
+		isolated = *nr_to_scan = 0;
 | 
			
		||||
+
 | 
			
		||||
+	*type_to_scan = type;
 | 
			
		||||
+
 | 
			
		||||
+	return isolated;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+/* Main function used by the foreground, the background and the user-triggered eviction. */
 | 
			
		||||
+static bool evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
 | 
			
		||||
+			long *nr_to_scan)
 | 
			
		||||
+{
 | 
			
		||||
+	int type;
 | 
			
		||||
+	int isolated;
 | 
			
		||||
+	int reclaimed;
 | 
			
		||||
+	LIST_HEAD(list);
 | 
			
		||||
+	struct page *page;
 | 
			
		||||
+	enum vm_event_item item;
 | 
			
		||||
+	struct reclaim_stat stat;
 | 
			
		||||
+	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 | 
			
		||||
+
 | 
			
		||||
+	spin_lock_irq(&lruvec->lru_lock);
 | 
			
		||||
+
 | 
			
		||||
+	isolated = isolate_pages(lruvec, sc, swappiness, nr_to_scan, &type, &list);
 | 
			
		||||
+	VM_BUG_ON(list_empty(&list) == !!isolated);
 | 
			
		||||
+
 | 
			
		||||
+	if (isolated)
 | 
			
		||||
+		__mod_node_page_state(pgdat, NR_ISOLATED_ANON + type, isolated);
 | 
			
		||||
+
 | 
			
		||||
+	spin_unlock_irq(&lruvec->lru_lock);
 | 
			
		||||
+
 | 
			
		||||
+	if (!isolated)
 | 
			
		||||
+		goto done;
 | 
			
		||||
+
 | 
			
		||||
+	reclaimed = shrink_page_list(&list, pgdat, sc, &stat, false);
 | 
			
		||||
+	/*
 | 
			
		||||
+	 * We need to prevent rejected pages from being added back to the same
 | 
			
		||||
+	 * lists they were isolated from. Otherwise we may risk looping on them
 | 
			
		||||
+	 * forever. We use PageActive() or !PageReferenced() && PageWorkingset()
 | 
			
		||||
+	 * to tell lru_gen_addition() not to add them to the oldest generation.
 | 
			
		||||
+	 */
 | 
			
		||||
+	list_for_each_entry(page, &list, lru) {
 | 
			
		||||
+		if (PageMlocked(page))
 | 
			
		||||
+			continue;
 | 
			
		||||
+
 | 
			
		||||
+		if (page_mapped(page) && PageReferenced(page))
 | 
			
		||||
+			SetPageActive(page);
 | 
			
		||||
+		else {
 | 
			
		||||
+			ClearPageActive(page);
 | 
			
		||||
+			SetPageWorkingset(page);
 | 
			
		||||
+		}
 | 
			
		||||
+		ClearPageReferenced(page);
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	spin_lock_irq(&lruvec->lru_lock);
 | 
			
		||||
+
 | 
			
		||||
+	move_pages_to_lru(lruvec, &list);
 | 
			
		||||
+
 | 
			
		||||
+	__mod_node_page_state(pgdat, NR_ISOLATED_ANON + type, -isolated);
 | 
			
		||||
+
 | 
			
		||||
+	item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
 | 
			
		||||
+	if (!cgroup_reclaim(sc))
 | 
			
		||||
+		__count_vm_events(item, reclaimed);
 | 
			
		||||
+	__count_memcg_events(lruvec_memcg(lruvec), item, reclaimed);
 | 
			
		||||
+	__count_vm_events(PGSTEAL_ANON + type, reclaimed);
 | 
			
		||||
+
 | 
			
		||||
+	spin_unlock_irq(&lruvec->lru_lock);
 | 
			
		||||
+
 | 
			
		||||
+	mem_cgroup_uncharge_list(&list);
 | 
			
		||||
+	free_unref_page_list(&list);
 | 
			
		||||
+
 | 
			
		||||
+	sc->nr_reclaimed += reclaimed;
 | 
			
		||||
+done:
 | 
			
		||||
+	return *nr_to_scan > 0 && sc->nr_reclaimed < sc->nr_to_reclaim;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+/******************************************************************************
 | 
			
		||||
+ *                          page reclaim
 | 
			
		||||
+ ******************************************************************************/
 | 
			
		||||
+
 | 
			
		||||
+static int get_swappiness(struct lruvec *lruvec)
 | 
			
		||||
+{
 | 
			
		||||
+	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 | 
			
		||||
+	int swappiness = mem_cgroup_get_nr_swap_pages(memcg) >= (long)SWAP_CLUSTER_MAX ?
 | 
			
		||||
+			 mem_cgroup_swappiness(memcg) : 0;
 | 
			
		||||
+
 | 
			
		||||
+	VM_BUG_ON(swappiness > 200U);
 | 
			
		||||
+
 | 
			
		||||
+	return swappiness;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
 | 
			
		||||
+				    int swappiness)
 | 
			
		||||
+{
 | 
			
		||||
+	int gen, type, zone;
 | 
			
		||||
+	long nr_to_scan = 0;
 | 
			
		||||
+	struct lrugen *lrugen = &lruvec->evictable;
 | 
			
		||||
+	DEFINE_MAX_SEQ();
 | 
			
		||||
+	DEFINE_MIN_SEQ();
 | 
			
		||||
+
 | 
			
		||||
+	lru_add_drain();
 | 
			
		||||
+
 | 
			
		||||
+	for (type = !swappiness; type < ANON_AND_FILE; type++) {
 | 
			
		||||
+		unsigned long seq;
 | 
			
		||||
+
 | 
			
		||||
+		for (seq = min_seq[type]; seq <= max_seq; seq++) {
 | 
			
		||||
+			gen = lru_gen_from_seq(seq);
 | 
			
		||||
+
 | 
			
		||||
+			for (zone = 0; zone <= sc->reclaim_idx; zone++)
 | 
			
		||||
+				nr_to_scan += READ_ONCE(lrugen->sizes[gen][type][zone]);
 | 
			
		||||
+		}
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	nr_to_scan = max(nr_to_scan, 0L);
 | 
			
		||||
+	nr_to_scan = round_up(nr_to_scan >> sc->priority, SWAP_CLUSTER_MAX);
 | 
			
		||||
+
 | 
			
		||||
+	if (max_nr_gens(max_seq, min_seq, swappiness) > MIN_NR_GENS)
 | 
			
		||||
+		return nr_to_scan;
 | 
			
		||||
+
 | 
			
		||||
+	/* kswapd uses lru_gen_age_node() */
 | 
			
		||||
+	if (current_is_kswapd())
 | 
			
		||||
+		return 0;
 | 
			
		||||
+
 | 
			
		||||
+	return walk_mm_list(lruvec, max_seq, sc, swappiness, NULL) ? nr_to_scan : 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 | 
			
		||||
+{
 | 
			
		||||
+	struct blk_plug plug;
 | 
			
		||||
+	unsigned long scanned = 0;
 | 
			
		||||
+	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 | 
			
		||||
+
 | 
			
		||||
+	blk_start_plug(&plug);
 | 
			
		||||
+
 | 
			
		||||
+	while (true) {
 | 
			
		||||
+		long nr_to_scan;
 | 
			
		||||
+		int swappiness = sc->may_swap ? get_swappiness(lruvec) : 0;
 | 
			
		||||
+
 | 
			
		||||
+		nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness) - scanned;
 | 
			
		||||
+		if (nr_to_scan < (long)SWAP_CLUSTER_MAX)
 | 
			
		||||
+			break;
 | 
			
		||||
+
 | 
			
		||||
+		scanned += nr_to_scan;
 | 
			
		||||
+
 | 
			
		||||
+		if (!evict_pages(lruvec, sc, swappiness, &nr_to_scan))
 | 
			
		||||
+			break;
 | 
			
		||||
+
 | 
			
		||||
+		scanned -= nr_to_scan;
 | 
			
		||||
+
 | 
			
		||||
+		if (mem_cgroup_below_min(memcg) ||
 | 
			
		||||
+		    (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
 | 
			
		||||
+			break;
 | 
			
		||||
+
 | 
			
		||||
+		cond_resched();
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	blk_finish_plug(&plug);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+/******************************************************************************
 | 
			
		||||
+ *                          the background aging
 | 
			
		||||
+ ******************************************************************************/
 | 
			
		||||
+
 | 
			
		||||
+static int lru_gen_spread = MIN_NR_GENS;
 | 
			
		||||
+
 | 
			
		||||
+static void try_walk_mm_list(struct lruvec *lruvec, struct scan_control *sc)
 | 
			
		||||
+{
 | 
			
		||||
+	int gen, type, zone;
 | 
			
		||||
+	long old_and_young[2] = {};
 | 
			
		||||
+	int spread = READ_ONCE(lru_gen_spread);
 | 
			
		||||
+	int swappiness = get_swappiness(lruvec);
 | 
			
		||||
+	struct lrugen *lrugen = &lruvec->evictable;
 | 
			
		||||
+	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 | 
			
		||||
+	DEFINE_MAX_SEQ();
 | 
			
		||||
+	DEFINE_MIN_SEQ();
 | 
			
		||||
+
 | 
			
		||||
+	lru_add_drain();
 | 
			
		||||
+
 | 
			
		||||
+	for (type = !swappiness; type < ANON_AND_FILE; type++) {
 | 
			
		||||
+		unsigned long seq;
 | 
			
		||||
+
 | 
			
		||||
+		for (seq = min_seq[type]; seq <= max_seq; seq++) {
 | 
			
		||||
+			gen = lru_gen_from_seq(seq);
 | 
			
		||||
+
 | 
			
		||||
+			for (zone = 0; zone < MAX_NR_ZONES; zone++)
 | 
			
		||||
+				old_and_young[seq == max_seq] +=
 | 
			
		||||
+					READ_ONCE(lrugen->sizes[gen][type][zone]);
 | 
			
		||||
+		}
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	old_and_young[0] = max(old_and_young[0], 0L);
 | 
			
		||||
+	old_and_young[1] = max(old_and_young[1], 0L);
 | 
			
		||||
+
 | 
			
		||||
+	/* try to spread pages out across spread+1 generations */
 | 
			
		||||
+	if (old_and_young[0] >= old_and_young[1] * spread &&
 | 
			
		||||
+	    min_nr_gens(max_seq, min_seq, swappiness) > max(spread, MIN_NR_GENS))
 | 
			
		||||
+		return;
 | 
			
		||||
+
 | 
			
		||||
+	walk_mm_list(lruvec, max_seq, sc, swappiness, pgdat->mm_walk_args);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
 | 
			
		||||
+{
 | 
			
		||||
+	struct mem_cgroup *memcg;
 | 
			
		||||
+
 | 
			
		||||
+	VM_BUG_ON(!current_is_kswapd());
 | 
			
		||||
+
 | 
			
		||||
+	memcg = mem_cgroup_iter(NULL, NULL, NULL);
 | 
			
		||||
+	do {
 | 
			
		||||
+		struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
 | 
			
		||||
+
 | 
			
		||||
+		if (!mem_cgroup_below_min(memcg) &&
 | 
			
		||||
+		    (!mem_cgroup_below_low(memcg) || sc->memcg_low_reclaim))
 | 
			
		||||
+			try_walk_mm_list(lruvec, sc);
 | 
			
		||||
+
 | 
			
		||||
+		cond_resched();
 | 
			
		||||
+	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 /******************************************************************************
 | 
			
		||||
  *                          state change
 | 
			
		||||
  ******************************************************************************/
 | 
			
		||||
@@ -4172,6 +4663,21 @@ static int __meminit __maybe_unused lru_gen_online_mem(struct notifier_block *se
 | 
			
		||||
 	return NOTIFY_DONE;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+static void lru_gen_start_kswapd(int nid)
 | 
			
		||||
+{
 | 
			
		||||
+	struct pglist_data *pgdat = NODE_DATA(nid);
 | 
			
		||||
+
 | 
			
		||||
+	pgdat->mm_walk_args = kvzalloc_node(size_of_mm_walk_args(), GFP_KERNEL, nid);
 | 
			
		||||
+	WARN_ON_ONCE(!pgdat->mm_walk_args);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void lru_gen_stop_kswapd(int nid)
 | 
			
		||||
+{
 | 
			
		||||
+	struct pglist_data *pgdat = NODE_DATA(nid);
 | 
			
		||||
+
 | 
			
		||||
+	kvfree(pgdat->mm_walk_args);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 /******************************************************************************
 | 
			
		||||
  *                          initialization
 | 
			
		||||
  ******************************************************************************/
 | 
			
		||||
@@ -4220,6 +4726,24 @@ static int __init init_lru_gen(void)
 | 
			
		||||
  */
 | 
			
		||||
 arch_initcall(init_lru_gen);
 | 
			
		||||
 
 | 
			
		||||
+#else /* CONFIG_LRU_GEN */
 | 
			
		||||
+
 | 
			
		||||
+static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 | 
			
		||||
+{
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
 | 
			
		||||
+{
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void lru_gen_start_kswapd(int nid)
 | 
			
		||||
+{
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void lru_gen_stop_kswapd(int nid)
 | 
			
		||||
+{
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
 #endif /* CONFIG_LRU_GEN */
 | 
			
		||||
 
 | 
			
		||||
 static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 | 
			
		||||
@@ -4233,6 +4757,11 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 | 
			
		||||
 	struct blk_plug plug;
 | 
			
		||||
 	bool scan_adjusted;
 | 
			
		||||
 
 | 
			
		||||
+	if (lru_gen_enabled()) {
 | 
			
		||||
+		lru_gen_shrink_lruvec(lruvec, sc);
 | 
			
		||||
+		return;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
 	get_scan_count(lruvec, sc, nr);
 | 
			
		||||
 
 | 
			
		||||
 	/* Record the original scan target for proportional adjustments later */
 | 
			
		||||
@@ -4699,6 +5228,9 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat)
 | 
			
		||||
 	struct lruvec *target_lruvec;
 | 
			
		||||
 	unsigned long refaults;
 | 
			
		||||
 
 | 
			
		||||
+	if (lru_gen_enabled())
 | 
			
		||||
+		return;
 | 
			
		||||
+
 | 
			
		||||
 	target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
 | 
			
		||||
 	refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON);
 | 
			
		||||
 	target_lruvec->refaults[0] = refaults;
 | 
			
		||||
@@ -5073,6 +5605,11 @@ static void age_active_anon(struct pglist_data *pgdat,
 | 
			
		||||
 	struct mem_cgroup *memcg;
 | 
			
		||||
 	struct lruvec *lruvec;
 | 
			
		||||
 
 | 
			
		||||
+	if (lru_gen_enabled()) {
 | 
			
		||||
+		lru_gen_age_node(pgdat, sc);
 | 
			
		||||
+		return;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
 	if (!total_swap_pages)
 | 
			
		||||
 		return;
 | 
			
		||||
 
 | 
			
		||||
@@ -5753,6 +6290,8 @@ int kswapd_run(int nid)
 | 
			
		||||
 	if (pgdat->kswapd)
 | 
			
		||||
 		return 0;
 | 
			
		||||
 
 | 
			
		||||
+	lru_gen_start_kswapd(nid);
 | 
			
		||||
+
 | 
			
		||||
 	pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
 | 
			
		||||
 	if (IS_ERR(pgdat->kswapd)) {
 | 
			
		||||
 		/* failure at boot is fatal */
 | 
			
		||||
@@ -5775,6 +6314,7 @@ void kswapd_stop(int nid)
 | 
			
		||||
 	if (kswapd) {
 | 
			
		||||
 		kthread_stop(kswapd);
 | 
			
		||||
 		NODE_DATA(nid)->kswapd = NULL;
 | 
			
		||||
+		lru_gen_stop_kswapd(nid);
 | 
			
		||||
 	}
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
-- 
 | 
			
		||||
2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,572 @@
 | 
			
		||||
From mboxrd@z Thu Jan  1 00:00:00 1970
 | 
			
		||||
Return-Path: <linux-kernel-owner@kernel.org>
 | 
			
		||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
 | 
			
		||||
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
 | 
			
		||||
X-Spam-Level: 
 | 
			
		||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
 | 
			
		||||
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
 | 
			
		||||
	INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
 | 
			
		||||
	USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
 | 
			
		||||
	version=3.4.0
 | 
			
		||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
 | 
			
		||||
	by smtp.lore.kernel.org (Postfix) with ESMTP id 17ED0C43460
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:41 +0000 (UTC)
 | 
			
		||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
 | 
			
		||||
	by mail.kernel.org (Postfix) with ESMTP id E04C861184
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:40 +0000 (UTC)
 | 
			
		||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
 | 
			
		||||
        id S231223AbhETG4A (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:56:00 -0400
 | 
			
		||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37944 "EHLO
 | 
			
		||||
        lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
 | 
			
		||||
        with ESMTP id S230473AbhETGzl (ORCPT
 | 
			
		||||
        <rfc822;linux-kernel@vger.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:41 -0400
 | 
			
		||||
Received: from mail-qv1-xf49.google.com (mail-qv1-xf49.google.com [IPv6:2607:f8b0:4864:20::f49])
 | 
			
		||||
        by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C0A0DC061761
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:20 -0700 (PDT)
 | 
			
		||||
Received: by mail-qv1-xf49.google.com with SMTP id d9-20020a0ce4490000b02901f0bee07112so6151672qvm.7
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:20 -0700 (PDT)
 | 
			
		||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=google.com; s=20161025;
 | 
			
		||||
        h=date:in-reply-to:message-id:mime-version:references:subject:from:to
 | 
			
		||||
         :cc;
 | 
			
		||||
        bh=D+kCP8KjWdhzq6b9AfWqzFHrIcC1HBgTAlg7o1thC8s=;
 | 
			
		||||
        b=Ao3JFmOKgU6GUK7wOdKwO7smRq1lLjob3ltec82Ju9mPzN+QmdjLHzBqk1xnUggESF
 | 
			
		||||
         TqhhI3jybr858NfIj3PCXK9+qR3zojc5Pd/Quyp44VSHbor2BjBUQqP/t8M487uM4XwV
 | 
			
		||||
         WngIjYnvrYzwh9qjiSWbyBv7yV1ee386Z4r6QxKE99zk0yauu04cnFkSyQcJzvL7ST9Y
 | 
			
		||||
         gunIrZGlwh/QB3VgMvJBx8LLRtENwU2C6hFb2JqIhNx7ECiYmfTdxZ3hqTeciT6fp1mo
 | 
			
		||||
         VJhTuLMD0zN+BmbL7udJFNaRaLEzDq8aaX3Qgn7+HzfVXcaIkWuHdLfLiqx6NOEuXJPh
 | 
			
		||||
         aFOw==
 | 
			
		||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=1e100.net; s=20161025;
 | 
			
		||||
        h=x-gm-message-state:date:in-reply-to:message-id:mime-version
 | 
			
		||||
         :references:subject:from:to:cc;
 | 
			
		||||
        bh=D+kCP8KjWdhzq6b9AfWqzFHrIcC1HBgTAlg7o1thC8s=;
 | 
			
		||||
        b=Gjw4qCU2aoOaAwRGh+lY4+hcMXHU7TPGrsgdc0GeQGBjEbSelYAeLx6lfapzEMs4gS
 | 
			
		||||
         OINghBuL7TEDPHzWY92K4Snh4Pm597qGEmIgplE4cMHoWrN8rxc/C+gB/gsW/UgvllX2
 | 
			
		||||
         o0zgNR9ve4/y3vOdD7xGYl0wDq608mGKsoRKDgVf/SEkDCldm1xmB/MaJihWPSw4niAH
 | 
			
		||||
         KRDP84OugEgIRgRj3MrqoREu5cyjw4ClxbQ8HeaRnw1wt6isXGqlXjiBVveyjSbrV+Q/
 | 
			
		||||
         luG3YEEGwMlCYbMovQTSmBB7n0pN8Ihg0qVPmr6GcmbpcwYKQWv1tIves1vbV0kdb4aN
 | 
			
		||||
         u9HQ==
 | 
			
		||||
X-Gm-Message-State: AOAM530vodqQk47GPTvruvJy4njfXeKD7559Rhxl39MVYv2cMgQ/XPuI
 | 
			
		||||
        uuzSatZDrJOCQGdTQCyuRTP/IMipOlM=
 | 
			
		||||
X-Google-Smtp-Source: ABdhPJwiri0QbWt8YjsEa+N+Ooz0Ku0LVYpwKy1ZvcZJzOwoHQf1X931BLtxTF10spH4XfRsXE5x6SYzq+w=
 | 
			
		||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
 | 
			
		||||
 (user=yuzhao job=sendgmr) by 2002:ad4:5767:: with SMTP id r7mr3879143qvx.1.1621493659852;
 | 
			
		||||
 Wed, 19 May 2021 23:54:19 -0700 (PDT)
 | 
			
		||||
Date:   Thu, 20 May 2021 00:53:53 -0600
 | 
			
		||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
Message-Id: <20210520065355.2736558-13-yuzhao@google.com>
 | 
			
		||||
Mime-Version: 1.0
 | 
			
		||||
References: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
Subject: [PATCH v3 12/14] mm: multigenerational lru: user interface
 | 
			
		||||
From:   Yu Zhao <yuzhao@google.com>
 | 
			
		||||
To:     linux-mm@kvack.org
 | 
			
		||||
Cc:     Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
 | 
			
		||||
        Andrew Morton <akpm@linux-foundation.org>,
 | 
			
		||||
        Dave Chinner <david@fromorbit.com>,
 | 
			
		||||
        Dave Hansen <dave.hansen@linux.intel.com>,
 | 
			
		||||
        Donald Carr <sirspudd@gmail.com>,
 | 
			
		||||
        Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
 | 
			
		||||
        Johannes Weiner <hannes@cmpxchg.org>,
 | 
			
		||||
        Jonathan Corbet <corbet@lwn.net>,
 | 
			
		||||
        Joonsoo Kim <iamjoonsoo.kim@lge.com>,
 | 
			
		||||
        Konstantin Kharlamov <hi-angel@yandex.ru>,
 | 
			
		||||
        Marcus Seyfarth <m.seyfarth@gmail.com>,
 | 
			
		||||
        Matthew Wilcox <willy@infradead.org>,
 | 
			
		||||
        Mel Gorman <mgorman@suse.de>,
 | 
			
		||||
        Miaohe Lin <linmiaohe@huawei.com>,
 | 
			
		||||
        Michael Larabel <michael@michaellarabel.com>,
 | 
			
		||||
        Michal Hocko <mhocko@suse.com>,
 | 
			
		||||
        Michel Lespinasse <michel@lespinasse.org>,
 | 
			
		||||
        Rik van Riel <riel@surriel.com>,
 | 
			
		||||
        Roman Gushchin <guro@fb.com>,
 | 
			
		||||
        Tim Chen <tim.c.chen@linux.intel.com>,
 | 
			
		||||
        Vlastimil Babka <vbabka@suse.cz>,
 | 
			
		||||
        Yang Shi <shy828301@gmail.com>,
 | 
			
		||||
        Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
 | 
			
		||||
        linux-kernel@vger.kernel.org, lkp@lists.01.org,
 | 
			
		||||
        page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
 | 
			
		||||
        Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
Content-Type: text/plain; charset="UTF-8"
 | 
			
		||||
Precedence: bulk
 | 
			
		||||
List-ID: <linux-kernel.vger.kernel.org>
 | 
			
		||||
X-Mailing-List: linux-kernel@vger.kernel.org
 | 
			
		||||
List-Archive: <https://lore.kernel.org/lkml/>
 | 
			
		||||
 | 
			
		||||
Add a sysfs file /sys/kernel/mm/lru_gen/enabled to enable and disable
 | 
			
		||||
the multigenerational lru at runtime.
 | 
			
		||||
 | 
			
		||||
Add a sysfs file /sys/kernel/mm/lru_gen/spread to optionally spread
 | 
			
		||||
pages out across more than three generations. More generations make
 | 
			
		||||
the background aging more aggressive.
 | 
			
		||||
 | 
			
		||||
Add a debugfs file /sys/kernel/debug/lru_gen to monitor the
 | 
			
		||||
multigenerational lru and trigger the aging and the eviction. This
 | 
			
		||||
file has the following output:
 | 
			
		||||
  memcg  memcg_id  memcg_path
 | 
			
		||||
    node  node_id
 | 
			
		||||
      min_gen  birth_time  anon_size  file_size
 | 
			
		||||
      ...
 | 
			
		||||
      max_gen  birth_time  anon_size  file_size
 | 
			
		||||
 | 
			
		||||
Given a memcg and a node, "min_gen" is the oldest generation (number)
 | 
			
		||||
and "max_gen" is the youngest. Birth time is in milliseconds. The
 | 
			
		||||
sizes of anon and file types are in pages.
 | 
			
		||||
 | 
			
		||||
This file takes the following input:
 | 
			
		||||
  + memcg_id node_id gen [swappiness]
 | 
			
		||||
  - memcg_id node_id gen [swappiness] [nr_to_reclaim]
 | 
			
		||||
 | 
			
		||||
The first command line accounts referenced pages to generation
 | 
			
		||||
"max_gen" and creates the next generation "max_gen"+1. In this case,
 | 
			
		||||
"gen" should be equal to "max_gen". A swap file and a non-zero
 | 
			
		||||
"swappiness" are required to scan anon type. If swapping is not
 | 
			
		||||
desired, set vm.swappiness to 0. The second command line evicts
 | 
			
		||||
generations less than or equal to "gen". In this case, "gen" should be
 | 
			
		||||
less than "max_gen"-1 as "max_gen" and "max_gen"-1 are active
 | 
			
		||||
generations and therefore protected from the eviction. Use
 | 
			
		||||
"nr_to_reclaim" to limit the number of pages to evict. Multiple
 | 
			
		||||
command lines are supported, as is concatenation with delimiters ","
 | 
			
		||||
and ";".
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
 | 
			
		||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
---
 | 
			
		||||
 mm/vmscan.c | 403 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 | 
			
		||||
 1 file changed, 403 insertions(+)
 | 
			
		||||
 | 
			
		||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
 | 
			
		||||
index 2f86dcc04c56..ff2deec24c64 100644
 | 
			
		||||
--- a/mm/vmscan.c
 | 
			
		||||
+++ b/mm/vmscan.c
 | 
			
		||||
@@ -52,6 +52,8 @@
 | 
			
		||||
 #include <linux/memory.h>
 | 
			
		||||
 #include <linux/pagewalk.h>
 | 
			
		||||
 #include <linux/shmem_fs.h>
 | 
			
		||||
+#include <linux/ctype.h>
 | 
			
		||||
+#include <linux/debugfs.h>
 | 
			
		||||
 
 | 
			
		||||
 #include <asm/tlbflush.h>
 | 
			
		||||
 #include <asm/div64.h>
 | 
			
		||||
@@ -4678,6 +4680,401 @@ static void lru_gen_stop_kswapd(int nid)
 | 
			
		||||
 	kvfree(pgdat->mm_walk_args);
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+/******************************************************************************
 | 
			
		||||
+ *                          sysfs interface
 | 
			
		||||
+ ******************************************************************************/
 | 
			
		||||
+
 | 
			
		||||
+static ssize_t show_lru_gen_spread(struct kobject *kobj, struct kobj_attribute *attr,
 | 
			
		||||
+				   char *buf)
 | 
			
		||||
+{
 | 
			
		||||
+	return sprintf(buf, "%d\n", READ_ONCE(lru_gen_spread));
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static ssize_t store_lru_gen_spread(struct kobject *kobj, struct kobj_attribute *attr,
 | 
			
		||||
+				    const char *buf, size_t len)
 | 
			
		||||
+{
 | 
			
		||||
+	int spread;
 | 
			
		||||
+
 | 
			
		||||
+	if (kstrtoint(buf, 10, &spread) || spread >= MAX_NR_GENS)
 | 
			
		||||
+		return -EINVAL;
 | 
			
		||||
+
 | 
			
		||||
+	WRITE_ONCE(lru_gen_spread, spread);
 | 
			
		||||
+
 | 
			
		||||
+	return len;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static struct kobj_attribute lru_gen_spread_attr = __ATTR(
 | 
			
		||||
+	spread, 0644, show_lru_gen_spread, store_lru_gen_spread
 | 
			
		||||
+);
 | 
			
		||||
+
 | 
			
		||||
+static ssize_t show_lru_gen_enabled(struct kobject *kobj, struct kobj_attribute *attr,
 | 
			
		||||
+				    char *buf)
 | 
			
		||||
+{
 | 
			
		||||
+	return snprintf(buf, PAGE_SIZE, "%d\n", lru_gen_enabled());
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static ssize_t store_lru_gen_enabled(struct kobject *kobj, struct kobj_attribute *attr,
 | 
			
		||||
+				     const char *buf, size_t len)
 | 
			
		||||
+{
 | 
			
		||||
+	int enable;
 | 
			
		||||
+
 | 
			
		||||
+	if (kstrtoint(buf, 10, &enable))
 | 
			
		||||
+		return -EINVAL;
 | 
			
		||||
+
 | 
			
		||||
+	lru_gen_set_state(enable, true, false);
 | 
			
		||||
+
 | 
			
		||||
+	return len;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static struct kobj_attribute lru_gen_enabled_attr = __ATTR(
 | 
			
		||||
+	enabled, 0644, show_lru_gen_enabled, store_lru_gen_enabled
 | 
			
		||||
+);
 | 
			
		||||
+
 | 
			
		||||
+static struct attribute *lru_gen_attrs[] = {
 | 
			
		||||
+	&lru_gen_spread_attr.attr,
 | 
			
		||||
+	&lru_gen_enabled_attr.attr,
 | 
			
		||||
+	NULL
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
+static struct attribute_group lru_gen_attr_group = {
 | 
			
		||||
+	.name = "lru_gen",
 | 
			
		||||
+	.attrs = lru_gen_attrs,
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
+/******************************************************************************
 | 
			
		||||
+ *                          debugfs interface
 | 
			
		||||
+ ******************************************************************************/
 | 
			
		||||
+
 | 
			
		||||
+static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos)
 | 
			
		||||
+{
 | 
			
		||||
+	struct mem_cgroup *memcg;
 | 
			
		||||
+	loff_t nr_to_skip = *pos;
 | 
			
		||||
+
 | 
			
		||||
+	m->private = kzalloc(PATH_MAX, GFP_KERNEL);
 | 
			
		||||
+	if (!m->private)
 | 
			
		||||
+		return ERR_PTR(-ENOMEM);
 | 
			
		||||
+
 | 
			
		||||
+	memcg = mem_cgroup_iter(NULL, NULL, NULL);
 | 
			
		||||
+	do {
 | 
			
		||||
+		int nid;
 | 
			
		||||
+
 | 
			
		||||
+		for_each_node_state(nid, N_MEMORY) {
 | 
			
		||||
+			if (!nr_to_skip--)
 | 
			
		||||
+				return mem_cgroup_lruvec(memcg, NODE_DATA(nid));
 | 
			
		||||
+		}
 | 
			
		||||
+	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
 | 
			
		||||
+
 | 
			
		||||
+	return NULL;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void lru_gen_seq_stop(struct seq_file *m, void *v)
 | 
			
		||||
+{
 | 
			
		||||
+	if (!IS_ERR_OR_NULL(v))
 | 
			
		||||
+		mem_cgroup_iter_break(NULL, lruvec_memcg(v));
 | 
			
		||||
+
 | 
			
		||||
+	kfree(m->private);
 | 
			
		||||
+	m->private = NULL;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos)
 | 
			
		||||
+{
 | 
			
		||||
+	int nid = lruvec_pgdat(v)->node_id;
 | 
			
		||||
+	struct mem_cgroup *memcg = lruvec_memcg(v);
 | 
			
		||||
+
 | 
			
		||||
+	++*pos;
 | 
			
		||||
+
 | 
			
		||||
+	nid = next_memory_node(nid);
 | 
			
		||||
+	if (nid == MAX_NUMNODES) {
 | 
			
		||||
+		memcg = mem_cgroup_iter(NULL, memcg, NULL);
 | 
			
		||||
+		if (!memcg)
 | 
			
		||||
+			return NULL;
 | 
			
		||||
+
 | 
			
		||||
+		nid = first_memory_node;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	return mem_cgroup_lruvec(memcg, NODE_DATA(nid));
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
 | 
			
		||||
+				  unsigned long max_seq, unsigned long *min_seq,
 | 
			
		||||
+				  unsigned long seq)
 | 
			
		||||
+{
 | 
			
		||||
+	int i;
 | 
			
		||||
+	int type, tier;
 | 
			
		||||
+	int hist = hist_from_seq_or_gen(seq);
 | 
			
		||||
+	struct lrugen *lrugen = &lruvec->evictable;
 | 
			
		||||
+	int nid = lruvec_pgdat(lruvec)->node_id;
 | 
			
		||||
+	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 | 
			
		||||
+	struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
 | 
			
		||||
+
 | 
			
		||||
+	for (tier = 0; tier < MAX_NR_TIERS; tier++) {
 | 
			
		||||
+		seq_printf(m, "            %10d", tier);
 | 
			
		||||
+		for (type = 0; type < ANON_AND_FILE; type++) {
 | 
			
		||||
+			unsigned long n[3] = {};
 | 
			
		||||
+
 | 
			
		||||
+			if (seq == max_seq) {
 | 
			
		||||
+				n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]);
 | 
			
		||||
+				n[1] = READ_ONCE(lrugen->avg_total[type][tier]);
 | 
			
		||||
+
 | 
			
		||||
+				seq_printf(m, " %10luR %10luT %10lu ", n[0], n[1], n[2]);
 | 
			
		||||
+			} else if (seq == min_seq[type] || NR_STAT_GENS > 1) {
 | 
			
		||||
+				n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]);
 | 
			
		||||
+				n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]);
 | 
			
		||||
+				if (tier)
 | 
			
		||||
+					n[2] = READ_ONCE(lrugen->activated[hist][type][tier - 1]);
 | 
			
		||||
+
 | 
			
		||||
+				seq_printf(m, " %10lur %10lue %10lua", n[0], n[1], n[2]);
 | 
			
		||||
+			} else
 | 
			
		||||
+				seq_puts(m, "          0           0           0 ");
 | 
			
		||||
+		}
 | 
			
		||||
+		seq_putc(m, '\n');
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	seq_puts(m, "                      ");
 | 
			
		||||
+	for (i = 0; i < NR_MM_STATS; i++) {
 | 
			
		||||
+		if (seq == max_seq && NR_STAT_GENS == 1)
 | 
			
		||||
+			seq_printf(m, " %10lu%c", READ_ONCE(mm_list->nodes[nid].stats[hist][i]),
 | 
			
		||||
+				   toupper(MM_STAT_CODES[i]));
 | 
			
		||||
+		else if (seq != max_seq && NR_STAT_GENS > 1)
 | 
			
		||||
+			seq_printf(m, " %10lu%c", READ_ONCE(mm_list->nodes[nid].stats[hist][i]),
 | 
			
		||||
+				   MM_STAT_CODES[i]);
 | 
			
		||||
+		else
 | 
			
		||||
+			seq_puts(m, "          0 ");
 | 
			
		||||
+	}
 | 
			
		||||
+	seq_putc(m, '\n');
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int lru_gen_seq_show(struct seq_file *m, void *v)
 | 
			
		||||
+{
 | 
			
		||||
+	unsigned long seq;
 | 
			
		||||
+	bool full = !debugfs_real_fops(m->file)->write;
 | 
			
		||||
+	struct lruvec *lruvec = v;
 | 
			
		||||
+	struct lrugen *lrugen = &lruvec->evictable;
 | 
			
		||||
+	int nid = lruvec_pgdat(lruvec)->node_id;
 | 
			
		||||
+	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 | 
			
		||||
+	DEFINE_MAX_SEQ();
 | 
			
		||||
+	DEFINE_MIN_SEQ();
 | 
			
		||||
+
 | 
			
		||||
+	if (nid == first_memory_node) {
 | 
			
		||||
+#ifdef CONFIG_MEMCG
 | 
			
		||||
+		if (memcg)
 | 
			
		||||
+			cgroup_path(memcg->css.cgroup, m->private, PATH_MAX);
 | 
			
		||||
+#endif
 | 
			
		||||
+		seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), (char *)m->private);
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	seq_printf(m, " node %5d\n", nid);
 | 
			
		||||
+
 | 
			
		||||
+	seq = full ? (max_seq < MAX_NR_GENS ? 0 : max_seq - MAX_NR_GENS + 1) :
 | 
			
		||||
+		     min(min_seq[0], min_seq[1]);
 | 
			
		||||
+
 | 
			
		||||
+	for (; seq <= max_seq; seq++) {
 | 
			
		||||
+		int gen, type, zone;
 | 
			
		||||
+		unsigned int msecs;
 | 
			
		||||
+
 | 
			
		||||
+		gen = lru_gen_from_seq(seq);
 | 
			
		||||
+		msecs = jiffies_to_msecs(jiffies - READ_ONCE(lrugen->timestamps[gen]));
 | 
			
		||||
+
 | 
			
		||||
+		seq_printf(m, " %10lu %10u", seq, msecs);
 | 
			
		||||
+
 | 
			
		||||
+		for (type = 0; type < ANON_AND_FILE; type++) {
 | 
			
		||||
+			long size = 0;
 | 
			
		||||
+
 | 
			
		||||
+			if (seq < min_seq[type]) {
 | 
			
		||||
+				seq_puts(m, "         -0 ");
 | 
			
		||||
+				continue;
 | 
			
		||||
+			}
 | 
			
		||||
+
 | 
			
		||||
+			for (zone = 0; zone < MAX_NR_ZONES; zone++)
 | 
			
		||||
+				size += READ_ONCE(lrugen->sizes[gen][type][zone]);
 | 
			
		||||
+
 | 
			
		||||
+			seq_printf(m, " %10lu ", max(size, 0L));
 | 
			
		||||
+		}
 | 
			
		||||
+
 | 
			
		||||
+		seq_putc(m, '\n');
 | 
			
		||||
+
 | 
			
		||||
+		if (full)
 | 
			
		||||
+			lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq);
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	return 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static const struct seq_operations lru_gen_seq_ops = {
 | 
			
		||||
+	.start = lru_gen_seq_start,
 | 
			
		||||
+	.stop = lru_gen_seq_stop,
 | 
			
		||||
+	.next = lru_gen_seq_next,
 | 
			
		||||
+	.show = lru_gen_seq_show,
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
+static int advance_max_seq(struct lruvec *lruvec, unsigned long seq, int swappiness)
 | 
			
		||||
+{
 | 
			
		||||
+	struct scan_control sc = {
 | 
			
		||||
+		.target_mem_cgroup = lruvec_memcg(lruvec),
 | 
			
		||||
+	};
 | 
			
		||||
+	DEFINE_MAX_SEQ();
 | 
			
		||||
+
 | 
			
		||||
+	if (seq == max_seq)
 | 
			
		||||
+		walk_mm_list(lruvec, max_seq, &sc, swappiness, NULL);
 | 
			
		||||
+
 | 
			
		||||
+	return seq > max_seq ? -EINVAL : 0;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int advance_min_seq(struct lruvec *lruvec, unsigned long seq, int swappiness,
 | 
			
		||||
+			   unsigned long nr_to_reclaim)
 | 
			
		||||
+{
 | 
			
		||||
+	struct blk_plug plug;
 | 
			
		||||
+	int err = -EINTR;
 | 
			
		||||
+	long nr_to_scan = LONG_MAX;
 | 
			
		||||
+	struct scan_control sc = {
 | 
			
		||||
+		.nr_to_reclaim = nr_to_reclaim,
 | 
			
		||||
+		.target_mem_cgroup = lruvec_memcg(lruvec),
 | 
			
		||||
+		.may_writepage = 1,
 | 
			
		||||
+		.may_unmap = 1,
 | 
			
		||||
+		.may_swap = 1,
 | 
			
		||||
+		.reclaim_idx = MAX_NR_ZONES - 1,
 | 
			
		||||
+		.gfp_mask = GFP_KERNEL,
 | 
			
		||||
+	};
 | 
			
		||||
+	DEFINE_MAX_SEQ();
 | 
			
		||||
+
 | 
			
		||||
+	if (seq >= max_seq - 1)
 | 
			
		||||
+		return -EINVAL;
 | 
			
		||||
+
 | 
			
		||||
+	blk_start_plug(&plug);
 | 
			
		||||
+
 | 
			
		||||
+	while (!signal_pending(current)) {
 | 
			
		||||
+		DEFINE_MIN_SEQ();
 | 
			
		||||
+
 | 
			
		||||
+		if (seq < min(min_seq[!swappiness], min_seq[swappiness < 200]) ||
 | 
			
		||||
+		    !evict_pages(lruvec, &sc, swappiness, &nr_to_scan)) {
 | 
			
		||||
+			err = 0;
 | 
			
		||||
+			break;
 | 
			
		||||
+		}
 | 
			
		||||
+
 | 
			
		||||
+		cond_resched();
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	blk_finish_plug(&plug);
 | 
			
		||||
+
 | 
			
		||||
+	return err;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int advance_seq(char cmd, int memcg_id, int nid, unsigned long seq,
 | 
			
		||||
+		       int swappiness, unsigned long nr_to_reclaim)
 | 
			
		||||
+{
 | 
			
		||||
+	struct lruvec *lruvec;
 | 
			
		||||
+	int err = -EINVAL;
 | 
			
		||||
+	struct mem_cgroup *memcg = NULL;
 | 
			
		||||
+
 | 
			
		||||
+	if (!mem_cgroup_disabled()) {
 | 
			
		||||
+		rcu_read_lock();
 | 
			
		||||
+		memcg = mem_cgroup_from_id(memcg_id);
 | 
			
		||||
+#ifdef CONFIG_MEMCG
 | 
			
		||||
+		if (memcg && !css_tryget(&memcg->css))
 | 
			
		||||
+			memcg = NULL;
 | 
			
		||||
+#endif
 | 
			
		||||
+		rcu_read_unlock();
 | 
			
		||||
+
 | 
			
		||||
+		if (!memcg)
 | 
			
		||||
+			goto done;
 | 
			
		||||
+	}
 | 
			
		||||
+	if (memcg_id != mem_cgroup_id(memcg))
 | 
			
		||||
+		goto done;
 | 
			
		||||
+
 | 
			
		||||
+	if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY))
 | 
			
		||||
+		goto done;
 | 
			
		||||
+
 | 
			
		||||
+	lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
 | 
			
		||||
+
 | 
			
		||||
+	if (swappiness == -1)
 | 
			
		||||
+		swappiness = get_swappiness(lruvec);
 | 
			
		||||
+	else if (swappiness > 200U)
 | 
			
		||||
+		goto done;
 | 
			
		||||
+
 | 
			
		||||
+	switch (cmd) {
 | 
			
		||||
+	case '+':
 | 
			
		||||
+		err = advance_max_seq(lruvec, seq, swappiness);
 | 
			
		||||
+		break;
 | 
			
		||||
+	case '-':
 | 
			
		||||
+		err = advance_min_seq(lruvec, seq, swappiness, nr_to_reclaim);
 | 
			
		||||
+		break;
 | 
			
		||||
+	}
 | 
			
		||||
+done:
 | 
			
		||||
+	mem_cgroup_put(memcg);
 | 
			
		||||
+
 | 
			
		||||
+	return err;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
 | 
			
		||||
+				 size_t len, loff_t *pos)
 | 
			
		||||
+{
 | 
			
		||||
+	void *buf;
 | 
			
		||||
+	char *cur, *next;
 | 
			
		||||
+	int err = 0;
 | 
			
		||||
+
 | 
			
		||||
+	buf = kvmalloc(len + 1, GFP_USER);
 | 
			
		||||
+	if (!buf)
 | 
			
		||||
+		return -ENOMEM;
 | 
			
		||||
+
 | 
			
		||||
+	if (copy_from_user(buf, src, len)) {
 | 
			
		||||
+		kvfree(buf);
 | 
			
		||||
+		return -EFAULT;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	next = buf;
 | 
			
		||||
+	next[len] = '\0';
 | 
			
		||||
+
 | 
			
		||||
+	while ((cur = strsep(&next, ",;\n"))) {
 | 
			
		||||
+		int n;
 | 
			
		||||
+		int end;
 | 
			
		||||
+		char cmd;
 | 
			
		||||
+		unsigned int memcg_id;
 | 
			
		||||
+		unsigned int nid;
 | 
			
		||||
+		unsigned long seq;
 | 
			
		||||
+		unsigned int swappiness = -1;
 | 
			
		||||
+		unsigned long nr_to_reclaim = -1;
 | 
			
		||||
+
 | 
			
		||||
+		cur = skip_spaces(cur);
 | 
			
		||||
+		if (!*cur)
 | 
			
		||||
+			continue;
 | 
			
		||||
+
 | 
			
		||||
+		n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid,
 | 
			
		||||
+			   &seq, &end, &swappiness, &end, &nr_to_reclaim, &end);
 | 
			
		||||
+		if (n < 4 || cur[end]) {
 | 
			
		||||
+			err = -EINVAL;
 | 
			
		||||
+			break;
 | 
			
		||||
+		}
 | 
			
		||||
+
 | 
			
		||||
+		err = advance_seq(cmd, memcg_id, nid, seq, swappiness, nr_to_reclaim);
 | 
			
		||||
+		if (err)
 | 
			
		||||
+			break;
 | 
			
		||||
+	}
 | 
			
		||||
+
 | 
			
		||||
+	kvfree(buf);
 | 
			
		||||
+
 | 
			
		||||
+	return err ? : len;
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static int lru_gen_seq_open(struct inode *inode, struct file *file)
 | 
			
		||||
+{
 | 
			
		||||
+	return seq_open(file, &lru_gen_seq_ops);
 | 
			
		||||
+}
 | 
			
		||||
+
 | 
			
		||||
+static const struct file_operations lru_gen_rw_fops = {
 | 
			
		||||
+	.open = lru_gen_seq_open,
 | 
			
		||||
+	.read = seq_read,
 | 
			
		||||
+	.write = lru_gen_seq_write,
 | 
			
		||||
+	.llseek = seq_lseek,
 | 
			
		||||
+	.release = seq_release,
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
+static const struct file_operations lru_gen_ro_fops = {
 | 
			
		||||
+	.open = lru_gen_seq_open,
 | 
			
		||||
+	.read = seq_read,
 | 
			
		||||
+	.llseek = seq_lseek,
 | 
			
		||||
+	.release = seq_release,
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
 /******************************************************************************
 | 
			
		||||
  *                          initialization
 | 
			
		||||
  ******************************************************************************/
 | 
			
		||||
@@ -4718,6 +5115,12 @@ static int __init init_lru_gen(void)
 | 
			
		||||
 	if (hotplug_memory_notifier(lru_gen_online_mem, 0))
 | 
			
		||||
 		pr_err("lru_gen: failed to subscribe hotplug notifications\n");
 | 
			
		||||
 
 | 
			
		||||
+	if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
 | 
			
		||||
+		pr_err("lru_gen: failed to create sysfs group\n");
 | 
			
		||||
+
 | 
			
		||||
+	debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops);
 | 
			
		||||
+	debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops);
 | 
			
		||||
+
 | 
			
		||||
 	return 0;
 | 
			
		||||
 };
 | 
			
		||||
 /*
 | 
			
		||||
-- 
 | 
			
		||||
2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,177 @@
 | 
			
		||||
From mboxrd@z Thu Jan  1 00:00:00 1970
 | 
			
		||||
Return-Path: <linux-kernel-owner@kernel.org>
 | 
			
		||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
 | 
			
		||||
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
 | 
			
		||||
X-Spam-Level: 
 | 
			
		||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
 | 
			
		||||
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
 | 
			
		||||
	INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
 | 
			
		||||
	USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
 | 
			
		||||
	version=3.4.0
 | 
			
		||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
 | 
			
		||||
	by smtp.lore.kernel.org (Postfix) with ESMTP id 1B6E6C433B4
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:47 +0000 (UTC)
 | 
			
		||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
 | 
			
		||||
	by mail.kernel.org (Postfix) with ESMTP id 01DA3613BA
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:46 +0000 (UTC)
 | 
			
		||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
 | 
			
		||||
        id S231250AbhETG4G (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:56:06 -0400
 | 
			
		||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37952 "EHLO
 | 
			
		||||
        lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
 | 
			
		||||
        with ESMTP id S231130AbhETGzn (ORCPT
 | 
			
		||||
        <rfc822;linux-kernel@vger.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:43 -0400
 | 
			
		||||
Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49])
 | 
			
		||||
        by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 38C6EC061574
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:22 -0700 (PDT)
 | 
			
		||||
Received: by mail-yb1-xb49.google.com with SMTP id e138-20020a25e7900000b029050df4b648fcso15235225ybh.7
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:22 -0700 (PDT)
 | 
			
		||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=google.com; s=20161025;
 | 
			
		||||
        h=date:in-reply-to:message-id:mime-version:references:subject:from:to
 | 
			
		||||
         :cc;
 | 
			
		||||
        bh=RJZRi3A5hgCw5vurGae674wlMdAubVgl39rxTDtDyVU=;
 | 
			
		||||
        b=JLF6ekZqpQB+K5YXwsizPGhNysHViGntJ8r9yptD6ne6XqGKoI0Wr7dT82jfftMw45
 | 
			
		||||
         KOQGieGgRV+BKuZtTbu4cD96tkttpjfquqm9xT5G1x+H3lcXyVbFnA/e0Iz2BGxOx/oJ
 | 
			
		||||
         BP1OK9ib2xvMirn2ogaiMLLuQUMqUVLP4SPszQpLdYUpmsyvtcBjEJsyZiQXMIULinqi
 | 
			
		||||
         S2oaVggbJoWpCxB/3pF4W62fMm5D/LXGAxEWoOTfyY0Ng+NdQ206TROqcoNsbbncUKfa
 | 
			
		||||
         mpyuoyCTOYlALfoNN1kP2lNPrNTUz+UQK31nuEwEnfTBGdsmWTsTelrdxl+7zutfQ7Vh
 | 
			
		||||
         E6Ag==
 | 
			
		||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=1e100.net; s=20161025;
 | 
			
		||||
        h=x-gm-message-state:date:in-reply-to:message-id:mime-version
 | 
			
		||||
         :references:subject:from:to:cc;
 | 
			
		||||
        bh=RJZRi3A5hgCw5vurGae674wlMdAubVgl39rxTDtDyVU=;
 | 
			
		||||
        b=I4OJloc251SYLj8eJFPP7eJ7uA2r6NXUj5S3hGT6Cv3INg/pbFfz6U56wMbDKeUUx2
 | 
			
		||||
         PFZUxNbewINRi+Xyu0XORumSFYK8cRNAA2xJjsiB/Mi20wJutqQp8eggHjJ4klnT3Arg
 | 
			
		||||
         fC/Qi7JDEKR9akyObrL1SszlU1EyBRMRlSuA56tL8Ayw3KSXAha5WNL73FfjPvnDX4Jn
 | 
			
		||||
         bmGmhKmr4OxIJyYH+35RFfAEzVEoAkRi3miAuWb8eWC6T+GXdpovk6EqvlqSAS4RZyph
 | 
			
		||||
         hWXp4amXtIPmK3meD1g4aF/hJ6IDATp3RWD8SfDV+tNQHU5Wvz3exLpVhhmIWwqq93UL
 | 
			
		||||
         V4jg==
 | 
			
		||||
X-Gm-Message-State: AOAM532JcEmtEayMiu5r4FpU7325mYFfTWklXmMaEvDn+KHT0zqmqSZv
 | 
			
		||||
        de4I079gO1eY+8FLxSzEABlOF2R4isA=
 | 
			
		||||
X-Google-Smtp-Source: ABdhPJxxpSuYeOHDU9e04y/REOjF/gJdL+d+nc25sd9W9QvVBy/CaC3vEGm8uybifzDpCi76iP47Kw7dnfI=
 | 
			
		||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
 | 
			
		||||
 (user=yuzhao job=sendgmr) by 2002:a05:6902:4b3:: with SMTP id
 | 
			
		||||
 r19mr5173987ybs.290.1621493661349; Wed, 19 May 2021 23:54:21 -0700 (PDT)
 | 
			
		||||
Date:   Thu, 20 May 2021 00:53:54 -0600
 | 
			
		||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
Message-Id: <20210520065355.2736558-14-yuzhao@google.com>
 | 
			
		||||
Mime-Version: 1.0
 | 
			
		||||
References: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
Subject: [PATCH v3 13/14] mm: multigenerational lru: Kconfig
 | 
			
		||||
From:   Yu Zhao <yuzhao@google.com>
 | 
			
		||||
To:     linux-mm@kvack.org
 | 
			
		||||
Cc:     Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
 | 
			
		||||
        Andrew Morton <akpm@linux-foundation.org>,
 | 
			
		||||
        Dave Chinner <david@fromorbit.com>,
 | 
			
		||||
        Dave Hansen <dave.hansen@linux.intel.com>,
 | 
			
		||||
        Donald Carr <sirspudd@gmail.com>,
 | 
			
		||||
        Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
 | 
			
		||||
        Johannes Weiner <hannes@cmpxchg.org>,
 | 
			
		||||
        Jonathan Corbet <corbet@lwn.net>,
 | 
			
		||||
        Joonsoo Kim <iamjoonsoo.kim@lge.com>,
 | 
			
		||||
        Konstantin Kharlamov <hi-angel@yandex.ru>,
 | 
			
		||||
        Marcus Seyfarth <m.seyfarth@gmail.com>,
 | 
			
		||||
        Matthew Wilcox <willy@infradead.org>,
 | 
			
		||||
        Mel Gorman <mgorman@suse.de>,
 | 
			
		||||
        Miaohe Lin <linmiaohe@huawei.com>,
 | 
			
		||||
        Michael Larabel <michael@michaellarabel.com>,
 | 
			
		||||
        Michal Hocko <mhocko@suse.com>,
 | 
			
		||||
        Michel Lespinasse <michel@lespinasse.org>,
 | 
			
		||||
        Rik van Riel <riel@surriel.com>,
 | 
			
		||||
        Roman Gushchin <guro@fb.com>,
 | 
			
		||||
        Tim Chen <tim.c.chen@linux.intel.com>,
 | 
			
		||||
        Vlastimil Babka <vbabka@suse.cz>,
 | 
			
		||||
        Yang Shi <shy828301@gmail.com>,
 | 
			
		||||
        Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
 | 
			
		||||
        linux-kernel@vger.kernel.org, lkp@lists.01.org,
 | 
			
		||||
        page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
 | 
			
		||||
        Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
Content-Type: text/plain; charset="UTF-8"
 | 
			
		||||
Precedence: bulk
 | 
			
		||||
List-ID: <linux-kernel.vger.kernel.org>
 | 
			
		||||
X-Mailing-List: linux-kernel@vger.kernel.org
 | 
			
		||||
List-Archive: <https://lore.kernel.org/lkml/>
 | 
			
		||||
 | 
			
		||||
Add configuration options for the multigenerational lru.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
 | 
			
		||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
---
 | 
			
		||||
 mm/Kconfig | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 | 
			
		||||
 1 file changed, 58 insertions(+)
 | 
			
		||||
 | 
			
		||||
diff --git a/mm/Kconfig b/mm/Kconfig
 | 
			
		||||
index 02d44e3420f5..da125f145bc4 100644
 | 
			
		||||
--- a/mm/Kconfig
 | 
			
		||||
+++ b/mm/Kconfig
 | 
			
		||||
@@ -901,4 +901,62 @@ config KMAP_LOCAL
 | 
			
		||||
 # struct io_mapping based helper.  Selected by drivers that need them
 | 
			
		||||
 config IO_MAPPING
 | 
			
		||||
 	bool
 | 
			
		||||
+
 | 
			
		||||
+# the multigenerational lru {
 | 
			
		||||
+config LRU_GEN
 | 
			
		||||
+	bool "Multigenerational LRU"
 | 
			
		||||
+	depends on MMU
 | 
			
		||||
+	help
 | 
			
		||||
+	  A high performance LRU implementation to heavily overcommit workloads
 | 
			
		||||
+	  that are not IO bound. See Documentation/vm/multigen_lru.rst for
 | 
			
		||||
+	  details.
 | 
			
		||||
+
 | 
			
		||||
+	  Warning: do not enable this option unless you plan to use it because
 | 
			
		||||
+	  it introduces a small per-process and per-memcg and per-node memory
 | 
			
		||||
+	  overhead.
 | 
			
		||||
+
 | 
			
		||||
+config LRU_GEN_ENABLED
 | 
			
		||||
+	bool "Turn on by default"
 | 
			
		||||
+	depends on LRU_GEN
 | 
			
		||||
+	help
 | 
			
		||||
+	  The default value of /sys/kernel/mm/lru_gen/enabled is 0. This option
 | 
			
		||||
+	  changes it to 1.
 | 
			
		||||
+
 | 
			
		||||
+	  Warning: the default value is the fast path. See
 | 
			
		||||
+	  Documentation/static-keys.txt for details.
 | 
			
		||||
+
 | 
			
		||||
+config LRU_GEN_STATS
 | 
			
		||||
+	bool "Full stats for debugging"
 | 
			
		||||
+	depends on LRU_GEN
 | 
			
		||||
+	help
 | 
			
		||||
+	  This option keeps full stats for each generation, which can be read
 | 
			
		||||
+	  from /sys/kernel/debug/lru_gen_full.
 | 
			
		||||
+
 | 
			
		||||
+	  Warning: do not enable this option unless you plan to use it because
 | 
			
		||||
+	  it introduces an additional small per-process and per-memcg and
 | 
			
		||||
+	  per-node memory overhead.
 | 
			
		||||
+
 | 
			
		||||
+config NR_LRU_GENS
 | 
			
		||||
+	int "Max number of generations"
 | 
			
		||||
+	depends on LRU_GEN
 | 
			
		||||
+	range 4 31
 | 
			
		||||
+	default 7
 | 
			
		||||
+	help
 | 
			
		||||
+	  This will use order_base_2(N+1) spare bits from page flags.
 | 
			
		||||
+
 | 
			
		||||
+	  Warning: do not use numbers larger than necessary because each
 | 
			
		||||
+	  generation introduces a small per-node and per-memcg memory overhead.
 | 
			
		||||
+
 | 
			
		||||
+config TIERS_PER_GEN
 | 
			
		||||
+	int "Number of tiers per generation"
 | 
			
		||||
+	depends on LRU_GEN
 | 
			
		||||
+	range 2 5
 | 
			
		||||
+	default 4
 | 
			
		||||
+	help
 | 
			
		||||
+	  This will use N-2 spare bits from page flags.
 | 
			
		||||
+
 | 
			
		||||
+	  Larger values generally offer better protection to active pages under
 | 
			
		||||
+	  heavy buffered I/O workloads.
 | 
			
		||||
+# }
 | 
			
		||||
+
 | 
			
		||||
 endmenu
 | 
			
		||||
-- 
 | 
			
		||||
2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,273 @@
 | 
			
		||||
From mboxrd@z Thu Jan  1 00:00:00 1970
 | 
			
		||||
Return-Path: <linux-kernel-owner@kernel.org>
 | 
			
		||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
 | 
			
		||||
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
 | 
			
		||||
X-Spam-Level: 
 | 
			
		||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
 | 
			
		||||
	DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
 | 
			
		||||
	INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
 | 
			
		||||
	USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
 | 
			
		||||
	version=3.4.0
 | 
			
		||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
 | 
			
		||||
	by smtp.lore.kernel.org (Postfix) with ESMTP id 10B58C433ED
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:48 +0000 (UTC)
 | 
			
		||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
 | 
			
		||||
	by mail.kernel.org (Postfix) with ESMTP id E99D16108C
 | 
			
		||||
	for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:47 +0000 (UTC)
 | 
			
		||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
 | 
			
		||||
        id S231251AbhETG4H (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:56:07 -0400
 | 
			
		||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37970 "EHLO
 | 
			
		||||
        lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
 | 
			
		||||
        with ESMTP id S231152AbhETGzq (ORCPT
 | 
			
		||||
        <rfc822;linux-kernel@vger.kernel.org>);
 | 
			
		||||
        Thu, 20 May 2021 02:55:46 -0400
 | 
			
		||||
Received: from mail-qt1-x84a.google.com (mail-qt1-x84a.google.com [IPv6:2607:f8b0:4864:20::84a])
 | 
			
		||||
        by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C8635C06175F
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:23 -0700 (PDT)
 | 
			
		||||
Received: by mail-qt1-x84a.google.com with SMTP id x9-20020ac84a090000b0290203194f1f86so3499707qtq.13
 | 
			
		||||
        for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:23 -0700 (PDT)
 | 
			
		||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=google.com; s=20161025;
 | 
			
		||||
        h=date:in-reply-to:message-id:mime-version:references:subject:from:to
 | 
			
		||||
         :cc;
 | 
			
		||||
        bh=tmnYUMpAe2KoFw1JK5DEOLa6QKjWz+/jEuUps2TjE0M=;
 | 
			
		||||
        b=DnoKJgXGcZrakGIsy2wdggTSzr8gNr5Cga30A6c8a6Hf9x2dffeKxupvvvPjuu1gFH
 | 
			
		||||
         aGdEv0BQdUdQtd0c3PTB1yYrqJsJcPp5S6L8/JeU1mBsAkTgRAJC+WwYC2oJaN+K/+rh
 | 
			
		||||
         m7SHkphIH6F6L72NTt2b96CmRop8AS7h70mGFoqBtxgJZEEG0JjTr93/mLmeGl1DrblN
 | 
			
		||||
         ViY8g/jh939e21AJjULOIlpeBbxplek6u+fXKVxsYdCV2JKDsA0LwaCxMlx08fCc/j9n
 | 
			
		||||
         pt2cBRltMZSTctDaJlkHWcEOuGP8bGJA/JzG0MeUfva0r9KcYGAVy5zcvXU4Mkz8AXA/
 | 
			
		||||
         v3JQ==
 | 
			
		||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
 | 
			
		||||
        d=1e100.net; s=20161025;
 | 
			
		||||
        h=x-gm-message-state:date:in-reply-to:message-id:mime-version
 | 
			
		||||
         :references:subject:from:to:cc;
 | 
			
		||||
        bh=tmnYUMpAe2KoFw1JK5DEOLa6QKjWz+/jEuUps2TjE0M=;
 | 
			
		||||
        b=eSQlkp99GhbOJbbfWHaqWXYyj8f2uV+mVQE23pf6QSUOoTukthTWydqV3fgiwXFIDZ
 | 
			
		||||
         SDohHvXcyn6N5BbFXVm6CtNfXtb315OJsMSEplLbhXduGrLKjsp7Zfpa0MW/pBEJOfNH
 | 
			
		||||
         /go5cnOUxmpFFo2+nAoIm8Xug3YYddsalK9BH0YMXpESvTCgOPpHU8wev9wLTU4zDG2s
 | 
			
		||||
         NSpyxsj72ahnHDJFkm3eEio8zmWqdEa9MYXuSU+QTZ/HJ0OwLb4BOdRwaOx/GeFoWGTu
 | 
			
		||||
         We7/PREhKWf+7tUeB8o2wbzSdGaKSWLh2SOQR0Ydr269QRIv3J4q6e/zT85DkE6XOcYE
 | 
			
		||||
         ziTg==
 | 
			
		||||
X-Gm-Message-State: AOAM530QSwtf5Kda/I2DOnicxCl9Xadwo6H9cConTRpJ9+gh6AP7aLlU
 | 
			
		||||
        qfB+G5KAp3JrJXlL4Qf1Gmbl32aZCU0=
 | 
			
		||||
X-Google-Smtp-Source: ABdhPJyfR8302KuxyD/mIOKCO+jxW1RXoZnlJejF8SLfwvo9YuRoFSL43tZzQ7DdKcZXlLzVckFytBbp+9s=
 | 
			
		||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
 | 
			
		||||
 (user=yuzhao job=sendgmr) by 2002:ad4:5a52:: with SMTP id ej18mr3968319qvb.31.1621493662894;
 | 
			
		||||
 Wed, 19 May 2021 23:54:22 -0700 (PDT)
 | 
			
		||||
Date:   Thu, 20 May 2021 00:53:55 -0600
 | 
			
		||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
Message-Id: <20210520065355.2736558-15-yuzhao@google.com>
 | 
			
		||||
Mime-Version: 1.0
 | 
			
		||||
References: <20210520065355.2736558-1-yuzhao@google.com>
 | 
			
		||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
Subject: [PATCH v3 14/14] mm: multigenerational lru: documentation
 | 
			
		||||
From:   Yu Zhao <yuzhao@google.com>
 | 
			
		||||
To:     linux-mm@kvack.org
 | 
			
		||||
Cc:     Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
 | 
			
		||||
        Andrew Morton <akpm@linux-foundation.org>,
 | 
			
		||||
        Dave Chinner <david@fromorbit.com>,
 | 
			
		||||
        Dave Hansen <dave.hansen@linux.intel.com>,
 | 
			
		||||
        Donald Carr <sirspudd@gmail.com>,
 | 
			
		||||
        Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
 | 
			
		||||
        Johannes Weiner <hannes@cmpxchg.org>,
 | 
			
		||||
        Jonathan Corbet <corbet@lwn.net>,
 | 
			
		||||
        Joonsoo Kim <iamjoonsoo.kim@lge.com>,
 | 
			
		||||
        Konstantin Kharlamov <hi-angel@yandex.ru>,
 | 
			
		||||
        Marcus Seyfarth <m.seyfarth@gmail.com>,
 | 
			
		||||
        Matthew Wilcox <willy@infradead.org>,
 | 
			
		||||
        Mel Gorman <mgorman@suse.de>,
 | 
			
		||||
        Miaohe Lin <linmiaohe@huawei.com>,
 | 
			
		||||
        Michael Larabel <michael@michaellarabel.com>,
 | 
			
		||||
        Michal Hocko <mhocko@suse.com>,
 | 
			
		||||
        Michel Lespinasse <michel@lespinasse.org>,
 | 
			
		||||
        Rik van Riel <riel@surriel.com>,
 | 
			
		||||
        Roman Gushchin <guro@fb.com>,
 | 
			
		||||
        Tim Chen <tim.c.chen@linux.intel.com>,
 | 
			
		||||
        Vlastimil Babka <vbabka@suse.cz>,
 | 
			
		||||
        Yang Shi <shy828301@gmail.com>,
 | 
			
		||||
        Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
 | 
			
		||||
        linux-kernel@vger.kernel.org, lkp@lists.01.org,
 | 
			
		||||
        page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
 | 
			
		||||
        Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
Content-Type: text/plain; charset="UTF-8"
 | 
			
		||||
Precedence: bulk
 | 
			
		||||
List-ID: <linux-kernel.vger.kernel.org>
 | 
			
		||||
X-Mailing-List: linux-kernel@vger.kernel.org
 | 
			
		||||
List-Archive: <https://lore.kernel.org/lkml/>
 | 
			
		||||
 | 
			
		||||
Add Documentation/vm/multigen_lru.rst.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
 | 
			
		||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
 | 
			
		||||
---
 | 
			
		||||
 Documentation/vm/index.rst        |   1 +
 | 
			
		||||
 Documentation/vm/multigen_lru.rst | 143 ++++++++++++++++++++++++++++++
 | 
			
		||||
 2 files changed, 144 insertions(+)
 | 
			
		||||
 create mode 100644 Documentation/vm/multigen_lru.rst
 | 
			
		||||
 | 
			
		||||
diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst
 | 
			
		||||
index eff5fbd492d0..c353b3f55924 100644
 | 
			
		||||
--- a/Documentation/vm/index.rst
 | 
			
		||||
+++ b/Documentation/vm/index.rst
 | 
			
		||||
@@ -17,6 +17,7 @@ various features of the Linux memory management
 | 
			
		||||
 
 | 
			
		||||
    swap_numa
 | 
			
		||||
    zswap
 | 
			
		||||
+   multigen_lru
 | 
			
		||||
 
 | 
			
		||||
 Kernel developers MM documentation
 | 
			
		||||
 ==================================
 | 
			
		||||
diff --git a/Documentation/vm/multigen_lru.rst b/Documentation/vm/multigen_lru.rst
 | 
			
		||||
new file mode 100644
 | 
			
		||||
index 000000000000..a18416ed7e92
 | 
			
		||||
--- /dev/null
 | 
			
		||||
+++ b/Documentation/vm/multigen_lru.rst
 | 
			
		||||
@@ -0,0 +1,143 @@
 | 
			
		||||
+.. SPDX-License-Identifier: GPL-2.0
 | 
			
		||||
+
 | 
			
		||||
+=====================
 | 
			
		||||
+Multigenerational LRU
 | 
			
		||||
+=====================
 | 
			
		||||
+
 | 
			
		||||
+Quick Start
 | 
			
		||||
+===========
 | 
			
		||||
+Build Options
 | 
			
		||||
+-------------
 | 
			
		||||
+:Required: Set ``CONFIG_LRU_GEN=y``.
 | 
			
		||||
+
 | 
			
		||||
+:Optional: Set ``CONFIG_LRU_GEN_ENABLED=y`` to turn the feature on by
 | 
			
		||||
+ default.
 | 
			
		||||
+
 | 
			
		||||
+:Optional: Change ``CONFIG_NR_LRU_GENS`` to a number ``X`` to support
 | 
			
		||||
+ a maximum of ``X`` generations.
 | 
			
		||||
+
 | 
			
		||||
+:Optional: Change ``CONFIG_TIERS_PER_GEN`` to a number ``Y`` to
 | 
			
		||||
+ support a maximum of ``Y`` tiers per generation.
 | 
			
		||||
+
 | 
			
		||||
+Runtime Options
 | 
			
		||||
+---------------
 | 
			
		||||
+:Required: Write ``1`` to ``/sys/kernel/mm/lru_gen/enable`` if the
 | 
			
		||||
+ feature was not turned on by default.
 | 
			
		||||
+
 | 
			
		||||
+:Optional: Change ``/sys/kernel/mm/lru_gen/spread`` to a number ``N``
 | 
			
		||||
+ to spread pages out across ``N+1`` generations. ``N`` should be less
 | 
			
		||||
+ than ``X``. Larger values make the background aging more aggressive.
 | 
			
		||||
+
 | 
			
		||||
+:Optional: Read ``/sys/kernel/debug/lru_gen`` to verify the feature.
 | 
			
		||||
+ This file has the following output:
 | 
			
		||||
+
 | 
			
		||||
+::
 | 
			
		||||
+
 | 
			
		||||
+  memcg  memcg_id  memcg_path
 | 
			
		||||
+    node  node_id
 | 
			
		||||
+      min_gen  birth_time  anon_size  file_size
 | 
			
		||||
+      ...
 | 
			
		||||
+      max_gen  birth_time  anon_size  file_size
 | 
			
		||||
+
 | 
			
		||||
+Given a memcg and a node, ``min_gen`` is the oldest generation
 | 
			
		||||
+(number) and ``max_gen`` is the youngest. Birth time is in
 | 
			
		||||
+milliseconds. The sizes of anon and file types are in pages.
 | 
			
		||||
+
 | 
			
		||||
+Recipes
 | 
			
		||||
+-------
 | 
			
		||||
+:Android on ARMv8.1+: ``X=4``, ``Y=3`` and ``N=0``.
 | 
			
		||||
+
 | 
			
		||||
+:Android on pre-ARMv8.1 CPUs: Not recommended due to the lack of
 | 
			
		||||
+ ``ARM64_HW_AFDBM``.
 | 
			
		||||
+
 | 
			
		||||
+:Laptops and workstations running Chrome on x86_64: Use the default
 | 
			
		||||
+ values.
 | 
			
		||||
+
 | 
			
		||||
+:Working set estimation: Write ``+ memcg_id node_id gen [swappiness]``
 | 
			
		||||
+ to ``/sys/kernel/debug/lru_gen`` to account referenced pages to
 | 
			
		||||
+ generation ``max_gen`` and create the next generation ``max_gen+1``.
 | 
			
		||||
+ ``gen`` should be equal to ``max_gen``. A swap file and a non-zero
 | 
			
		||||
+ ``swappiness`` are required to scan anon type. If swapping is not
 | 
			
		||||
+ desired, set ``vm.swappiness`` to ``0``.
 | 
			
		||||
+
 | 
			
		||||
+:Proactive reclaim: Write ``- memcg_id node_id gen [swappiness]
 | 
			
		||||
+ [nr_to_reclaim]`` to ``/sys/kernel/debug/lru_gen`` to evict
 | 
			
		||||
+ generations less than or equal to ``gen``. ``gen`` should be less
 | 
			
		||||
+ than ``max_gen-1`` as ``max_gen`` and ``max_gen-1`` are active
 | 
			
		||||
+ generations and therefore protected from the eviction. Use
 | 
			
		||||
+ ``nr_to_reclaim`` to limit the number of pages to evict. Multiple
 | 
			
		||||
+ command lines are supported, as is concatenation with delimiters
 | 
			
		||||
+ ``,`` and ``;``.
 | 
			
		||||
+
 | 
			
		||||
+Framework
 | 
			
		||||
+=========
 | 
			
		||||
+For each ``lruvec``, evictable pages are divided into multiple
 | 
			
		||||
+generations. The youngest generation number is stored in ``max_seq``
 | 
			
		||||
+for both anon and file types as they are aged on an equal footing. The
 | 
			
		||||
+oldest generation numbers are stored in ``min_seq[2]`` separately for
 | 
			
		||||
+anon and file types as clean file pages can be evicted regardless of
 | 
			
		||||
+swap and write-back constraints. These three variables are
 | 
			
		||||
+monotonically increasing. Generation numbers are truncated into
 | 
			
		||||
+``order_base_2(CONFIG_NR_LRU_GENS+1)`` bits in order to fit into
 | 
			
		||||
+``page->flags``. The sliding window technique is used to prevent
 | 
			
		||||
+truncated generation numbers from overlapping. Each truncated
 | 
			
		||||
+generation number is an index to an array of per-type and per-zone
 | 
			
		||||
+lists. Evictable pages are added to the per-zone lists indexed by
 | 
			
		||||
+``max_seq`` or ``min_seq[2]`` (modulo ``CONFIG_NR_LRU_GENS``),
 | 
			
		||||
+depending on their types.
 | 
			
		||||
+
 | 
			
		||||
+Each generation is then divided into multiple tiers. Tiers represent
 | 
			
		||||
+levels of usage from file descriptors only. Pages accessed N times via
 | 
			
		||||
+file descriptors belong to tier order_base_2(N). Each generation
 | 
			
		||||
+contains at most CONFIG_TIERS_PER_GEN tiers, and they require
 | 
			
		||||
+additional CONFIG_TIERS_PER_GEN-2 bits in page->flags. In contrast to
 | 
			
		||||
+moving across generations which requires the lru lock for the list
 | 
			
		||||
+operations, moving across tiers only involves an atomic operation on
 | 
			
		||||
+``page->flags`` and therefore has a negligible cost. A feedback loop
 | 
			
		||||
+modeled after the PID controller monitors the refault rates across all
 | 
			
		||||
+tiers and decides when to activate pages from which tiers in the
 | 
			
		||||
+reclaim path.
 | 
			
		||||
+
 | 
			
		||||
+The framework comprises two conceptually independent components: the
 | 
			
		||||
+aging and the eviction, which can be invoked separately from user
 | 
			
		||||
+space for the purpose of working set estimation and proactive reclaim.
 | 
			
		||||
+
 | 
			
		||||
+Aging
 | 
			
		||||
+-----
 | 
			
		||||
+The aging produces young generations. Given an ``lruvec``, the aging
 | 
			
		||||
+scans page tables for referenced pages of this ``lruvec``. Upon
 | 
			
		||||
+finding one, the aging updates its generation number to ``max_seq``.
 | 
			
		||||
+After each round of scan, the aging increments ``max_seq``.
 | 
			
		||||
+
 | 
			
		||||
+The aging maintains either a system-wide ``mm_struct`` list or
 | 
			
		||||
+per-memcg ``mm_struct`` lists, and it only scans page tables of
 | 
			
		||||
+processes that have been scheduled since the last scan.
 | 
			
		||||
+
 | 
			
		||||
+The aging is due when both of ``min_seq[2]`` reach ``max_seq-1``,
 | 
			
		||||
+assuming both anon and file types are reclaimable.
 | 
			
		||||
+
 | 
			
		||||
+Eviction
 | 
			
		||||
+--------
 | 
			
		||||
+The eviction consumes old generations. Given an ``lruvec``, the
 | 
			
		||||
+eviction scans the pages on the per-zone lists indexed by either of
 | 
			
		||||
+``min_seq[2]``. It first tries to select a type based on the values of
 | 
			
		||||
+``min_seq[2]``. When anon and file types are both available from the
 | 
			
		||||
+same generation, it selects the one that has a lower refault rate.
 | 
			
		||||
+
 | 
			
		||||
+During a scan, the eviction sorts pages according to their new
 | 
			
		||||
+generation numbers, if the aging has found them referenced. It also
 | 
			
		||||
+moves pages from the tiers that have higher refault rates than tier 0
 | 
			
		||||
+to the next generation.
 | 
			
		||||
+
 | 
			
		||||
+When it finds all the per-zone lists of a selected type are empty, the
 | 
			
		||||
+eviction increments ``min_seq[2]`` indexed by this selected type.
 | 
			
		||||
+
 | 
			
		||||
+To-do List
 | 
			
		||||
+==========
 | 
			
		||||
+KVM Optimization
 | 
			
		||||
+----------------
 | 
			
		||||
+Support shadow page table scanning.
 | 
			
		||||
+
 | 
			
		||||
+NUMA Optimization
 | 
			
		||||
+-----------------
 | 
			
		||||
+Optimize page table scan for NUMA.
 | 
			
		||||
-- 
 | 
			
		||||
2.31.1.751.gd2f1c929bd-goog
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										6531
									
								
								sys-kernel/pinephone-sources/files/config-5.13.5
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6531
									
								
								sys-kernel/pinephone-sources/files/config-5.13.5
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -0,0 +1,9 @@
 | 
			
		||||
post_upgrade() {
  # Warn when fstab lists /boot as its own filesystem but nothing is mounted there.
  if ! mountpoint -q /boot && findmnt --fstab -uno SOURCE /boot &>/dev/null; then
    echo "WARNING: /boot appears to be a separate partition but is not mounted."
  fi
}
 | 
			
		||||
 | 
			
		||||
post_remove() {
  # The path is relative, so it is resolved against the current directory.
  rm -f -- boot/initramfs-linux.img
}
 | 
			
		||||
							
								
								
									
										11
									
								
								sys-kernel/pinephone-sources/files/linux.preset
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								sys-kernel/pinephone-sources/files/linux.preset
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,11 @@
 | 
			
		||||
# mkinitcpio preset file for the '%PKGBASE%' package
 | 
			
		||||
 | 
			
		||||
ALL_config="/etc/mkinitcpio.conf"
 | 
			
		||||
ALL_kver="%KERNVER%"
 | 
			
		||||
 | 
			
		||||
PRESETS=('default')
 | 
			
		||||
 | 
			
		||||
#default_config="/etc/mkinitcpio.conf"
 | 
			
		||||
default_image="/boot/initramfs-linux.img"
 | 
			
		||||
#default_options=""
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,409 @@
 | 
			
		||||
From f062022f2a2781d6b8ca63c460b0e72ebac30870 Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Martijn Braam <martijn@brixit.nl>
 | 
			
		||||
Date: Mon, 28 Sep 2020 14:26:11 +0200
 | 
			
		||||
Subject: [PATCH] media: ov5640: Implement autofocus
 | 
			
		||||
 | 
			
		||||
The autofocus functionality needs a firmware blob loaded into the
 | 
			
		||||
internal microcontroller.
 | 
			
		||||
 | 
			
		||||
V4L2 doesn't have an API to control all autofocus functionality, but
 | 
			
		||||
this at least makes it possible to focus on the center of the sensor.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Martijn Braam <martijn@brixit.nl>
 | 
			
		||||
---
 | 
			
		||||
 drivers/media/i2c/ov5640.c | 254 +++++++++++++++++++++++++++++++++++++
 | 
			
		||||
 1 file changed, 254 insertions(+)
 | 
			
		||||
 | 
			
		||||
diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c
 | 
			
		||||
index df0a507c211f..08a5304c0e95 100644
 | 
			
		||||
--- a/drivers/media/i2c/ov5640.c
 | 
			
		||||
+++ b/drivers/media/i2c/ov5640.c
 | 
			
		||||
@@ -9,6 +9,7 @@
 | 
			
		||||
 #include <linux/clkdev.h>
 | 
			
		||||
 #include <linux/ctype.h>
 | 
			
		||||
 #include <linux/delay.h>
 | 
			
		||||
+#include <linux/firmware.h>
 | 
			
		||||
 #include <linux/device.h>
 | 
			
		||||
 #include <linux/gpio/consumer.h>
 | 
			
		||||
 #include <linux/i2c.h>
 | 
			
		||||
@@ -31,7 +32,11 @@
 | 
			
		||||
 
 | 
			
		||||
 #define OV5640_DEFAULT_SLAVE_ID 0x3c
 | 
			
		||||
 
 | 
			
		||||
+#define OV5640_REG_SYS_RESET00		0x3000
 | 
			
		||||
+#define OV5640_REG_SYS_RESET01		0x3001
 | 
			
		||||
 #define OV5640_REG_SYS_RESET02		0x3002
 | 
			
		||||
+#define OV5640_REG_SYS_CLOCK_ENABLE00	0x3004
 | 
			
		||||
+#define OV5640_REG_SYS_CLOCK_ENABLE01	0x3005
 | 
			
		||||
 #define OV5640_REG_SYS_CLOCK_ENABLE02	0x3006
 | 
			
		||||
 #define OV5640_REG_SYS_CTRL0		0x3008
 | 
			
		||||
 #define OV5640_REG_SYS_CTRL0_SW_PWDN	0x42
 | 
			
		||||
@@ -41,6 +46,14 @@
 | 
			
		||||
 #define OV5640_REG_PAD_OUTPUT_ENABLE01	0x3017
 | 
			
		||||
 #define OV5640_REG_PAD_OUTPUT_ENABLE02	0x3018
 | 
			
		||||
 #define OV5640_REG_PAD_OUTPUT00		0x3019
 | 
			
		||||
+#define OV5640_REG_FW_CMD_MAIN		0x3022
 | 
			
		||||
+#define OV5640_REG_FW_CMD_ACK		0x3023
 | 
			
		||||
+#define OV5640_REG_FW_CMD_PARA0		0x3024
 | 
			
		||||
+#define OV5640_REG_FW_CMD_PARA1		0x3025
 | 
			
		||||
+#define OV5640_REG_FW_CMD_PARA2		0x3026
 | 
			
		||||
+#define OV5640_REG_FW_CMD_PARA3		0x3027
 | 
			
		||||
+#define OV5640_REG_FW_CMD_PARA4		0x3028
 | 
			
		||||
+#define OV5640_REG_FW_STATUS		0x3029
 | 
			
		||||
 #define OV5640_REG_SYSTEM_CONTROL1	0x302e
 | 
			
		||||
 #define OV5640_REG_SC_PLL_CTRL0		0x3034
 | 
			
		||||
 #define OV5640_REG_SC_PLL_CTRL1		0x3035
 | 
			
		||||
@@ -59,6 +72,7 @@
 | 
			
		||||
 #define OV5640_REG_AEC_PK_MANUAL	0x3503
 | 
			
		||||
 #define OV5640_REG_AEC_PK_REAL_GAIN	0x350a
 | 
			
		||||
 #define OV5640_REG_AEC_PK_VTS		0x350c
 | 
			
		||||
+#define OV5640_REG_VCM_CONTROL4		0x3606
 | 
			
		||||
 #define OV5640_REG_TIMING_DVPHO		0x3808
 | 
			
		||||
 #define OV5640_REG_TIMING_DVPVO		0x380a
 | 
			
		||||
 #define OV5640_REG_TIMING_HTS		0x380c
 | 
			
		||||
@@ -95,6 +109,20 @@
 | 
			
		||||
 #define OV5640_REG_SDE_CTRL4		0x5584
 | 
			
		||||
 #define OV5640_REG_SDE_CTRL5		0x5585
 | 
			
		||||
 #define OV5640_REG_AVG_READOUT		0x56a1
 | 
			
		||||
+#define OV5640_REG_FIRMWARE_BASE	0x8000
 | 
			
		||||
+
 | 
			
		||||
+#define OV5640_FW_STATUS_S_FIRMWARE	0x7f
 | 
			
		||||
+#define OV5640_FW_STATUS_S_STARTUP	0x7e
 | 
			
		||||
+#define OV5640_FW_STATUS_S_IDLE		0x70
 | 
			
		||||
+#define OV5640_FW_STATUS_S_FOCUSING	0x00
 | 
			
		||||
+#define OV5640_FW_STATUS_S_FOCUSED	0x10
 | 
			
		||||
+
 | 
			
		||||
+#define OV5640_FW_CMD_TRIGGER_FOCUS	0x03
 | 
			
		||||
+#define OV5640_FW_CMD_CONTINUOUS_FOCUS	0x04
 | 
			
		||||
+#define OV5640_FW_CMD_GET_FOCUS_RESULT	0x07
 | 
			
		||||
+#define OV5640_FW_CMD_RELEASE_FOCUS	0x08
 | 
			
		||||
+#define OV5640_FW_CMD_ZONE_CONFIG	0x12
 | 
			
		||||
+#define OV5640_FW_CMD_DEFAULT_ZONES	0x80
 | 
			
		||||
 
 | 
			
		||||
 enum ov5640_mode_id {
 | 
			
		||||
 	OV5640_MODE_QCIF_176_144 = 0,
 | 
			
		||||
@@ -218,6 +246,12 @@ struct ov5640_ctrls {
 | 
			
		||||
 		struct v4l2_ctrl *auto_gain;
 | 
			
		||||
 		struct v4l2_ctrl *gain;
 | 
			
		||||
 	};
 | 
			
		||||
+	struct {
 | 
			
		||||
+		struct v4l2_ctrl *focus_auto;
 | 
			
		||||
+		struct v4l2_ctrl *af_start;
 | 
			
		||||
+		struct v4l2_ctrl *af_stop;
 | 
			
		||||
+		struct v4l2_ctrl *af_status;
 | 
			
		||||
+	};
 | 
			
		||||
 	struct v4l2_ctrl *brightness;
 | 
			
		||||
 	struct v4l2_ctrl *light_freq;
 | 
			
		||||
 	struct v4l2_ctrl *saturation;
 | 
			
		||||
@@ -261,6 +295,8 @@ struct ov5640_dev {
 | 
			
		||||
 
 | 
			
		||||
 	bool pending_mode_change;
 | 
			
		||||
 	bool streaming;
 | 
			
		||||
+
 | 
			
		||||
+	bool af_initialized;
 | 
			
		||||
 };
 | 
			
		||||
 
 | 
			
		||||
 static inline struct ov5640_dev *to_ov5640_dev(struct v4l2_subdev *sd)
 | 
			
		||||
@@ -1967,6 +2003,118 @@ static void ov5640_reset(struct ov5640_dev *sensor)
 | 
			
		||||
 	usleep_range(20000, 25000);
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+/*
+ * Upload the autofocus firmware to the sensor's MCU and start it.
+ *
+ * The MCU is held in reset while the image is written byte by byte from
+ * OV5640_REG_FIRMWARE_BASE upwards. If the firmware does not report idle
+ * after the MCU is started, the MCU is reset and started once more.
+ * Returns 0 on success, a register write error, or -ETIMEDOUT.
+ */
+static int ov5640_copy_fw_to_device(struct ov5640_dev *sensor,
+					const struct firmware *fw)
+{
+	struct i2c_client *client = sensor->i2c_client;
+	u8 fw_status = OV5640_FW_STATUS_S_FIRMWARE;
+	int attempt, ms, ret;
+	size_t i;
+
+	/* Hold the MCU in reset while its program memory is rewritten. */
+	ret = ov5640_write_reg(sensor, OV5640_REG_SYS_RESET00, 0x20);
+	if (ret)
+		return ret;
+
+	/* A dropped byte would leave a corrupt image, so stop on any error. */
+	for (i = 0; i < fw->size; i++) {
+		ret = ov5640_write_reg(sensor, OV5640_REG_FIRMWARE_BASE + i,
+				       fw->data[i]);
+		if (ret)
+			return ret;
+	}
+
+	/* Clear the command, ack and parameter registers (contiguous). */
+	for (i = OV5640_REG_FW_CMD_MAIN; i <= OV5640_REG_FW_CMD_PARA4; i++) {
+		ret = ov5640_write_reg(sensor, i, 0x00);
+		if (ret)
+			return ret;
+	}
+	ret = ov5640_write_reg(sensor, OV5640_REG_FW_STATUS,
+			       OV5640_FW_STATUS_S_FIRMWARE);
+	if (ret)
+		return ret;
+
+	for (attempt = 0; attempt < 2; attempt++) {
+		/* Start the MCU; the retry pulses its reset bit first. */
+		ret = 0;
+		if (attempt)
+			ret = ov5640_write_reg(sensor, OV5640_REG_SYS_RESET00, 0x20);
+		if (!ret)
+			ret = ov5640_write_reg(sensor, OV5640_REG_SYS_RESET00, 0x00);
+		if (ret)
+			return ret;
+		if (!attempt)
+			dev_info(&client->dev, "firmware upload success\n");
+
+		/* Only trust the status byte when the read succeeded. */
+		for (ms = 0; ms < 250; ms += 50) {
+			ret = ov5640_read_reg(sensor, OV5640_REG_FW_STATUS,
+					      &fw_status);
+			if (!ret && fw_status == OV5640_FW_STATUS_S_IDLE) {
+				dev_info(&client->dev,
+					 "fw started after %d ms\n", ms);
+				return 0;
+			}
+			msleep(50);
+		}
+		dev_err(&client->dev, "uploaded firmware didn't start, got to 0x%x%s\n",
+			fw_status, attempt ? "" : ", retrying...");
+	}
+	return -ETIMEDOUT;
+}
 | 
			
		||||
+
 | 
			
		||||
+/*
+ * Load the autofocus firmware and enable the AF blocks, once per power-on
+ * (ov5640_set_power_on() clears af_initialized again).
+ *
+ * Returns 0 when autofocus is ready, a negative error code otherwise.
+ */
+static int ov5640_af_init(struct ov5640_dev *sensor)
+{
+	struct i2c_client *client = sensor->i2c_client;
+	const char *fwname = "ov5640_af.bin";
+	const struct firmware *fw;
+	int ret;
+
+	if (sensor->af_initialized)
+		return 0;
+
+	ret = firmware_request_nowarn(&fw, fwname, &client->dev);
+	if (ret) {
+		dev_warn(&client->dev, "%s: no autofocus firmware available (%s)\n",
+			__func__, fwname);
+		return ret;
+	}
+	ret = ov5640_copy_fw_to_device(sensor, fw);
+	release_firmware(fw);
+	if (ret)
+		return ret;
+
+	/* Enable the AF clocks, then switch the lens focus driver on. */
+	ret = ov5640_mod_reg(sensor, OV5640_REG_SYS_CLOCK_ENABLE00,
+			     (BIT(6) | BIT(5)), (BIT(6) | BIT(5)));
+	if (!ret)
+		ret = ov5640_mod_reg(sensor, OV5640_REG_SYS_CLOCK_ENABLE01,
+				     BIT(6), BIT(6));
+	if (!ret)
+		ret = ov5640_write_reg(sensor, OV5640_REG_VCM_CONTROL4, 0x3f);
+	if (ret)
+		return ret;
+
+	/* Only mark AF ready once every step above has succeeded. */
+	sensor->af_initialized = true;
+	return 0;
+}
 | 
			
		||||
+
 | 
			
		||||
 static int ov5640_set_power_on(struct ov5640_dev *sensor)
 | 
			
		||||
 {
 | 
			
		||||
 	struct i2c_client *client = sensor->i2c_client;
 | 
			
		||||
@@ -1988,6 +2117,8 @@ static int ov5640_set_power_on(struct ov5640_dev *sensor)
 | 
			
		||||
 		goto xclk_off;
 | 
			
		||||
 	}
 | 
			
		||||
 
 | 
			
		||||
+	sensor->af_initialized = 0;
 | 
			
		||||
+
 | 
			
		||||
 	ov5640_reset(sensor);
 | 
			
		||||
 	ov5640_power(sensor, true);
 | 
			
		||||
 
 | 
			
		||||
@@ -2416,6 +2547,35 @@ static int ov5640_set_framefmt(struct ov5640_dev *sensor,
 | 
			
		||||
 			      is_jpeg ? (BIT(5) | BIT(3)) : 0);
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+/*
+ * Send one command to the AF firmware: set the ack register to 0x01, write
+ * the command, then poll (up to 100 times, 50 ms apart) until the ack
+ * register reads back 0. Returns 0, a register access error, or -ETIMEDOUT.
+ */
+static int ov5640_fw_command(struct ov5640_dev *sensor, int command)
+{
+	int tries = 100;
+	u8 ack;
+	int err;
+
+	err = ov5640_write_reg(sensor, OV5640_REG_FW_CMD_ACK, 0x01);
+	if (!err)
+		err = ov5640_write_reg(sensor, OV5640_REG_FW_CMD_MAIN, command);
+	if (err)
+		return err;
+
+	while (tries--) {
+		err = ov5640_read_reg(sensor, OV5640_REG_FW_CMD_ACK, &ack);
+		if (err)
+			return err;
+		if (!ack)
+			return 0;
+		msleep(50);
+	}
+	return -ETIMEDOUT;
+}
 | 
			
		||||
+
 | 
			
		||||
+
 | 
			
		||||
 /*
 | 
			
		||||
  * Sensor Controls.
 | 
			
		||||
  */
 | 
			
		||||
@@ -2532,6 +2692,41 @@ static int ov5640_set_ctrl_exposure(struct ov5640_dev *sensor,
 | 
			
		||||
 	return ret;
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+/*
+ * Run an autofocus firmware command, loading the firmware first if needed.
+ *
+ * A release command is sent as-is; any other command is preceded by a zone
+ * configuration restart and the selection of the default focus zones. When
+ * the firmware cannot be initialised this logs an error and returns 0.
+ */
+static int ov5640_set_ctrl_focus(struct ov5640_dev *sensor, int command)
+{
+	struct device *dev = &sensor->i2c_client->dev;
+	int err;
+
+	if (ov5640_af_init(sensor)) {
+		dev_err(dev, "%s: no autofocus firmware loaded\n", __func__);
+		return 0;
+	}
+
+	if (command != OV5640_FW_CMD_RELEASE_FOCUS) {
+		/* Restart zone config, then select the default focus zones. */
+		err = ov5640_fw_command(sensor, OV5640_FW_CMD_ZONE_CONFIG);
+		if (!err)
+			err = ov5640_fw_command(sensor,
+						OV5640_FW_CMD_DEFAULT_ZONES);
+		if (err)
+			return err;
+
+		dev_dbg(dev, "%s: Triggering autofocus\n", __func__);
+	} else {
+		dev_dbg(dev, "%s: Releasing autofocus\n", __func__);
+	}
+
+	/* Hand the requested command itself to the firmware. */
+	return ov5640_fw_command(sensor, command);
+}
 | 
			
		||||
+
 | 
			
		||||
 static int ov5640_set_ctrl_gain(struct ov5640_dev *sensor, bool auto_gain)
 | 
			
		||||
 {
 | 
			
		||||
 	struct ov5640_ctrls *ctrls = &sensor->ctrls;
 | 
			
		||||
@@ -2638,6 +2833,32 @@ static int ov5640_set_ctrl_vflip(struct ov5640_dev *sensor, int value)
 | 
			
		||||
 			      (BIT(2) | BIT(1)) : 0);
 | 
			
		||||
 }
 | 
			
		||||
 
 | 
			
		||||
+/*
+ * Read the AF firmware status register and translate it into the matching
+ * V4L2_AUTO_FOCUS_STATUS_* value; any state not listed counts as busy.
+ * If the register read fails, its error value is returned instead.
+ */
+static int ov5640_get_af_status(struct ov5640_dev *sensor)
+{
+	u8 state;
+	int err = ov5640_read_reg(sensor, OV5640_REG_FW_STATUS, &state);
+
+	if (err)
+		return err;
+
+	/* The firmware and startup states are both reported as failed. */
+	if (state == OV5640_FW_STATUS_S_FIRMWARE ||
+	    state == OV5640_FW_STATUS_S_STARTUP)
+		return V4L2_AUTO_FOCUS_STATUS_FAILED;
+
+	if (state == OV5640_FW_STATUS_S_IDLE)
+		return V4L2_AUTO_FOCUS_STATUS_IDLE;
+	if (state == OV5640_FW_STATUS_S_FOCUSED)
+		return V4L2_AUTO_FOCUS_STATUS_REACHED;
+
+	return V4L2_AUTO_FOCUS_STATUS_BUSY;
+}
 | 
			
		||||
+
 | 
			
		||||
 static int ov5640_g_volatile_ctrl(struct v4l2_ctrl *ctrl)
 | 
			
		||||
 {
 | 
			
		||||
 	struct v4l2_subdev *sd = ctrl_to_sd(ctrl);
 | 
			
		||||
@@ -2659,6 +2880,12 @@ static int ov5640_g_volatile_ctrl(struct v4l2_ctrl *ctrl)
 | 
			
		||||
 			return val;
 | 
			
		||||
 		sensor->ctrls.exposure->val = val;
 | 
			
		||||
 		break;
 | 
			
		||||
+	case V4L2_CID_FOCUS_AUTO:
 | 
			
		||||
+		val = ov5640_get_af_status(sensor);
 | 
			
		||||
+		if (val < 0)
 | 
			
		||||
+			return val;
 | 
			
		||||
+		sensor->ctrls.af_status->val = val;
 | 
			
		||||
+		break;
 | 
			
		||||
 	}
 | 
			
		||||
 
 | 
			
		||||
 	return 0;
 | 
			
		||||
@@ -2690,6 +2917,18 @@ static int ov5640_s_ctrl(struct v4l2_ctrl *ctrl)
 | 
			
		||||
 	case V4L2_CID_AUTO_WHITE_BALANCE:
 | 
			
		||||
 		ret = ov5640_set_ctrl_white_balance(sensor, ctrl->val);
 | 
			
		||||
 		break;
 | 
			
		||||
+	case V4L2_CID_FOCUS_AUTO:
 | 
			
		||||
+		if (ctrl->val)
 | 
			
		||||
+			ret = ov5640_set_ctrl_focus(sensor, OV5640_FW_CMD_CONTINUOUS_FOCUS);
 | 
			
		||||
+		else
 | 
			
		||||
+			ret = ov5640_set_ctrl_focus(sensor, OV5640_FW_CMD_RELEASE_FOCUS);
 | 
			
		||||
+		break;
 | 
			
		||||
+	case V4L2_CID_AUTO_FOCUS_START:
 | 
			
		||||
+		ret = ov5640_set_ctrl_focus(sensor, OV5640_FW_CMD_TRIGGER_FOCUS);
 | 
			
		||||
+		break;
 | 
			
		||||
+	case V4L2_CID_AUTO_FOCUS_STOP:
 | 
			
		||||
+		ret = ov5640_set_ctrl_focus(sensor, OV5640_FW_CMD_RELEASE_FOCUS);
 | 
			
		||||
+		break;
 | 
			
		||||
 	case V4L2_CID_HUE:
 | 
			
		||||
 		ret = ov5640_set_ctrl_hue(sensor, ctrl->val);
 | 
			
		||||
 		break;
 | 
			
		||||
@@ -2762,6 +3001,20 @@ static int ov5640_init_controls(struct ov5640_dev *sensor)
 | 
			
		||||
 	ctrls->gain = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_GAIN,
 | 
			
		||||
 					0, 1023, 1, 0);
 | 
			
		||||
 
 | 
			
		||||
+	/* Autofocus */
 | 
			
		||||
+	ctrls->focus_auto = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_FOCUS_AUTO,
 | 
			
		||||
+					    0, 1, 1, 0);
 | 
			
		||||
+	ctrls->af_start = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_AUTO_FOCUS_START,
 | 
			
		||||
+					    0, 1, 1, 0);
 | 
			
		||||
+	ctrls->af_stop = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_AUTO_FOCUS_STOP,
 | 
			
		||||
+					   0, 1, 1, 0);
 | 
			
		||||
+	ctrls->af_status = v4l2_ctrl_new_std(hdl, ops,
 | 
			
		||||
+					     V4L2_CID_AUTO_FOCUS_STATUS, 0,
 | 
			
		||||
+					     (V4L2_AUTO_FOCUS_STATUS_BUSY |
 | 
			
		||||
+					      V4L2_AUTO_FOCUS_STATUS_REACHED |
 | 
			
		||||
+					      V4L2_AUTO_FOCUS_STATUS_FAILED),
 | 
			
		||||
+					     0, V4L2_AUTO_FOCUS_STATUS_IDLE);
 | 
			
		||||
+
 | 
			
		||||
 	ctrls->saturation = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_SATURATION,
 | 
			
		||||
 					      0, 255, 1, 64);
 | 
			
		||||
 	ctrls->hue = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_HUE,
 | 
			
		||||
@@ -2795,6 +3048,7 @@ static int ov5640_init_controls(struct ov5640_dev *sensor)
 | 
			
		||||
 	v4l2_ctrl_auto_cluster(3, &ctrls->auto_wb, 0, false);
 | 
			
		||||
 	v4l2_ctrl_auto_cluster(2, &ctrls->auto_gain, 0, true);
 | 
			
		||||
 	v4l2_ctrl_auto_cluster(2, &ctrls->auto_exp, 1, true);
 | 
			
		||||
+	v4l2_ctrl_cluster(4, &ctrls->focus_auto);
 | 
			
		||||
 
 | 
			
		||||
 	sensor->sd.ctrl_handler = hdl;
 | 
			
		||||
 	return 0;
 | 
			
		||||
-- 
 | 
			
		||||
2.25.4
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										12
									
								
								sys-kernel/pinephone-sources/files/panic-led-5.12.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								sys-kernel/pinephone-sources/files/panic-led-5.12.patch
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,12 @@
 | 
			
		||||
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
 | 
			
		||||
index 1c555456b..05fab5d79 100644
 | 
			
		||||
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
 | 
			
		||||
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
 | 
			
		||||
@@ -78,6 +78,7 @@ green {
 | 
			
		||||
 		};
 | 
			
		||||
 
 | 
			
		||||
 		led-2 {
 | 
			
		||||
+			linux,default-trigger = "panic";
 | 
			
		||||
 			function = LED_FUNCTION_INDICATOR;
 | 
			
		||||
 			color = <LED_COLOR_ID_RED>;
 | 
			
		||||
 			gpios = <&pio 3 19 GPIO_ACTIVE_HIGH>; /* PD19 */
 | 
			
		||||
@@ -0,0 +1,21 @@
 | 
			
		||||
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
 | 
			
		||||
index a72c2ec8c..b3a7bef13 100644
 | 
			
		||||
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
 | 
			
		||||
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
 | 
			
		||||
@@ -227,7 +227,15 @@ &i2c0_pins {
 | 
			
		||||
 &i2c1 {
 | 
			
		||||
 	status = "okay";
 | 
			
		||||
 
 | 
			
		||||
-	/* TODO: add Bochs BMA223 accelerometer here */
 | 
			
		||||
+	bma223@18 {
 | 
			
		||||
+		compatible = "bosch,bma223", "bosch,bma222e";
 | 
			
		||||
+		reg = <0x18>;
 | 
			
		||||
+		interrupt-parent = <&pio>;
 | 
			
		||||
+		interrupts = <7 5 IRQ_TYPE_LEVEL_HIGH>; /* PH5 */
 | 
			
		||||
+		mount-matrix = "0", "-1", "0",
 | 
			
		||||
+			       "-1", "0", "0",
 | 
			
		||||
+			       "0", "0", "-1";
 | 
			
		||||
+	};
 | 
			
		||||
 };
 | 
			
		||||
 
 | 
			
		||||
 &lradc {
 | 
			
		||||
							
								
								
									
										44
									
								
								sys-kernel/pinephone-sources/files/pinetab-bluetooth.patch
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								sys-kernel/pinephone-sources/files/pinetab-bluetooth.patch
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,44 @@
 | 
			
		||||
From 330d05da1b6e8118c9c4655f0b234cf32a2f1ce4 Mon Sep 17 00:00:00 2001
 | 
			
		||||
From: Icenowy Zheng <icenowy@aosc.io>
 | 
			
		||||
Date: Sun, 14 Apr 2019 23:46:47 +0800
 | 
			
		||||
Subject: [PATCH] arm64: allwinner: a64: pinetab: enable RTL8723CS bluetooth
 | 
			
		||||
 | 
			
		||||
PineTab has an RTL8723CS Wi-Fi/BT combo chip on board, the bluetooth part
 | 
			
		||||
of it communicates with A64 via UART, and the power of it is controlled
 | 
			
		||||
with some GPIO at PL bank.
 | 
			
		||||
 | 
			
		||||
Enable the bluetooth in the device tree.
 | 
			
		||||
 | 
			
		||||
Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
 | 
			
		||||
---
 | 
			
		||||
 .../boot/dts/allwinner/sun50i-a64-pinetab.dts      | 14 ++++++++++++++
 | 
			
		||||
 1 file changed, 14 insertions(+)
 | 
			
		||||
 | 
			
		||||
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
 | 
			
		||||
index 84d6e8cb2b88..e8b823875740 100644
 | 
			
		||||
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
 | 
			
		||||
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
 | 
			
		||||
@@ -447,6 +447,20 @@ &uart0 {
 | 
			
		||||
 	status = "okay";
 | 
			
		||||
 };
 | 
			
		||||
 
 | 
			
		||||
+&uart1 {
 | 
			
		||||
+	pinctrl-names = "default";
 | 
			
		||||
+	pinctrl-0 = <&uart1_pins>, <&uart1_rts_cts_pins>;
 | 
			
		||||
+	status = "okay";
 | 
			
		||||
+
 | 
			
		||||
+	bluetooth {
 | 
			
		||||
+		compatible = "realtek,rtl8723cs-bt";
 | 
			
		||||
+		reset-gpios = <&r_pio 0 4 GPIO_ACTIVE_LOW>; /* PL4 */
 | 
			
		||||
+		device-wake-gpios = <&r_pio 0 5 GPIO_ACTIVE_LOW>; /* PL5 */
 | 
			
		||||
+		host-wake-gpios = <&r_pio 0 6 GPIO_ACTIVE_HIGH>; /* PL6 */
 | 
			
		||||
+		firmware-postfix = "pinebook";
 | 
			
		||||
+	};
 | 
			
		||||
+};
 | 
			
		||||
+
 | 
			
		||||
 &usb_otg {
 | 
			
		||||
 	dr_mode = "otg";
 | 
			
		||||
 	status = "okay";
 | 
			
		||||
-- 
 | 
			
		||||
GitLab
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user