Added kernel
This commit is contained in:
parent
ae0a534e13
commit
5a003fc1a8
3
sys-kernel/pinephone-sources/Manifest
Normal file
3
sys-kernel/pinephone-sources/Manifest
Normal file
@ -0,0 +1,3 @@
|
||||
DIST all-5.13.5.patch 15071574 BLAKE2B f0b44888b216a60bb12a920a170ffb8ee705e357b82b0cacd58551e2d0e257c0f4419c34976263dc062335bb37f4b3a7418f3d9674e601fd8adda88bacad97d6 SHA512 046f42a5c8fe6477cdda82f47a07093ea51cf26b231b1c58230885954b7ecab9faa9eb72ac3c0cb1603dd6ca2b5b0d76421de6d2c3c05a0bee3ca6e080bfa084
|
||||
DIST linux-5.13.tar.xz 119297284 BLAKE2B 9c4c12e2394dec064adff51f7ccdf389192eb27ba7906db5eda543afe3d04afca6b9ea0848a057571bf2534eeb98e1e3a67734deff82c0d3731be205ad995668 SHA512 a8edf97e9d38a49f1be2bde1e29ad96274bb2c6f7e8a2bebaa1161dd4df9cabcbaec4ff644c45bee94f86ae47725087d6deed0cd954209cec717621d137db85e
|
||||
DIST patch-5.13.5.xz 473120 BLAKE2B a0dd9f3f972a16de87f0d2d8daa7f5d35b27314d22597a28f471cdbe6cedfa7d4bf69e41504d6a9b9d4c1f085146604394747771185dd0a09276cfd92820b4a8 SHA512 1e4eb575775ccbc2e88b34b902a75562e49d6dfb4699dadd5b41fff9db8c2bc994d946d1e60f6320f48ef233aa721d3725582d4ec57458f2293da9a85806c7b1
|
@ -0,0 +1,29 @@
|
||||
From c965cb8a004c6cc370b4bf297c61fe5ac8ab0583 Mon Sep 17 00:00:00 2001
|
||||
From: Martijn Braam <martijn@brixit.nl>
|
||||
Date: Wed, 6 Jan 2021 03:11:17 +0100
|
||||
Subject: [PATCH] arm64: dts: sunxi: Add mmc aliases for the PineTab
|
||||
|
||||
The order for the mmc devices changed in the kernel without this change.
|
||||
|
||||
Signed-off-by: Martijn Braam <martijn@brixit.nl>
|
||||
---
|
||||
arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
|
||||
index a87790df94b3..1cf3c3a9ad7f 100644
|
||||
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
|
||||
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
|
||||
@@ -18,6 +18,9 @@ / {
|
||||
compatible = "pine64,pinetab", "allwinner,sun50i-a64";
|
||||
|
||||
aliases {
|
||||
+ mmc0 = &mmc0;
|
||||
+ mmc1 = &mmc1;
|
||||
+ mmc2 = &mmc2;
|
||||
serial0 = &uart0;
|
||||
ethernet0 = &rtl8723cs;
|
||||
};
|
||||
--
|
||||
2.29.2
|
||||
|
511
sys-kernel/pinephone-sources/files/0012-bootsplash.patch
Normal file
511
sys-kernel/pinephone-sources/files/0012-bootsplash.patch
Normal file
@ -0,0 +1,511 @@
|
||||
diff --git a/MAINTAINERS b/MAINTAINERS
|
||||
index 7ffac272434e..ddff07cd794c 100644
|
||||
--- a/MAINTAINERS
|
||||
+++ b/MAINTAINERS
|
||||
@@ -2715,6 +2715,7 @@ F: drivers/video/fbdev/core/bootsplash*.*
|
||||
F: drivers/video/fbdev/core/dummycon.c
|
||||
F: include/linux/bootsplash.h
|
||||
F: include/uapi/linux/bootsplash_file.h
|
||||
+F: tools/bootsplash/*
|
||||
|
||||
BPF (Safe dynamic programs and tools)
|
||||
M: Alexei Starovoitov <ast@kernel.org>
|
||||
diff --git a/tools/bootsplash/.gitignore b/tools/bootsplash/.gitignore
|
||||
new file mode 100644
|
||||
index 000000000000..091b99a17567
|
||||
--- /dev/null
|
||||
+++ b/tools/bootsplash/.gitignore
|
||||
@@ -0,0 +1 @@
|
||||
+bootsplash-packer
|
||||
diff --git a/tools/bootsplash/Makefile b/tools/bootsplash/Makefile
|
||||
new file mode 100644
|
||||
index 000000000000..0ad8e8a84942
|
||||
--- /dev/null
|
||||
+++ b/tools/bootsplash/Makefile
|
||||
@@ -0,0 +1,9 @@
|
||||
+CC := $(CROSS_COMPILE)gcc
|
||||
+CFLAGS := -I../../usr/include
|
||||
+
|
||||
+PROGS := bootsplash-packer
|
||||
+
|
||||
+all: $(PROGS)
|
||||
+
|
||||
+clean:
|
||||
+ rm -fr $(PROGS)
|
||||
diff --git a/tools/bootsplash/bootsplash-packer.c b/tools/bootsplash/bootsplash-packer.c
|
||||
new file mode 100644
|
||||
index 000000000000..ffb6a8b69885
|
||||
--- /dev/null
|
||||
+++ b/tools/bootsplash/bootsplash-packer.c
|
||||
@@ -0,0 +1,471 @@
|
||||
+/*
|
||||
+ * Kernel based bootsplash.
|
||||
+ *
|
||||
+ * (Splash file packer tool)
|
||||
+ *
|
||||
+ * Authors:
|
||||
+ * Max Staudt <mstaudt@suse.de>
|
||||
+ *
|
||||
+ * SPDX-License-Identifier: GPL-2.0
|
||||
+ */
|
||||
+
|
||||
+#include <endian.h>
|
||||
+#include <getopt.h>
|
||||
+#include <stdint.h>
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <string.h>
|
||||
+
|
||||
+#include <linux/bootsplash_file.h>
|
||||
+
|
||||
+
|
||||
+static void print_help(char *progname)
|
||||
+{
|
||||
+ printf("Usage: %s [OPTIONS] outfile\n", progname);
|
||||
+ printf("\n"
|
||||
+ "Options, executed in order given:\n"
|
||||
+ " -h, --help Print this help message\n"
|
||||
+ "\n"
|
||||
+ " --bg_red <u8> Background color (red part)\n"
|
||||
+ " --bg_green <u8> Background color (green part)\n"
|
||||
+ " --bg_blue <u8> Background color (blue part)\n"
|
||||
+ " --bg_reserved <u8> (do not use)\n"
|
||||
+ " --frame_ms <u16> Minimum milliseconds between animation steps\n"
|
||||
+ "\n"
|
||||
+ " --picture Start describing the next picture\n"
|
||||
+ " --pic_width <u16> Picture width in pixels\n"
|
||||
+ " --pic_height <u16> Picture height in pixels\n"
|
||||
+ " --pic_position <u8> Coarse picture placement:\n"
|
||||
+ " 0x00 - Top left\n"
|
||||
+ " 0x01 - Top\n"
|
||||
+ " 0x02 - Top right\n"
|
||||
+ " 0x03 - Right\n"
|
||||
+ " 0x04 - Bottom right\n"
|
||||
+ " 0x05 - Bottom\n"
|
||||
+ " 0x06 - Bottom left\n"
|
||||
+ " 0x07 - Left\n"
|
||||
+ "\n"
|
||||
+ " Flags:\n"
|
||||
+ " 0x10 - Calculate offset from corner towards center,\n"
|
||||
+ " rather than from center towards corner\n"
|
||||
+ " --pic_position_offset <u16> Distance from base position in pixels\n"
|
||||
+ " --pic_anim_type <u8> Animation type:\n"
|
||||
+ " 0 - None\n"
|
||||
+ " 1 - Forward loop\n"
|
||||
+ " --pic_anim_loop <u8> Loop point for animation\n"
|
||||
+ "\n"
|
||||
+ " --blob <filename> Include next data stream\n"
|
||||
+ " --blob_type <u16> Type of data\n"
|
||||
+ " --blob_picture_id <u8> Picture to associate this blob with, starting at 0\n"
|
||||
+ " (default: number of last --picture)\n"
|
||||
+ "\n");
|
||||
+ printf("This tool will write %s files.\n\n",
|
||||
+#if __BYTE_ORDER == __BIG_ENDIAN
|
||||
+ "Big Endian (BE)");
|
||||
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
+ "Little Endian (LE)");
|
||||
+#else
|
||||
+#error
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
+
|
||||
+struct blob_entry {
|
||||
+ struct blob_entry *next;
|
||||
+
|
||||
+ char *fn;
|
||||
+
|
||||
+ struct splash_blob_header header;
|
||||
+};
|
||||
+
|
||||
+
|
||||
+static void dump_file_header(struct splash_file_header *h)
|
||||
+{
|
||||
+ printf(" --- File header ---\n");
|
||||
+ printf("\n");
|
||||
+ printf(" version: %5u\n", h->version);
|
||||
+ printf("\n");
|
||||
+ printf(" bg_red: %5u\n", h->bg_red);
|
||||
+ printf(" bg_green: %5u\n", h->bg_green);
|
||||
+ printf(" bg_blue: %5u\n", h->bg_blue);
|
||||
+ printf(" bg_reserved: %5u\n", h->bg_reserved);
|
||||
+ printf("\n");
|
||||
+ printf(" num_blobs: %5u\n", h->num_blobs);
|
||||
+ printf(" num_pics: %5u\n", h->num_pics);
|
||||
+ printf("\n");
|
||||
+ printf(" frame_ms: %5u\n", h->frame_ms);
|
||||
+ printf("\n");
|
||||
+}
|
||||
+
|
||||
+static void dump_pic_header(struct splash_pic_header *ph)
|
||||
+{
|
||||
+ printf(" --- Picture header ---\n");
|
||||
+ printf("\n");
|
||||
+ printf(" width: %5u\n", ph->width);
|
||||
+ printf(" height: %5u\n", ph->height);
|
||||
+ printf("\n");
|
||||
+ printf(" num_blobs: %5u\n", ph->num_blobs);
|
||||
+ printf("\n");
|
||||
+ printf(" position: %0x3x\n", ph->position);
|
||||
+ printf(" position_offset: %5u\n", ph->position_offset);
|
||||
+ printf("\n");
|
||||
+ printf(" anim_type: %5u\n", ph->anim_type);
|
||||
+ printf(" anim_loop: %5u\n", ph->anim_loop);
|
||||
+ printf("\n");
|
||||
+}
|
||||
+
|
||||
+static void dump_blob(struct blob_entry *b)
|
||||
+{
|
||||
+ printf(" --- Blob header ---\n");
|
||||
+ printf("\n");
|
||||
+ printf(" length: %7u\n", b->header.length);
|
||||
+ printf(" type: %7u\n", b->header.type);
|
||||
+ printf("\n");
|
||||
+ printf(" picture_id: %7u\n", b->header.picture_id);
|
||||
+ printf("\n");
|
||||
+}
|
||||
+
|
||||
+
|
||||
+#define OPT_MAX(var, max) \
|
||||
+ do { \
|
||||
+ if ((var) > max) { \
|
||||
+ fprintf(stderr, "--%s: Invalid value\n", \
|
||||
+ long_options[option_index].name); \
|
||||
+ break; \
|
||||
+ } \
|
||||
+ } while (0)
|
||||
+
|
||||
+static struct option long_options[] = {
|
||||
+ {"help", 0, 0, 'h'},
|
||||
+ {"bg_red", 1, 0, 10001},
|
||||
+ {"bg_green", 1, 0, 10002},
|
||||
+ {"bg_blue", 1, 0, 10003},
|
||||
+ {"bg_reserved", 1, 0, 10004},
|
||||
+ {"frame_ms", 1, 0, 10005},
|
||||
+ {"picture", 0, 0, 20000},
|
||||
+ {"pic_width", 1, 0, 20001},
|
||||
+ {"pic_height", 1, 0, 20002},
|
||||
+ {"pic_position", 1, 0, 20003},
|
||||
+ {"pic_position_offset", 1, 0, 20004},
|
||||
+ {"pic_anim_type", 1, 0, 20005},
|
||||
+ {"pic_anim_loop", 1, 0, 20006},
|
||||
+ {"blob", 1, 0, 30000},
|
||||
+ {"blob_type", 1, 0, 30001},
|
||||
+ {"blob_picture_id", 1, 0, 30002},
|
||||
+ {NULL, 0, NULL, 0}
|
||||
+};
|
||||
+
|
||||
+
|
||||
+int main(int argc, char **argv)
|
||||
+{
|
||||
+ FILE *of;
|
||||
+ char *ofn;
|
||||
+ int c;
|
||||
+ int option_index = 0;
|
||||
+
|
||||
+ unsigned long ul;
|
||||
+ struct splash_file_header fh = {};
|
||||
+ struct splash_pic_header ph[255];
|
||||
+ struct blob_entry *blob_first = NULL;
|
||||
+ struct blob_entry *blob_last = NULL;
|
||||
+ struct blob_entry *blob_cur = NULL;
|
||||
+
|
||||
+ if (argc < 2) {
|
||||
+ print_help(argv[0]);
|
||||
+ return EXIT_FAILURE;
|
||||
+ }
|
||||
+
|
||||
+
|
||||
+ /* Parse and and execute user commands */
|
||||
+ while ((c = getopt_long(argc, argv, "h",
|
||||
+ long_options, &option_index)) != -1) {
|
||||
+ switch (c) {
|
||||
+ case 10001: /* bg_red */
|
||||
+ ul = strtoul(optarg, NULL, 0);
|
||||
+ OPT_MAX(ul, 255);
|
||||
+ fh.bg_red = ul;
|
||||
+ break;
|
||||
+ case 10002: /* bg_green */
|
||||
+ ul = strtoul(optarg, NULL, 0);
|
||||
+ OPT_MAX(ul, 255);
|
||||
+ fh.bg_green = ul;
|
||||
+ break;
|
||||
+ case 10003: /* bg_blue */
|
||||
+ ul = strtoul(optarg, NULL, 0);
|
||||
+ OPT_MAX(ul, 255);
|
||||
+ fh.bg_blue = ul;
|
||||
+ break;
|
||||
+ case 10004: /* bg_reserved */
|
||||
+ ul = strtoul(optarg, NULL, 0);
|
||||
+ OPT_MAX(ul, 255);
|
||||
+ fh.bg_reserved = ul;
|
||||
+ break;
|
||||
+ case 10005: /* frame_ms */
|
||||
+ ul = strtoul(optarg, NULL, 0);
|
||||
+ OPT_MAX(ul, 65535);
|
||||
+ fh.frame_ms = ul;
|
||||
+ break;
|
||||
+
|
||||
+
|
||||
+ case 20000: /* picture */
|
||||
+ if (fh.num_pics >= 255) {
|
||||
+ fprintf(stderr, "--%s: Picture array full\n",
|
||||
+ long_options[option_index].name);
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ fh.num_pics++;
|
||||
+ break;
|
||||
+
|
||||
+ case 20001: /* pic_width */
|
||||
+ ul = strtoul(optarg, NULL, 0);
|
||||
+ OPT_MAX(ul, 65535);
|
||||
+ ph[fh.num_pics - 1].width = ul;
|
||||
+ break;
|
||||
+
|
||||
+ case 20002: /* pic_height */
|
||||
+ ul = strtoul(optarg, NULL, 0);
|
||||
+ OPT_MAX(ul, 65535);
|
||||
+ ph[fh.num_pics - 1].height = ul;
|
||||
+ break;
|
||||
+
|
||||
+ case 20003: /* pic_position */
|
||||
+ ul = strtoul(optarg, NULL, 0);
|
||||
+ OPT_MAX(ul, 255);
|
||||
+ ph[fh.num_pics - 1].position = ul;
|
||||
+ break;
|
||||
+
|
||||
+ case 20004: /* pic_position_offset */
|
||||
+ ul = strtoul(optarg, NULL, 0);
|
||||
+ OPT_MAX(ul, 255);
|
||||
+ ph[fh.num_pics - 1].position_offset = ul;
|
||||
+ break;
|
||||
+
|
||||
+ case 20005: /* pic_anim_type */
|
||||
+ ul = strtoul(optarg, NULL, 0);
|
||||
+ OPT_MAX(ul, 255);
|
||||
+ ph[fh.num_pics - 1].anim_type = ul;
|
||||
+ break;
|
||||
+
|
||||
+ case 20006: /* pic_anim_loop */
|
||||
+ ul = strtoul(optarg, NULL, 0);
|
||||
+ OPT_MAX(ul, 255);
|
||||
+ ph[fh.num_pics - 1].anim_loop = ul;
|
||||
+ break;
|
||||
+
|
||||
+
|
||||
+ case 30000: /* blob */
|
||||
+ if (fh.num_blobs >= 65535) {
|
||||
+ fprintf(stderr, "--%s: Blob array full\n",
|
||||
+ long_options[option_index].name);
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ blob_cur = calloc(1, sizeof(struct blob_entry));
|
||||
+ if (!blob_cur) {
|
||||
+ fprintf(stderr, "--%s: Out of memory\n",
|
||||
+ long_options[option_index].name);
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ blob_cur->fn = optarg;
|
||||
+ if (fh.num_pics)
|
||||
+ blob_cur->header.picture_id = fh.num_pics - 1;
|
||||
+
|
||||
+ if (!blob_first)
|
||||
+ blob_first = blob_cur;
|
||||
+ if (blob_last)
|
||||
+ blob_last->next = blob_cur;
|
||||
+ blob_last = blob_cur;
|
||||
+ fh.num_blobs++;
|
||||
+ break;
|
||||
+
|
||||
+ case 30001: /* blob_type */
|
||||
+ if (!blob_cur) {
|
||||
+ fprintf(stderr, "--%s: No blob selected\n",
|
||||
+ long_options[option_index].name);
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ ul = strtoul(optarg, NULL, 0);
|
||||
+ OPT_MAX(ul, 255);
|
||||
+ blob_cur->header.type = ul;
|
||||
+ break;
|
||||
+
|
||||
+ case 30002: /* blob_picture_id */
|
||||
+ if (!blob_cur) {
|
||||
+ fprintf(stderr, "--%s: No blob selected\n",
|
||||
+ long_options[option_index].name);
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ ul = strtoul(optarg, NULL, 0);
|
||||
+ OPT_MAX(ul, 255);
|
||||
+ blob_cur->header.picture_id = ul;
|
||||
+ break;
|
||||
+
|
||||
+
|
||||
+
|
||||
+ case 'h':
|
||||
+ case '?':
|
||||
+ default:
|
||||
+ print_help(argv[0]);
|
||||
+ goto EXIT;
|
||||
+ } /* switch (c) */
|
||||
+ } /* while ((c = getopt_long(...)) != -1) */
|
||||
+
|
||||
+ /* Consume and drop lone arguments */
|
||||
+ while (optind < argc) {
|
||||
+ ofn = argv[optind];
|
||||
+ optind++;
|
||||
+ }
|
||||
+
|
||||
+
|
||||
+ /* Read file lengths */
|
||||
+ for (blob_cur = blob_first; blob_cur; blob_cur = blob_cur->next) {
|
||||
+ FILE *f;
|
||||
+ long pos;
|
||||
+ int i;
|
||||
+
|
||||
+ if (!blob_cur->fn)
|
||||
+ continue;
|
||||
+
|
||||
+ f = fopen(blob_cur->fn, "rb");
|
||||
+ if (!f)
|
||||
+ goto ERR_FILE_LEN;
|
||||
+
|
||||
+ if (fseek(f, 0, SEEK_END))
|
||||
+ goto ERR_FILE_LEN;
|
||||
+
|
||||
+ pos = ftell(f);
|
||||
+ if (pos < 0 || pos > (1 << 30))
|
||||
+ goto ERR_FILE_LEN;
|
||||
+
|
||||
+ blob_cur->header.length = pos;
|
||||
+
|
||||
+ fclose(f);
|
||||
+ continue;
|
||||
+
|
||||
+ERR_FILE_LEN:
|
||||
+ fprintf(stderr, "Error getting file length (or too long): %s\n",
|
||||
+ blob_cur->fn);
|
||||
+ if (f)
|
||||
+ fclose(f);
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+
|
||||
+ /* Set magic headers */
|
||||
+#if __BYTE_ORDER == __BIG_ENDIAN
|
||||
+ memcpy(&fh.id[0], BOOTSPLASH_MAGIC_BE, 16);
|
||||
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
+ memcpy(&fh.id[0], BOOTSPLASH_MAGIC_LE, 16);
|
||||
+#else
|
||||
+#error
|
||||
+#endif
|
||||
+ fh.version = BOOTSPLASH_VERSION;
|
||||
+
|
||||
+ /* Set blob counts */
|
||||
+ for (blob_cur = blob_first; blob_cur; blob_cur = blob_cur->next) {
|
||||
+ if (blob_cur->header.picture_id < fh.num_pics)
|
||||
+ ph[blob_cur->header.picture_id].num_blobs++;
|
||||
+ }
|
||||
+
|
||||
+
|
||||
+ /* Dump structs */
|
||||
+ dump_file_header(&fh);
|
||||
+
|
||||
+ for (ul = 0; ul < fh.num_pics; ul++)
|
||||
+ dump_pic_header(&ph[ul]);
|
||||
+
|
||||
+ for (blob_cur = blob_first; blob_cur; blob_cur = blob_cur->next)
|
||||
+ dump_blob(blob_cur);
|
||||
+
|
||||
+
|
||||
+ /* Write to file */
|
||||
+ printf("Writing splash to file: %s\n", ofn);
|
||||
+ of = fopen(ofn, "wb");
|
||||
+ if (!of)
|
||||
+ goto ERR_WRITING;
|
||||
+
|
||||
+ if (fwrite(&fh, sizeof(struct splash_file_header), 1, of) != 1)
|
||||
+ goto ERR_WRITING;
|
||||
+
|
||||
+ for (ul = 0; ul < fh.num_pics; ul++) {
|
||||
+ if (fwrite(&ph[ul], sizeof(struct splash_pic_header), 1, of)
|
||||
+ != 1)
|
||||
+ goto ERR_WRITING;
|
||||
+ }
|
||||
+
|
||||
+ blob_cur = blob_first;
|
||||
+ while (blob_cur) {
|
||||
+ struct blob_entry *blob_old = blob_cur;
|
||||
+ FILE *f;
|
||||
+ char *buf[256];
|
||||
+ uint32_t left;
|
||||
+
|
||||
+ if (fwrite(&blob_cur->header,
|
||||
+ sizeof(struct splash_blob_header), 1, of) != 1)
|
||||
+ goto ERR_WRITING;
|
||||
+
|
||||
+ if (!blob_cur->header.length || !blob_cur->fn)
|
||||
+ continue;
|
||||
+
|
||||
+ f = fopen(blob_cur->fn, "rb");
|
||||
+ if (!f)
|
||||
+ goto ERR_FILE_COPY;
|
||||
+
|
||||
+ left = blob_cur->header.length;
|
||||
+ while (left >= sizeof(buf)) {
|
||||
+ if (fread(buf, sizeof(buf), 1, f) != 1)
|
||||
+ goto ERR_FILE_COPY;
|
||||
+ if (fwrite(buf, sizeof(buf), 1, of) != 1)
|
||||
+ goto ERR_FILE_COPY;
|
||||
+ left -= sizeof(buf);
|
||||
+ }
|
||||
+ if (left) {
|
||||
+ if (fread(buf, left, 1, f) != 1)
|
||||
+ goto ERR_FILE_COPY;
|
||||
+ if (fwrite(buf, left, 1, of) != 1)
|
||||
+ goto ERR_FILE_COPY;
|
||||
+ }
|
||||
+
|
||||
+ /* Pad data stream to 16 bytes */
|
||||
+ if (left % 16) {
|
||||
+ if (fwrite("\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0",
|
||||
+ 16 - (left % 16), 1, of) != 1)
|
||||
+ goto ERR_FILE_COPY;
|
||||
+ }
|
||||
+
|
||||
+ fclose(f);
|
||||
+ blob_cur = blob_cur->next;
|
||||
+ free(blob_old);
|
||||
+ continue;
|
||||
+
|
||||
+ERR_FILE_COPY:
|
||||
+ if (f)
|
||||
+ fclose(f);
|
||||
+ goto ERR_WRITING;
|
||||
+ }
|
||||
+
|
||||
+ fclose(of);
|
||||
+
|
||||
+EXIT:
|
||||
+ return EXIT_SUCCESS;
|
||||
+
|
||||
+
|
||||
+ERR_WRITING:
|
||||
+ fprintf(stderr, "Error writing splash.\n");
|
||||
+ fprintf(stderr, "The output file is probably corrupt.\n");
|
||||
+ if (of)
|
||||
+ fclose(of);
|
||||
+
|
||||
+ while (blob_cur) {
|
||||
+ struct blob_entry *blob_old = blob_cur;
|
||||
+
|
||||
+ blob_cur = blob_cur->next;
|
||||
+ free(blob_old);
|
||||
+ }
|
||||
+
|
||||
+ return EXIT_FAILURE;
|
||||
+}
|
@ -0,0 +1,12 @@
|
||||
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
|
||||
index 03b83aa91277..dfc6c7d1b0e7 100644
|
||||
--- a/drivers/bluetooth/btusb.c
|
||||
+++ b/drivers/bluetooth/btusb.c
|
||||
@@ -4070,6 +4070,7 @@ static int btusb_setup_qca(struct hci_dev *hdev)
|
||||
}
|
||||
if (!info) {
|
||||
bt_dev_err(hdev, "don't support firmware rome 0x%x", ver_rom);
|
||||
+ if (ver_rom & ~0xffffU) return 0;
|
||||
return -ENODEV;
|
||||
}
|
||||
|
@ -0,0 +1,49 @@
|
||||
From cb408fb65a08bd45543724c1e9b8f38ae1bebc4a Mon Sep 17 00:00:00 2001
|
||||
From: Arnaud Ferraris <arnaud.ferraris@gmail.com>
|
||||
Date: Tue, 4 Aug 2020 15:12:59 +0200
|
||||
Subject: [PATCH 177/183] leds-gpio: make max_brightness configurable
|
||||
|
||||
---
|
||||
drivers/leds/leds-gpio.c | 4 ++++
|
||||
include/linux/leds.h | 3 ++-
|
||||
2 files changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
|
||||
index 93f5b1b60fde..f8483fab1164 100644
|
||||
--- a/drivers/leds/leds-gpio.c
|
||||
+++ b/drivers/leds/leds-gpio.c
|
||||
@@ -108,6 +108,8 @@ static int create_gpio_led(const struct gpio_led *template,
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
+ led_dat->cdev.max_brightness = template->max_brightness;
|
||||
+
|
||||
if (template->name) {
|
||||
led_dat->cdev.name = template->name;
|
||||
ret = devm_led_classdev_register(parent, &led_dat->cdev);
|
||||
@@ -177,6 +179,8 @@ static struct gpio_leds_priv *gpio_leds_create(struct platform_device *pdev)
|
||||
if (fwnode_property_present(child, "panic-indicator"))
|
||||
led.panic_indicator = 1;
|
||||
|
||||
+ fwnode_property_read_u32(child, "max-brightness", &led.max_brightness);
|
||||
+
|
||||
ret = create_gpio_led(&led, led_dat, dev, child, NULL);
|
||||
if (ret < 0) {
|
||||
fwnode_handle_put(child);
|
||||
diff --git a/include/linux/leds.h b/include/linux/leds.h
|
||||
index 6a8d6409c993..99a80092114d 100644
|
||||
--- a/include/linux/leds.h
|
||||
+++ b/include/linux/leds.h
|
||||
@@ -513,7 +513,8 @@ typedef int (*gpio_blink_set_t)(struct gpio_desc *desc, int state,
|
||||
struct gpio_led {
|
||||
const char *name;
|
||||
const char *default_trigger;
|
||||
- unsigned gpio;
|
||||
+ unsigned gpio;
|
||||
+ unsigned max_brightness;
|
||||
unsigned active_low : 1;
|
||||
unsigned retain_state_suspended : 1;
|
||||
unsigned panic_indicator : 1;
|
||||
--
|
||||
2.30.0
|
||||
|
1526
sys-kernel/pinephone-sources/files/5.11.5.patch
Normal file
1526
sys-kernel/pinephone-sources/files/5.11.5.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,391 @@
|
||||
From mboxrd@z Thu Jan 1 00:00:00 1970
|
||||
Return-Path: <linux-kernel-owner@kernel.org>
|
||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
|
||||
aws-us-west-2-korg-lkml-1.web.codeaurora.org
|
||||
X-Spam-Level:
|
||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
|
||||
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
|
||||
INCLUDES_PATCH,MAILING_LIST_MULTI,MENTIONS_GIT_HOSTING,SPF_HELO_NONE,SPF_PASS,
|
||||
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
|
||||
version=3.4.0
|
||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
|
||||
by smtp.lore.kernel.org (Postfix) with ESMTP id BA09AC433ED
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:04 +0000 (UTC)
|
||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
|
||||
by mail.kernel.org (Postfix) with ESMTP id 99A326108C
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:04 +0000 (UTC)
|
||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
|
||||
id S230359AbhETGzY (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:24 -0400
|
||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37854 "EHLO
|
||||
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
|
||||
with ESMTP id S229534AbhETGzX (ORCPT
|
||||
<rfc822;linux-kernel@vger.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:23 -0400
|
||||
Received: from mail-qk1-x74a.google.com (mail-qk1-x74a.google.com [IPv6:2607:f8b0:4864:20::74a])
|
||||
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2DB47C061574
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:01 -0700 (PDT)
|
||||
Received: by mail-qk1-x74a.google.com with SMTP id z2-20020a3765020000b02903a5f51b1c74so684222qkb.7
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:01 -0700 (PDT)
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=google.com; s=20161025;
|
||||
h=date:message-id:mime-version:subject:from:to:cc;
|
||||
bh=Y3hJAMwzbf34YQU8QX5BsSV2xoCmy36DYto5ZLIStJc=;
|
||||
b=r/V1aR1KHSQ2RwrGIEEbdDV0RqV+tdHJLBnCnPMLdI4quvTDua13dKOHpxS2Rc7bc4
|
||||
6ON9rpxOpEhBMPLS8798xqa4jQBTINTCKNlIi3TpaV8t/shwlViCb4Y9bZ4ng8VEsXp3
|
||||
H2s3DQbb47Iio7YrOnBahF4qBDJl2fkHL257Ao4wgzgG/ZCK2oy5dcipOFrEpQqPk5vO
|
||||
hhTC4Zr1DE3XI+Y+uTozfI8CoAtllv6qL31gAWcycyeN72teVQa9ilaeTdglxhCO9DVG
|
||||
BFkiZH+21Eo3M8PRz4OztnGgRtMvbgNnuUWZ68bnZkO4wMyL6mX2520HA9NQNkGSXLnP
|
||||
74Zg==
|
||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=1e100.net; s=20161025;
|
||||
h=x-gm-message-state:date:message-id:mime-version:subject:from:to:cc;
|
||||
bh=Y3hJAMwzbf34YQU8QX5BsSV2xoCmy36DYto5ZLIStJc=;
|
||||
b=h+lPKp7mQ6QF8fW3fzT7HQgoaLvOfkKtGjwFNvWMOi8UMz94CGWpTgC4tsEX0PenoK
|
||||
snCz9kDMIR35YO9Dlhz1Ci/04htNK9p+rnvGn7ri/Oin5fFeyVQ15qh33Bgut5m3SKR2
|
||||
imeFBLkWsXGtFd23XCBmjIcNrqZA0LxhIwoYCbrVWSq5H29Eo6C9ab0gmJ1oY0DCPOL/
|
||||
Fi8M2neMwLN09EebwZONh8AGuP0XiL0oSnAGDZAhaaAimfHrPBMMYCrxpjnaGxPG2hY0
|
||||
gvju/bIag6Ug8urHdAAGWsdLaNIsdrIKWlaL76FjcULVwdAARKQifiMMTwJ2JU5y5jMG
|
||||
OKRg==
|
||||
X-Gm-Message-State: AOAM5322gu+Tvm1pCjTiKdWMNb3cz1Z6+VCfYHkB7vDvNRYItvu08gEA
|
||||
/W/WlY6Lc6/4O5nrreOspbq5n77XobE=
|
||||
X-Google-Smtp-Source: ABdhPJy+4EmI1VvFDhlB3errX+0774OdClFY8nQyFqDe9Pqq8FOdLBnXamEbn+N9M1F/HG6sJ6Mw/n7qw/8=
|
||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
|
||||
(user=yuzhao job=sendgmr) by 2002:a0c:e4cd:: with SMTP id g13mr3727631qvm.34.1621493640278;
|
||||
Wed, 19 May 2021 23:54:00 -0700 (PDT)
|
||||
Date: Thu, 20 May 2021 00:53:41 -0600
|
||||
Message-Id: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
Mime-Version: 1.0
|
||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
|
||||
Subject: [PATCH v3 00/14] Multigenerational LRU Framework
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
To: linux-mm@kvack.org
|
||||
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
|
||||
Andrew Morton <akpm@linux-foundation.org>,
|
||||
Dave Chinner <david@fromorbit.com>,
|
||||
Dave Hansen <dave.hansen@linux.intel.com>,
|
||||
Donald Carr <sirspudd@gmail.com>,
|
||||
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
|
||||
Johannes Weiner <hannes@cmpxchg.org>,
|
||||
Jonathan Corbet <corbet@lwn.net>,
|
||||
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
|
||||
Konstantin Kharlamov <hi-angel@yandex.ru>,
|
||||
Marcus Seyfarth <m.seyfarth@gmail.com>,
|
||||
Matthew Wilcox <willy@infradead.org>,
|
||||
Mel Gorman <mgorman@suse.de>,
|
||||
Miaohe Lin <linmiaohe@huawei.com>,
|
||||
Michael Larabel <michael@michaellarabel.com>,
|
||||
Michal Hocko <mhocko@suse.com>,
|
||||
Michel Lespinasse <michel@lespinasse.org>,
|
||||
Rik van Riel <riel@surriel.com>,
|
||||
Roman Gushchin <guro@fb.com>,
|
||||
Tim Chen <tim.c.chen@linux.intel.com>,
|
||||
Vlastimil Babka <vbabka@suse.cz>,
|
||||
Yang Shi <shy828301@gmail.com>,
|
||||
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
|
||||
linux-kernel@vger.kernel.org, lkp@lists.01.org,
|
||||
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
Precedence: bulk
|
||||
List-ID: <linux-kernel.vger.kernel.org>
|
||||
X-Mailing-List: linux-kernel@vger.kernel.org
|
||||
List-Archive: <https://lore.kernel.org/lkml/>
|
||||
|
||||
What's new in v3
|
||||
================
|
||||
1) Fixed a bug reported by the Arch Linux kernel team:
|
||||
https://github.com/zen-kernel/zen-kernel/issues/207
|
||||
2) Rebased to v5.13-rc2.
|
||||
|
||||
Highlights from v2
|
||||
==================
|
||||
Konstantin Kharlamov <hi-angel@yandex.ru> reported:
|
||||
My success story: I have Archlinux with 8G RAM + zswap + swap. While
|
||||
developing, I have lots of apps opened such as multiple LSP-servers
|
||||
for different langs, chats, two browsers, etc. Usually, my system
|
||||
gets quickly to a point of SWAP-storms, where I have to kill
|
||||
LSP-servers, restart browsers to free memory, etc, otherwise the
|
||||
system lags heavily and is barely usable.
|
||||
|
||||
1.5 day ago I migrated from 5.11.15 kernel to 5.12 + the LRU
|
||||
patchset, and I started up by opening lots of apps to create memory
|
||||
pressure, and worked for a day like this. Till now I had *not a
|
||||
single SWAP-storm*, and mind you I got 3.4G in SWAP. I was never
|
||||
getting to the point of 3G in SWAP before without a single
|
||||
SWAP-storm.
|
||||
|
||||
TLDR
|
||||
====
|
||||
The current page reclaim is too expensive in terms of CPU usage and
|
||||
often making poor choices about what to evict. We would like to offer
|
||||
an alternative framework that is performant, versatile and
|
||||
straightforward.
|
||||
|
||||
Repo
|
||||
====
|
||||
git fetch https://linux-mm.googlesource.com/page-reclaim refs/changes/53/1253/1
|
||||
|
||||
Problems
|
||||
========
|
||||
Notion of active/inactive
|
||||
-------------------------
|
||||
Data centers need to predict whether a job can successfully land on a
|
||||
machine without actually impacting the existing jobs. The granularity
|
||||
of the active/inactive is too coarse to be useful for job schedulers
|
||||
to make such decisions. In addition, data centers need to monitor
|
||||
their memory utilization for horizontal scaling. The active/inactive
|
||||
cannot give any insight into a pool of machines because aggregating
|
||||
them across multiple machines without a common frame of reference
|
||||
yields no meaningful results.
|
||||
|
||||
Phones and laptops need to make good choices about what to evict,
|
||||
since they are more sensitive to the major faults and the power
|
||||
consumption. Major faults can cause "janks" (slow UI renderings) and
|
||||
negatively impact user experience. The selection between anon and file
|
||||
types has been suboptimal because direct comparisons between them are
|
||||
infeasible based on the notion of active/inactive. On phones and
|
||||
laptops, executable pages are frequently evicted despite the fact that
|
||||
there are many less recently used anon pages. Conversely, on
|
||||
workstations building large projects, anon pages are occasionally
|
||||
swapped out while page cache contains many less recently used pages.
|
||||
|
||||
Fundamentally, the notion of active/inactive has very limited ability
|
||||
to measure temporal locality.
|
||||
|
||||
Incremental scans via rmap
|
||||
--------------------------
|
||||
Each incremental scan picks up at where the last scan left off and
|
||||
stops after it has found a handful of unreferenced pages. For
|
||||
workloads using a large amount of anon memory, incremental scans lose
|
||||
the advantage under sustained memory pressure due to high ratios of
|
||||
the number of scanned pages to the number of reclaimed pages. On top
|
||||
of this, the rmap has complex data structures. And the combined
|
||||
effects typically result in a high amount of CPU usage in the reclaim
|
||||
path.
|
||||
|
||||
Simply put, incremental scans via rmap have no regard for spatial
|
||||
locality.
|
||||
|
||||
Solutions
|
||||
=========
|
||||
Notion of generation numbers
|
||||
----------------------------
|
||||
The notion of generation numbers introduces a temporal dimension. Each
|
||||
generation is a dot on the timeline and it includes all pages that
|
||||
have been referenced since it was created.
|
||||
|
||||
Given an lruvec, scans of anon and file types and selections between
|
||||
them are all based on direct comparisons of generation numbers, which
|
||||
are simple and yet effective.
|
||||
|
||||
A larger number of pages can be spread out across a configurable
|
||||
number of generations, which are associated with timestamps and
|
||||
therefore aggregatable. This is specifically designed for data centers
|
||||
that require working set estimation and proactive reclaim.
|
||||
|
||||
Differential scans via page tables
|
||||
----------------------------------
|
||||
Each differential scan discovers all pages that have been referenced
|
||||
since the last scan. It walks the mm_struct list associated with an
|
||||
lruvec to scan page tables of processes that have been scheduled since
|
||||
the last scan. The cost of each differential scan is roughly
|
||||
proportional to the number of referenced pages it discovers. Page
|
||||
tables usually have good memory locality. The end result is generally
|
||||
a significant reduction in CPU usage, for workloads using a large
|
||||
amount of anon memory.
|
||||
|
||||
For workloads that have extremely sparse page tables, it is still
|
||||
possible to fall back to incremental scans via rmap.
|
||||
|
||||
Framework
|
||||
=========
|
||||
For each lruvec, evictable pages are divided into multiple
|
||||
generations. The youngest generation number is stored in
|
||||
lrugen->max_seq for both anon and file types as they are aged on an
|
||||
equal footing. The oldest generation numbers are stored in
|
||||
lrugen->min_seq[2] separately for anon and file types as clean file
|
||||
pages can be evicted regardless of may_swap or may_writepage. These
|
||||
three variables are monotonically increasing. Generation numbers are
|
||||
truncated into order_base_2(MAX_NR_GENS+1) bits in order to fit into
|
||||
page->flags. The sliding window technique is used to prevent truncated
|
||||
generation numbers from overlapping. Each truncated generation number
|
||||
is an index to
|
||||
lrugen->lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]. Evictable
|
||||
pages are added to the per-zone lists indexed by lrugen->max_seq or
|
||||
lrugen->min_seq[2] (modulo MAX_NR_GENS), depending on their types.
|
||||
|
||||
Each generation is then divided into multiple tiers. Tiers represent
|
||||
levels of usage from file descriptors only. Pages accessed N times via
|
||||
file descriptors belong to tier order_base_2(N). Each generation
|
||||
contains at most MAX_NR_TIERS tiers, and they require additional
|
||||
MAX_NR_TIERS-2 bits in page->flags. In contrast to moving across
|
||||
generations which requires the lru lock for the list operations,
|
||||
moving across tiers only involves an atomic operation on page->flags
|
||||
and therefore has a negligible cost. A feedback loop modeled after the
|
||||
PID controller monitors the refault rates across all tiers and decides
|
||||
when to activate pages from which tiers in the reclaim path.
|
||||
|
||||
The framework comprises two conceptually independent components: the
|
||||
aging and the eviction, which can be invoked separately from user
|
||||
space for the purpose of working set estimation and proactive reclaim.
|
||||
|
||||
Aging
|
||||
-----
|
||||
The aging produces young generations. Given an lruvec, the aging scans
|
||||
page tables for referenced pages of this lruvec. Upon finding one, the
|
||||
aging updates its generation number to max_seq. After each round of
|
||||
scan, the aging increments max_seq.
|
||||
|
||||
The aging maintains either a system-wide mm_struct list or per-memcg
|
||||
mm_struct lists, and it only scans page tables of processes that have
|
||||
been scheduled since the last scan.
|
||||
|
||||
The aging is due when both of min_seq[2] reaches max_seq-1, assuming
|
||||
both anon and file types are reclaimable.
|
||||
|
||||
Eviction
|
||||
--------
|
||||
The eviction consumes old generations. Given an lruvec, the eviction
|
||||
scans the pages on the per-zone lists indexed by either of min_seq[2].
|
||||
It first tries to select a type based on the values of min_seq[2].
|
||||
When anon and file types are both available from the same generation,
|
||||
it selects the one that has a lower refault rate.
|
||||
|
||||
During a scan, the eviction sorts pages according to their new
|
||||
generation numbers, if the aging has found them referenced. It also
|
||||
moves pages from the tiers that have higher refault rates than tier 0
|
||||
to the next generation.
|
||||
|
||||
When it finds all the per-zone lists of a selected type are empty, the
|
||||
eviction increments min_seq[2] indexed by this selected type.
|
||||
|
||||
Use cases
|
||||
=========
|
||||
High anon workloads
|
||||
-------------------
|
||||
Our real-world benchmark that browses popular websites in multiple
|
||||
Chrome tabs demonstrates 51% less CPU usage from kswapd and 52% (full)
|
||||
less PSI.
|
||||
|
||||
Without this patchset, the profile of kswapd looks like:
|
||||
31.03% page_vma_mapped_walk
|
||||
25.59% lzo1x_1_do_compress
|
||||
4.63% do_raw_spin_lock
|
||||
3.89% vma_interval_tree_iter_next
|
||||
3.33% vma_interval_tree_subtree_search
|
||||
|
||||
With this patchset, it looks like:
|
||||
49.36% lzo1x_1_do_compress
|
||||
4.54% page_vma_mapped_walk
|
||||
4.45% memset_erms
|
||||
3.47% walk_pte_range
|
||||
2.88% zram_bvec_rw
|
||||
|
||||
In addition, direct reclaim latency is reduced by 22% at 99th
|
||||
percentile and the number of refaults is reduced by 7%. Both metrics
|
||||
are important to phones and laptops as they are highly correlated to
|
||||
user experience.
|
||||
|
||||
High page cache workloads
|
||||
-------------------------
|
||||
Tiers are specifically designed to improve the performance of page
|
||||
cache under memory pressure. The fio/io_uring benchmark shows 14%
|
||||
increase in IOPS when randomly accessing in buffered I/O mode.
|
||||
|
||||
Without this patchset, the profile of fio/io_uring looks like:
|
||||
Children Self Symbol
|
||||
-----------------------------------
|
||||
12.03% 0.03% __page_cache_alloc
|
||||
6.53% 0.83% shrink_active_list
|
||||
2.53% 0.44% mark_page_accessed
|
||||
|
||||
With this patchset, it looks like:
|
||||
Children Self Symbol
|
||||
-----------------------------------
|
||||
9.45% 0.03% __page_cache_alloc
|
||||
0.52% 0.46% mark_page_accessed
|
||||
|
||||
Working set estimation
|
||||
----------------------
|
||||
User space can invoke the aging by writing "+ memcg_id node_id gen
|
||||
[swappiness]" to /sys/kernel/debug/lru_gen. This debugfs interface
|
||||
also provides the birth time and the size of each generation.
|
||||
|
||||
For example, given a pool of machines, a job scheduler periodically
|
||||
invokes the aging to estimate the working set of each machine. And it
|
||||
ranks the machines based on the sizes of their working sets and
|
||||
selects the most ideal ones to land new jobs.
|
||||
|
||||
Proactive reclaim
|
||||
-----------------
|
||||
User space can invoke the eviction by writing "- memcg_id node_id gen
|
||||
[swappiness] [nr_to_reclaim]" to /sys/kernel/debug/lru_gen. Multiple
|
||||
command lines are supported, so does concatenation with delimiters.
|
||||
|
||||
For example, a job scheduler can invoke the eviction if it anticipates
|
||||
new jobs. The savings from proactive reclaim may provide certain SLA
|
||||
when new jobs actually land.
|
||||
|
||||
Yu Zhao (14):
|
||||
include/linux/memcontrol.h: do not warn in page_memcg_rcu() if
|
||||
!CONFIG_MEMCG
|
||||
include/linux/nodemask.h: define next_memory_node() if !CONFIG_NUMA
|
||||
include/linux/cgroup.h: export cgroup_mutex
|
||||
mm, x86: support the access bit on non-leaf PMD entries
|
||||
mm/vmscan.c: refactor shrink_node()
|
||||
mm/workingset.c: refactor pack_shadow() and unpack_shadow()
|
||||
mm: multigenerational lru: groundwork
|
||||
mm: multigenerational lru: activation
|
||||
mm: multigenerational lru: mm_struct list
|
||||
mm: multigenerational lru: aging
|
||||
mm: multigenerational lru: eviction
|
||||
mm: multigenerational lru: user interface
|
||||
mm: multigenerational lru: Kconfig
|
||||
mm: multigenerational lru: documentation
|
||||
|
||||
Documentation/vm/index.rst | 1 +
|
||||
Documentation/vm/multigen_lru.rst | 143 ++
|
||||
arch/Kconfig | 9 +
|
||||
arch/x86/Kconfig | 1 +
|
||||
arch/x86/include/asm/pgtable.h | 2 +-
|
||||
arch/x86/mm/pgtable.c | 5 +-
|
||||
fs/exec.c | 2 +
|
||||
fs/fuse/dev.c | 3 +-
|
||||
include/linux/cgroup.h | 15 +-
|
||||
include/linux/memcontrol.h | 7 +-
|
||||
include/linux/mm.h | 2 +
|
||||
include/linux/mm_inline.h | 234 +++
|
||||
include/linux/mm_types.h | 107 ++
|
||||
include/linux/mmzone.h | 117 ++
|
||||
include/linux/nodemask.h | 1 +
|
||||
include/linux/page-flags-layout.h | 19 +-
|
||||
include/linux/page-flags.h | 4 +-
|
||||
include/linux/pgtable.h | 4 +-
|
||||
include/linux/swap.h | 4 +-
|
||||
kernel/bounds.c | 6 +
|
||||
kernel/events/uprobes.c | 2 +-
|
||||
kernel/exit.c | 1 +
|
||||
kernel/fork.c | 10 +
|
||||
kernel/kthread.c | 1 +
|
||||
kernel/sched/core.c | 2 +
|
||||
mm/Kconfig | 58 +
|
||||
mm/huge_memory.c | 5 +-
|
||||
mm/khugepaged.c | 2 +-
|
||||
mm/memcontrol.c | 28 +
|
||||
mm/memory.c | 10 +-
|
||||
mm/migrate.c | 2 +-
|
||||
mm/mm_init.c | 6 +-
|
||||
mm/mmzone.c | 2 +
|
||||
mm/rmap.c | 6 +
|
||||
mm/swap.c | 22 +-
|
||||
mm/swapfile.c | 6 +-
|
||||
mm/userfaultfd.c | 2 +-
|
||||
mm/vmscan.c | 2638 ++++++++++++++++++++++++++++-
|
||||
mm/workingset.c | 169 +-
|
||||
39 files changed, 3498 insertions(+), 160 deletions(-)
|
||||
create mode 100644 Documentation/vm/multigen_lru.rst
|
||||
|
||||
--
|
||||
2.31.1.751.gd2f1c929bd-goog
|
||||
|
||||
|
@ -0,0 +1,146 @@
|
||||
From mboxrd@z Thu Jan 1 00:00:00 1970
|
||||
Return-Path: <linux-kernel-owner@kernel.org>
|
||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
|
||||
aws-us-west-2-korg-lkml-1.web.codeaurora.org
|
||||
X-Spam-Level:
|
||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
|
||||
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
|
||||
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
|
||||
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=ham autolearn_force=no
|
||||
version=3.4.0
|
||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
|
||||
by smtp.lore.kernel.org (Postfix) with ESMTP id D67E4C433B4
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:09 +0000 (UTC)
|
||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
|
||||
by mail.kernel.org (Postfix) with ESMTP id BF3A360E0B
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:09 +0000 (UTC)
|
||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
|
||||
id S230431AbhETGz2 (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:28 -0400
|
||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37864 "EHLO
|
||||
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
|
||||
with ESMTP id S229534AbhETGzY (ORCPT
|
||||
<rfc822;linux-kernel@vger.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:24 -0400
|
||||
Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a])
|
||||
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id DD592C061574
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:02 -0700 (PDT)
|
||||
Received: by mail-yb1-xb4a.google.com with SMTP id h67-20020a25d0460000b0290517e5f14ba4so2155434ybg.18
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:02 -0700 (PDT)
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=google.com; s=20161025;
|
||||
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
|
||||
:cc;
|
||||
bh=rcGZDNyJ6vYh3nv1NbyctjSVuVLx0bLzs+7ceU1fwm4=;
|
||||
b=W+uE07dbkPRm0EI7rt0odOA402xB5GM5xsOKzyKKjPQdnq9FzvuhBH2EmYJx+e3w2P
|
||||
M+GjA/Y/0N577Zt0vRn1fv9k1GS93aX/OLI3asM1EluD+bF6m15Qua90BDPhuN6RLdFt
|
||||
9XaT7ugKFU1Zb0CN5pODFmCE1L4eWk8Idy1/MbWRtRICoOacDrCOBD3XXG+gene95EAz
|
||||
h6RenUUXrHuOEIq+2ZT1q6P10VKHqSaPsyoiUDDSBllpMLW3kYmkOWBQGnRaPndswvZ6
|
||||
VxYMBaR/6WNfgBuQGLp6vrXdw55euSCrNkjy2sf+vVpzlTPTbCCa8UgSnsOUvdDUidvY
|
||||
K/+A==
|
||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=1e100.net; s=20161025;
|
||||
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
|
||||
:references:subject:from:to:cc;
|
||||
bh=rcGZDNyJ6vYh3nv1NbyctjSVuVLx0bLzs+7ceU1fwm4=;
|
||||
b=jzFekar0HP/0GPy5CL4U72iLLcQoeWxnnbnFRr4z4BTc2/hrpionzQD3GXHT9lyHMR
|
||||
/YmLr75qJXGcWb0dXLEPIxzZ7UjTYADt32Jy07ZfWITJ2+jRh2k1W3Wgty9YjwFX9wmd
|
||||
/MWxRyZj/674SBQZu12SYNlqqqh/WZxiziHwdgJEnbvX2lzsMWl2I1dL0e+tJwPr9OlS
|
||||
sG5TsZvw0fciDahseafGyx4m1dmrCykPWBTkpCu+BJmF7Bt4PV/ogCZO0TIxn9ezbYr0
|
||||
AkpMnHQnddjXudvfygGW20ymE3ieIKJU8hRbuF0+DfL6jdFdbczkyxRLk2dp6uiavglX
|
||||
Eajg==
|
||||
X-Gm-Message-State: AOAM533a36jLBDzDifmN467aiI0KlSL/85xe4UvoMJPjnzoeRbTl9eck
|
||||
Ndk0rslscVb76VSHIoD/Mmkup4p5pgk=
|
||||
X-Google-Smtp-Source: ABdhPJxDZYdk2SRkfTUl/Zu3nhhCt+1mIXIL72HTmroHynNzAjrSh46FnvoimDPO7nUIugXPbacDy3rCHfQ=
|
||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
|
||||
(user=yuzhao job=sendgmr) by 2002:a5b:b92:: with SMTP id l18mr5146859ybq.414.1621493641980;
|
||||
Wed, 19 May 2021 23:54:01 -0700 (PDT)
|
||||
Date: Thu, 20 May 2021 00:53:42 -0600
|
||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
Message-Id: <20210520065355.2736558-2-yuzhao@google.com>
|
||||
Mime-Version: 1.0
|
||||
References: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
|
||||
Subject: [PATCH v3 01/14] include/linux/memcontrol.h: do not warn in
|
||||
page_memcg_rcu() if !CONFIG_MEMCG
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
To: linux-mm@kvack.org
|
||||
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
|
||||
Andrew Morton <akpm@linux-foundation.org>,
|
||||
Dave Chinner <david@fromorbit.com>,
|
||||
Dave Hansen <dave.hansen@linux.intel.com>,
|
||||
Donald Carr <sirspudd@gmail.com>,
|
||||
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
|
||||
Johannes Weiner <hannes@cmpxchg.org>,
|
||||
Jonathan Corbet <corbet@lwn.net>,
|
||||
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
|
||||
Konstantin Kharlamov <hi-angel@yandex.ru>,
|
||||
Marcus Seyfarth <m.seyfarth@gmail.com>,
|
||||
Matthew Wilcox <willy@infradead.org>,
|
||||
Mel Gorman <mgorman@suse.de>,
|
||||
Miaohe Lin <linmiaohe@huawei.com>,
|
||||
Michael Larabel <michael@michaellarabel.com>,
|
||||
Michal Hocko <mhocko@suse.com>,
|
||||
Michel Lespinasse <michel@lespinasse.org>,
|
||||
Rik van Riel <riel@surriel.com>,
|
||||
Roman Gushchin <guro@fb.com>,
|
||||
Tim Chen <tim.c.chen@linux.intel.com>,
|
||||
Vlastimil Babka <vbabka@suse.cz>,
|
||||
Yang Shi <shy828301@gmail.com>,
|
||||
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
|
||||
linux-kernel@vger.kernel.org, lkp@lists.01.org,
|
||||
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
|
||||
Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
Precedence: bulk
|
||||
List-ID: <linux-kernel.vger.kernel.org>
|
||||
X-Mailing-List: linux-kernel@vger.kernel.org
|
||||
List-Archive: <https://lore.kernel.org/lkml/>
|
||||
|
||||
page_memcg_rcu() warns on !rcu_read_lock_held() regardless of
|
||||
CONFIG_MEMCG. The following legit code trips the warning when
|
||||
!CONFIG_MEMCG, since lock_page_memcg() and unlock_page_memcg() are
|
||||
empty for this config.
|
||||
|
||||
memcg = lock_page_memcg(page1)
|
||||
(rcu_read_lock() if CONFIG_MEMCG=y)
|
||||
|
||||
do something to page1
|
||||
|
||||
if (page_memcg_rcu(page2) == memcg)
|
||||
do something to page2 too as it cannot be migrated away from the
|
||||
memcg either.
|
||||
|
||||
unlock_page_memcg(page1)
|
||||
(rcu_read_unlock() if CONFIG_MEMCG=y)
|
||||
|
||||
Locking/unlocking rcu consistently for both configs is rigorous but it
|
||||
also forces unnecessary locking upon users who have no interest in
|
||||
CONFIG_MEMCG.
|
||||
|
||||
This patch removes the assertion for !CONFIG_MEMCG, because
|
||||
page_memcg_rcu() has a few callers and there are no concerns regarding
|
||||
their correctness at the moment.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
---
|
||||
include/linux/memcontrol.h | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
|
||||
index c193be760709..6bcac3d91dd1 100644
|
||||
--- a/include/linux/memcontrol.h
|
||||
+++ b/include/linux/memcontrol.h
|
||||
@@ -1131,7 +1131,6 @@ static inline struct mem_cgroup *page_memcg(struct page *page)
|
||||
|
||||
static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
|
||||
{
|
||||
- WARN_ON_ONCE(!rcu_read_lock_held());
|
||||
return NULL;
|
||||
}
|
||||
|
||||
--
|
||||
2.31.1.751.gd2f1c929bd-goog
|
||||
|
||||
|
@ -0,0 +1,124 @@
|
||||
From mboxrd@z Thu Jan 1 00:00:00 1970
|
||||
Return-Path: <linux-kernel-owner@kernel.org>
|
||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
|
||||
aws-us-west-2-korg-lkml-1.web.codeaurora.org
|
||||
X-Spam-Level:
|
||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
|
||||
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
|
||||
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
|
||||
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
|
||||
version=3.4.0
|
||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
|
||||
by smtp.lore.kernel.org (Postfix) with ESMTP id 4FED7C433B4
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:14 +0000 (UTC)
|
||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
|
||||
by mail.kernel.org (Postfix) with ESMTP id 33A1260E0B
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:14 +0000 (UTC)
|
||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
|
||||
id S229681AbhETGzd (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:33 -0400
|
||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37868 "EHLO
|
||||
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
|
||||
with ESMTP id S230365AbhETGzZ (ORCPT
|
||||
<rfc822;linux-kernel@vger.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:25 -0400
|
||||
Received: from mail-qv1-xf4a.google.com (mail-qv1-xf4a.google.com [IPv6:2607:f8b0:4864:20::f4a])
|
||||
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 6DFF8C061574
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:04 -0700 (PDT)
|
||||
Received: by mail-qv1-xf4a.google.com with SMTP id e15-20020a0caa4f0000b02901eedbb09299so10877261qvb.15
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:04 -0700 (PDT)
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=google.com; s=20161025;
|
||||
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
|
||||
:cc;
|
||||
bh=Vxs+IsfkjudRR9AMrF0VuRYwRpXVOuThAM5t2BO4Wtw=;
|
||||
b=gEeAhMTYbAQslSO01e0o8YIym/dFnsQfB9lZrmyFhl8uqTqmQEDIIu/28e1BOlyvt9
|
||||
rJm5/8Caqo1KIaunpdBy2LPtOXmfi9ZJDnuwRnb21JoByNrFkCChT4Z5xyeBqut1yHQm
|
||||
/TlMDm6OPoewZgMjaOhRgLuU1i+Q2viLaBK5TcX/f4jp7CkEtCTn5SioWFrXLpHFPgfg
|
||||
kYO7g2IN+CR6iy3EfEzmDy81m8wakeRxZZOx4HjJ7gGFFDfSfK4SyZnaOFS2lmsp1BrE
|
||||
F/LSnBPYHDzzGJCEqa0RfGIu9OFnYG7fyBb1AzMWt5UOwD1Z5Gw0p3PCAJBE1ykqsTQ7
|
||||
ooLQ==
|
||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=1e100.net; s=20161025;
|
||||
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
|
||||
:references:subject:from:to:cc;
|
||||
bh=Vxs+IsfkjudRR9AMrF0VuRYwRpXVOuThAM5t2BO4Wtw=;
|
||||
b=WqdLL8JjD4oD11umCae6nAxvQ9zl0qjRO1psbjN4FhRRkc/CQM+QJ7TCbayd5VCaUu
|
||||
rtoZfhuozKEHvBBclv4jQzorRkiOVjwZGIX52HwBCvcXpPAQF69jkqG0WRjQKrUK9laH
|
||||
TcyQDkdTbJztqqLLgoaMke0Pvw3qiJvr4a19wbHUQt+uwR4UXoL2NTQBotaVS1/Ji2zn
|
||||
MhoxUD+f8pgFoCCCn/G5DgDvOIOCN2Ed8dcKb+fMyQn2Lhjc9xLUnlaGN6WdKwBpOq9Z
|
||||
AFd/Ld6gumYWjtW7nBuum/ysfqbF5Au47sGCuLngPSTq/x4OIApMHQLjOikk1jB9ydlN
|
||||
GRzA==
|
||||
X-Gm-Message-State: AOAM5302cFNM+/KQLI85uCrC18olBzO35TXBlTOBABeaNnESgoIvYml8
|
||||
1LlliNnJr3ok2uXDGXaBzjDPiIU7ddM=
|
||||
X-Google-Smtp-Source: ABdhPJzxHNbGXEeZppZputW4eyMKgrpzCcHkInnC6cFjCLcGtMdRmVBNnbFHvD2nDRL//K+y6INSV9VSvtY=
|
||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
|
||||
(user=yuzhao job=sendgmr) by 2002:ad4:4184:: with SMTP id e4mr4097411qvp.13.1621493643517;
|
||||
Wed, 19 May 2021 23:54:03 -0700 (PDT)
|
||||
Date: Thu, 20 May 2021 00:53:43 -0600
|
||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
Message-Id: <20210520065355.2736558-3-yuzhao@google.com>
|
||||
Mime-Version: 1.0
|
||||
References: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
|
||||
Subject: [PATCH v3 02/14] include/linux/nodemask.h: define next_memory_node()
|
||||
if !CONFIG_NUMA
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
To: linux-mm@kvack.org
|
||||
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
|
||||
Andrew Morton <akpm@linux-foundation.org>,
|
||||
Dave Chinner <david@fromorbit.com>,
|
||||
Dave Hansen <dave.hansen@linux.intel.com>,
|
||||
Donald Carr <sirspudd@gmail.com>,
|
||||
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
|
||||
Johannes Weiner <hannes@cmpxchg.org>,
|
||||
Jonathan Corbet <corbet@lwn.net>,
|
||||
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
|
||||
Konstantin Kharlamov <hi-angel@yandex.ru>,
|
||||
Marcus Seyfarth <m.seyfarth@gmail.com>,
|
||||
Matthew Wilcox <willy@infradead.org>,
|
||||
Mel Gorman <mgorman@suse.de>,
|
||||
Miaohe Lin <linmiaohe@huawei.com>,
|
||||
Michael Larabel <michael@michaellarabel.com>,
|
||||
Michal Hocko <mhocko@suse.com>,
|
||||
Michel Lespinasse <michel@lespinasse.org>,
|
||||
Rik van Riel <riel@surriel.com>,
|
||||
Roman Gushchin <guro@fb.com>,
|
||||
Tim Chen <tim.c.chen@linux.intel.com>,
|
||||
Vlastimil Babka <vbabka@suse.cz>,
|
||||
Yang Shi <shy828301@gmail.com>,
|
||||
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
|
||||
linux-kernel@vger.kernel.org, lkp@lists.01.org,
|
||||
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
|
||||
Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
Precedence: bulk
|
||||
List-ID: <linux-kernel.vger.kernel.org>
|
||||
X-Mailing-List: linux-kernel@vger.kernel.org
|
||||
List-Archive: <https://lore.kernel.org/lkml/>
|
||||
|
||||
Currently next_memory_node only exists when CONFIG_NUMA=y. This patch
|
||||
adds the macro for !CONFIG_NUMA.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
---
|
||||
include/linux/nodemask.h | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
|
||||
index ac398e143c9a..89fe4e3592f9 100644
|
||||
--- a/include/linux/nodemask.h
|
||||
+++ b/include/linux/nodemask.h
|
||||
@@ -486,6 +486,7 @@ static inline int num_node_state(enum node_states state)
|
||||
#define first_online_node 0
|
||||
#define first_memory_node 0
|
||||
#define next_online_node(nid) (MAX_NUMNODES)
|
||||
+#define next_memory_node(nid) (MAX_NUMNODES)
|
||||
#define nr_node_ids 1U
|
||||
#define nr_online_nodes 1U
|
||||
|
||||
--
|
||||
2.31.1.751.gd2f1c929bd-goog
|
||||
|
||||
|
@ -0,0 +1,150 @@
|
||||
From mboxrd@z Thu Jan 1 00:00:00 1970
|
||||
Return-Path: <linux-kernel-owner@kernel.org>
|
||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
|
||||
aws-us-west-2-korg-lkml-1.web.codeaurora.org
|
||||
X-Spam-Level:
|
||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
|
||||
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
|
||||
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
|
||||
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
|
||||
version=3.4.0
|
||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
|
||||
by smtp.lore.kernel.org (Postfix) with ESMTP id 5894CC43460
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:18 +0000 (UTC)
|
||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
|
||||
by mail.kernel.org (Postfix) with ESMTP id 3DF1B61355
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:18 +0000 (UTC)
|
||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
|
||||
id S230492AbhETGzi (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:38 -0400
|
||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37880 "EHLO
|
||||
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
|
||||
with ESMTP id S230430AbhETGz2 (ORCPT
|
||||
<rfc822;linux-kernel@vger.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:28 -0400
|
||||
Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49])
|
||||
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id F2439C061761
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:05 -0700 (PDT)
|
||||
Received: by mail-yb1-xb49.google.com with SMTP id p138-20020a2542900000b029051304a381d9so9042617yba.20
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:05 -0700 (PDT)
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=google.com; s=20161025;
|
||||
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
|
||||
:cc;
|
||||
bh=u+gVIGEYIPA/2ZnKQ/jn+syjhmo8lEqvUYZxU0dhdmw=;
|
||||
b=ilLDWOeIOTKbkZAyTcceDtOpp2Z6oTFrQhsVyqk7X5l9/6+8NJYYI+dDrFdNy8GHPn
|
||||
TXNwPsD+oRKvYdGx4axZhlkFzOkdcr+xYDHDYwdfV6GJubHW0qUcVuNhCtoKbC5rA7Rc
|
||||
HLlOQqtRJZ/ivTzUig8CQccV040hHCbuz35dLgXbD1dVokwc1cOuKZaTLQpYVLsUP3Bu
|
||||
MGJAAygLFemJO4Lj2rtnjvJG8CDZr9Z0uZhqKEqHkyenPQKZNhlA4Evgi1wYHSSLSqnJ
|
||||
48ySo0abwH067PuNMNMETfFX32LpXeIda/dgmAGMAOCqUYbqyCKHzmjDuutRjkgtmoG5
|
||||
3meg==
|
||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=1e100.net; s=20161025;
|
||||
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
|
||||
:references:subject:from:to:cc;
|
||||
bh=u+gVIGEYIPA/2ZnKQ/jn+syjhmo8lEqvUYZxU0dhdmw=;
|
||||
b=kGPHggyxo0QzLlCv4G7r06Qb9zfq6cYJu4n3UotivJGpIGltx02xhr3dmw5myvMMTB
|
||||
+ZbHFgrPBlvHRFOzV44X/BNunb0sZE08NWoL7Ukl2CPGFW5C4ojk1f5ULcVpNMkwMHLM
|
||||
a8+V1TCPtJTZj/8tHL+9HnYHlYz/Bigq1ANURCC14mJjIXuCM5eCU6+JTuizAZzNiIIr
|
||||
u8eXfLqGhtBe68cHudHsJtza4h2srgcHTDTznQxwVhruHz3nU10Sni6vFJNWMxYzEfHy
|
||||
179XSwdgavLRIWc5CxN1biYoS2EkJnVyEf1eYcyMtFUArZKIHQtx6DsDN2wNQ2P6YXD5
|
||||
BUtw==
|
||||
X-Gm-Message-State: AOAM533YpaJcNo3c5ia3Q6gdyS41kyWgd490xt0zYDyNkyhH3hi4hj2m
|
||||
uiBbgl6yA1VtsOKBod8/iMtZhtd4aXg=
|
||||
X-Google-Smtp-Source: ABdhPJw2fnax4tdT5Wsay5+rPu2/CLppUyd3dEee27l274OvNNlEZeyxb+pNstV/LKb7q5/PkQmoAivSEq8=
|
||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
|
||||
(user=yuzhao job=sendgmr) by 2002:a05:6902:513:: with SMTP id
|
||||
x19mr5279236ybs.129.1621493645039; Wed, 19 May 2021 23:54:05 -0700 (PDT)
|
||||
Date: Thu, 20 May 2021 00:53:44 -0600
|
||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
Message-Id: <20210520065355.2736558-4-yuzhao@google.com>
|
||||
Mime-Version: 1.0
|
||||
References: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
|
||||
Subject: [PATCH v3 03/14] include/linux/cgroup.h: export cgroup_mutex
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
To: linux-mm@kvack.org
|
||||
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
|
||||
Andrew Morton <akpm@linux-foundation.org>,
|
||||
Dave Chinner <david@fromorbit.com>,
|
||||
Dave Hansen <dave.hansen@linux.intel.com>,
|
||||
Donald Carr <sirspudd@gmail.com>,
|
||||
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
|
||||
Johannes Weiner <hannes@cmpxchg.org>,
|
||||
Jonathan Corbet <corbet@lwn.net>,
|
||||
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
|
||||
Konstantin Kharlamov <hi-angel@yandex.ru>,
|
||||
Marcus Seyfarth <m.seyfarth@gmail.com>,
|
||||
Matthew Wilcox <willy@infradead.org>,
|
||||
Mel Gorman <mgorman@suse.de>,
|
||||
Miaohe Lin <linmiaohe@huawei.com>,
|
||||
Michael Larabel <michael@michaellarabel.com>,
|
||||
Michal Hocko <mhocko@suse.com>,
|
||||
Michel Lespinasse <michel@lespinasse.org>,
|
||||
Rik van Riel <riel@surriel.com>,
|
||||
Roman Gushchin <guro@fb.com>,
|
||||
Tim Chen <tim.c.chen@linux.intel.com>,
|
||||
Vlastimil Babka <vbabka@suse.cz>,
|
||||
Yang Shi <shy828301@gmail.com>,
|
||||
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
|
||||
linux-kernel@vger.kernel.org, lkp@lists.01.org,
|
||||
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
|
||||
Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
Precedence: bulk
|
||||
List-ID: <linux-kernel.vger.kernel.org>
|
||||
X-Mailing-List: linux-kernel@vger.kernel.org
|
||||
List-Archive: <https://lore.kernel.org/lkml/>
|
||||
|
||||
cgroup_mutex is needed to synchronize with memcg creations.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
---
|
||||
include/linux/cgroup.h | 15 ++++++++++++++-
|
||||
1 file changed, 14 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
|
||||
index 4f2f79de083e..bd5744360cfa 100644
|
||||
--- a/include/linux/cgroup.h
|
||||
+++ b/include/linux/cgroup.h
|
||||
@@ -432,6 +432,18 @@ static inline void cgroup_put(struct cgroup *cgrp)
|
||||
css_put(&cgrp->self);
|
||||
}
|
||||
|
||||
+extern struct mutex cgroup_mutex;
|
||||
+
|
||||
+static inline void cgroup_lock(void)
|
||||
+{
|
||||
+ mutex_lock(&cgroup_mutex);
|
||||
+}
|
||||
+
|
||||
+static inline void cgroup_unlock(void)
|
||||
+{
|
||||
+ mutex_unlock(&cgroup_mutex);
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* task_css_set_check - obtain a task's css_set with extra access conditions
|
||||
* @task: the task to obtain css_set for
|
||||
@@ -446,7 +458,6 @@ static inline void cgroup_put(struct cgroup *cgrp)
|
||||
* as locks used during the cgroup_subsys::attach() methods.
|
||||
*/
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
-extern struct mutex cgroup_mutex;
|
||||
extern spinlock_t css_set_lock;
|
||||
#define task_css_set_check(task, __c) \
|
||||
rcu_dereference_check((task)->cgroups, \
|
||||
@@ -704,6 +715,8 @@ struct cgroup;
|
||||
static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; }
|
||||
static inline void css_get(struct cgroup_subsys_state *css) {}
|
||||
static inline void css_put(struct cgroup_subsys_state *css) {}
|
||||
+static inline void cgroup_lock(void) {}
|
||||
+static inline void cgroup_unlock(void) {}
|
||||
static inline int cgroup_attach_task_all(struct task_struct *from,
|
||||
struct task_struct *t) { return 0; }
|
||||
static inline int cgroupstats_build(struct cgroupstats *stats,
|
||||
--
|
||||
2.31.1.751.gd2f1c929bd-goog
|
||||
|
||||
|
@ -0,0 +1,214 @@
|
||||
From mboxrd@z Thu Jan 1 00:00:00 1970
|
||||
Return-Path: <linux-kernel-owner@kernel.org>
|
||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
|
||||
aws-us-west-2-korg-lkml-1.web.codeaurora.org
|
||||
X-Spam-Level:
|
||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
|
||||
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
|
||||
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
|
||||
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
|
||||
version=3.4.0
|
||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
|
||||
by smtp.lore.kernel.org (Postfix) with ESMTP id 3FBCEC433B4
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:18 +0000 (UTC)
|
||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
|
||||
by mail.kernel.org (Postfix) with ESMTP id 2678D6108C
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:18 +0000 (UTC)
|
||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
|
||||
id S230467AbhETGzf (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:35 -0400
|
||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37890 "EHLO
|
||||
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
|
||||
with ESMTP id S230435AbhETGz2 (ORCPT
|
||||
<rfc822;linux-kernel@vger.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:28 -0400
|
||||
Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49])
|
||||
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id DC741C0613CE
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:07 -0700 (PDT)
|
||||
Received: by mail-yb1-xb49.google.com with SMTP id 129-20020a2504870000b0290513326cc5e0so8674080ybe.10
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:07 -0700 (PDT)
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=google.com; s=20161025;
|
||||
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
|
||||
:cc;
|
||||
bh=SyTpiiLQc7KnB8Q1E8X3BxXutjquJ4KYbVlpDQgVi/g=;
|
||||
b=F/5vtOLfn2hWqJCcZJILEPNvAi4G3UC/HROV8n8s10GH7JhHnrGHdEho6MiIGVETaO
|
||||
sHn+wn+lopXgJMLEqp5WqaQ769JJNG7YB4Pq15oo9pv+HRPYGP/d500+gP+KrGyChFzI
|
||||
iRtkvAcNwlgumar+mpa5HZRGCb08Jm1ZBJ5134Kg6M2RP3KBMa9LpRBW+jA/uB2ZH6dY
|
||||
SHmfSiGBjz0MLdKbjMO0ZC+E0iCgLKKyI3liy35dgrf7U0uAsmS+Tq+vBabFfUY8cvI2
|
||||
9S4m0Grod6BK2vh7Cxh9tBxuiOnpUkk6GOwodZ5MXTpgU9J25Ztod8Cas20KXVuUUu5L
|
||||
0caQ==
|
||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=1e100.net; s=20161025;
|
||||
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
|
||||
:references:subject:from:to:cc;
|
||||
bh=SyTpiiLQc7KnB8Q1E8X3BxXutjquJ4KYbVlpDQgVi/g=;
|
||||
b=I/lOZpe4tWCS0mala3TZsfeszZC2f6ezlF6QsZ4a/ik+ur09NYRA+x5bJGwlW5GDlq
|
||||
UnLRSTXE7lfK71dSWsWOnbdahNawKurDhQQHAWYazRhXZbx8wQr8tVsCQRJt62tjdH10
|
||||
3hySRpK88u8siMlEwsnnjOD9xPsxcVvC9q60ppd6Eg2OjbtByyWxV86qM2x45/Wp9SgS
|
||||
1K0/jwK6L1A8aN+ccrl6RiewC05OjfkXbKE3qp2X4jLzxo0Z5jchuk4yzUI9a9UtqjR2
|
||||
jluXY0tCVPqedtSHAZ1h+oHspBuclW26af/5c7EGck5IwrADYKN8hL9LfA0GmuxthLa8
|
||||
CT+g==
|
||||
X-Gm-Message-State: AOAM530i7XGlRCP9jxfMmcb0QrUXw8gBhH02/rSREHScRnCCUJvVu8zh
|
||||
smjHRFK1+f6BrShUrxMXHYEhJlEv03M=
|
||||
X-Google-Smtp-Source: ABdhPJxef10azDAzK7UEGotre/hx9MbP+rt8RSM5uDH+7LMog0h8qkdAugFcXq/qcLN78UEvnUxUn/nobaI=
|
||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
|
||||
(user=yuzhao job=sendgmr) by 2002:a25:2009:: with SMTP id g9mr4983935ybg.198.1621493646666;
|
||||
Wed, 19 May 2021 23:54:06 -0700 (PDT)
|
||||
Date: Thu, 20 May 2021 00:53:45 -0600
|
||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
Message-Id: <20210520065355.2736558-5-yuzhao@google.com>
|
||||
Mime-Version: 1.0
|
||||
References: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
|
||||
Subject: [PATCH v3 04/14] mm, x86: support the access bit on non-leaf PMD entries
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
To: linux-mm@kvack.org
|
||||
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
|
||||
Andrew Morton <akpm@linux-foundation.org>,
|
||||
Dave Chinner <david@fromorbit.com>,
|
||||
Dave Hansen <dave.hansen@linux.intel.com>,
|
||||
Donald Carr <sirspudd@gmail.com>,
|
||||
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
|
||||
Johannes Weiner <hannes@cmpxchg.org>,
|
||||
Jonathan Corbet <corbet@lwn.net>,
|
||||
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
|
||||
Konstantin Kharlamov <hi-angel@yandex.ru>,
|
||||
Marcus Seyfarth <m.seyfarth@gmail.com>,
|
||||
Matthew Wilcox <willy@infradead.org>,
|
||||
Mel Gorman <mgorman@suse.de>,
|
||||
Miaohe Lin <linmiaohe@huawei.com>,
|
||||
Michael Larabel <michael@michaellarabel.com>,
|
||||
Michal Hocko <mhocko@suse.com>,
|
||||
Michel Lespinasse <michel@lespinasse.org>,
|
||||
Rik van Riel <riel@surriel.com>,
|
||||
Roman Gushchin <guro@fb.com>,
|
||||
Tim Chen <tim.c.chen@linux.intel.com>,
|
||||
Vlastimil Babka <vbabka@suse.cz>,
|
||||
Yang Shi <shy828301@gmail.com>,
|
||||
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
|
||||
linux-kernel@vger.kernel.org, lkp@lists.01.org,
|
||||
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
|
||||
Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
Precedence: bulk
|
||||
List-ID: <linux-kernel.vger.kernel.org>
|
||||
X-Mailing-List: linux-kernel@vger.kernel.org
|
||||
List-Archive: <https://lore.kernel.org/lkml/>
|
||||
|
||||
Some architectures support the accessed bit on non-leaf PMD entries
|
||||
(parents) in addition to leaf PTE entries (children) where pages are
|
||||
mapped, e.g., x86_64 sets the accessed bit on a parent when using it
|
||||
as part of linear-address translation [1]. Page table walkers who are
|
||||
interested in the accessed bit on children can take advantage of this:
|
||||
they do not need to search the children when the accessed bit is not
|
||||
set on a parent, given that they have previously cleared the accessed
|
||||
bit on this parent.
|
||||
|
||||
[1]: Intel 64 and IA-32 Architectures Software Developer's Manual
|
||||
Volume 3 (October 2019), section 4.8
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
---
|
||||
arch/Kconfig | 9 +++++++++
|
||||
arch/x86/Kconfig | 1 +
|
||||
arch/x86/include/asm/pgtable.h | 2 +-
|
||||
arch/x86/mm/pgtable.c | 5 ++++-
|
||||
include/linux/pgtable.h | 4 ++--
|
||||
5 files changed, 17 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/arch/Kconfig b/arch/Kconfig
|
||||
index c45b770d3579..e3812adc69f7 100644
|
||||
--- a/arch/Kconfig
|
||||
+++ b/arch/Kconfig
|
||||
@@ -826,6 +826,15 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE
|
||||
config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
|
||||
bool
|
||||
|
||||
+config HAVE_ARCH_PARENT_PMD_YOUNG
|
||||
+ bool
|
||||
+ depends on PGTABLE_LEVELS > 2
|
||||
+ help
|
||||
+ Architectures that select this are able to set the accessed bit on
|
||||
+ non-leaf PMD entries in addition to leaf PTE entries where pages are
|
||||
+ mapped. For them, page table walkers that clear the accessed bit may
|
||||
+ stop at non-leaf PMD entries if they do not see the accessed bit.
|
||||
+
|
||||
config HAVE_ARCH_HUGE_VMAP
|
||||
bool
|
||||
|
||||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
|
||||
index 0045e1b44190..f619055c4537 100644
|
||||
--- a/arch/x86/Kconfig
|
||||
+++ b/arch/x86/Kconfig
|
||||
@@ -170,6 +170,7 @@ config X86
|
||||
select HAVE_ARCH_TRACEHOOK
|
||||
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
|
||||
select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
|
||||
+ select HAVE_ARCH_PARENT_PMD_YOUNG if X86_64
|
||||
select HAVE_ARCH_USERFAULTFD_WP if X86_64 && USERFAULTFD
|
||||
select HAVE_ARCH_USERFAULTFD_MINOR if X86_64 && USERFAULTFD
|
||||
select HAVE_ARCH_VMAP_STACK if X86_64
|
||||
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
|
||||
index b1099f2d9800..3a24d2af4e9b 100644
|
||||
--- a/arch/x86/include/asm/pgtable.h
|
||||
+++ b/arch/x86/include/asm/pgtable.h
|
||||
@@ -846,7 +846,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
|
||||
|
||||
static inline int pmd_bad(pmd_t pmd)
|
||||
{
|
||||
- return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
|
||||
+ return ((pmd_flags(pmd) | _PAGE_ACCESSED) & ~_PAGE_USER) != _KERNPG_TABLE;
|
||||
}
|
||||
|
||||
static inline unsigned long pages_to_mb(unsigned long npg)
|
||||
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
|
||||
index d27cf69e811d..b968d6bd28b6 100644
|
||||
--- a/arch/x86/mm/pgtable.c
|
||||
+++ b/arch/x86/mm/pgtable.c
|
||||
@@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
|
||||
return ret;
|
||||
}
|
||||
|
||||
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG)
|
||||
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
||||
unsigned long addr, pmd_t *pmdp)
|
||||
{
|
||||
@@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
||||
|
||||
return ret;
|
||||
}
|
||||
+#endif
|
||||
+
|
||||
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
int pudp_test_and_clear_young(struct vm_area_struct *vma,
|
||||
unsigned long addr, pud_t *pudp)
|
||||
{
|
||||
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
|
||||
index 46b13780c2c8..94ecc1d277a2 100644
|
||||
--- a/include/linux/pgtable.h
|
||||
+++ b/include/linux/pgtable.h
|
||||
@@ -193,7 +193,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
|
||||
#endif
|
||||
|
||||
#ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
|
||||
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG)
|
||||
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
||||
unsigned long address,
|
||||
pmd_t *pmdp)
|
||||
@@ -214,7 +214,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
|
||||
BUILD_BUG();
|
||||
return 0;
|
||||
}
|
||||
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG */
|
||||
#endif
|
||||
|
||||
#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
|
||||
--
|
||||
2.31.1.751.gd2f1c929bd-goog
|
||||
|
||||
|
@ -0,0 +1,323 @@
|
||||
From mboxrd@z Thu Jan 1 00:00:00 1970
|
||||
Return-Path: <linux-kernel-owner@kernel.org>
|
||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
|
||||
aws-us-west-2-korg-lkml-1.web.codeaurora.org
|
||||
X-Spam-Level:
|
||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
|
||||
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
|
||||
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
|
||||
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
|
||||
version=3.4.0
|
||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
|
||||
by smtp.lore.kernel.org (Postfix) with ESMTP id 614A3C43461
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:20 +0000 (UTC)
|
||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
|
||||
by mail.kernel.org (Postfix) with ESMTP id 44F1D61186
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:20 +0000 (UTC)
|
||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
|
||||
id S230499AbhETGzj (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:39 -0400
|
||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37910 "EHLO
|
||||
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
|
||||
with ESMTP id S230452AbhETGzb (ORCPT
|
||||
<rfc822;linux-kernel@vger.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:31 -0400
|
||||
Received: from mail-qv1-xf4a.google.com (mail-qv1-xf4a.google.com [IPv6:2607:f8b0:4864:20::f4a])
|
||||
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 245E9C06138B
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:09 -0700 (PDT)
|
||||
Received: by mail-qv1-xf4a.google.com with SMTP id c5-20020a0ca9c50000b02901aede9b5061so12455193qvb.14
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:09 -0700 (PDT)
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=google.com; s=20161025;
|
||||
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
|
||||
:cc;
|
||||
bh=Mvah71zeYWGfuEGRbEsLqflL7nXzJ5AdEYR+UovaRYY=;
|
||||
b=EbaEOCfalGO/Os4OKgi4M0ux2tbj/9YV7PKsVCGQdr/8gcQO1wsCl7ywZY/pNC7eXz
|
||||
NoDBi8g1D9jnfogpVvkt+RSkZlQ/wIQfMR8guk0/qk6EZebG/utx01m5VEv0G0jHv0Zr
|
||||
k6d+sXr5o4NS2Kl/7Ur6tOhmyQYo1mJS8W6wy8htCD9qRhKO9rljjjcNNoQFh7jF53I2
|
||||
oqJdy/ZRwC1k/6/iastZquGfCQ1ZDPp9qbDEfPp6RfaePLHAvS2mEcu3b5IlddG8UjMG
|
||||
gnQkyzTi9RZ60CdCTtFo/33uy+SQMY1vKs2glF5gunlSHFA1EaqvtsVi1W2ngxGWiGKh
|
||||
ajyA==
|
||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=1e100.net; s=20161025;
|
||||
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
|
||||
:references:subject:from:to:cc;
|
||||
bh=Mvah71zeYWGfuEGRbEsLqflL7nXzJ5AdEYR+UovaRYY=;
|
||||
b=fKmHfVBojnVD7v0HScBBju/h6BeYHqtiGWztHdkYBBMbAblj6OoCIkOdzbET71QBoY
|
||||
R3bKjlFy/3PcckxPJrbFRhSVsTk4faV9uQfJlZuedG4G3O2EY6PeqhlGN722JnrxjpVk
|
||||
og7sFMeaPJCcbmkZyC+jm4xmnS5Ox2CVjXqgw96+ViRsnfjSJ3Vvu0mOk/ab+jfZ3/ZB
|
||||
HW3plDWWuAO8ijU0AazsQOOG3rvfr/szKKmeJs3e5a7HPpChkX9wvZKAnlyPw+6MvtLe
|
||||
ssM2BgmFkGzG2yd8AzvepX/afdU14K4bigWqSjN8IRR7JVDnGdBKYR4N/tH0JI/apOet
|
||||
Rn1g==
|
||||
X-Gm-Message-State: AOAM5337ZUQnuxydxDF/VBFRzHtx51o3/N5HDpf9MYMdQrx5kcdbVyhf
|
||||
HEyO2/+GFcfRnTIPxodPADdKQi6qUwA=
|
||||
X-Google-Smtp-Source: ABdhPJxyjS6uG8i8Sad50t/5Pf/9RTagtFbxDvAuxBuu8l0odJlhqIGjN9aFII0GYF+uFWIxSdphl51ZKHc=
|
||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
|
||||
(user=yuzhao job=sendgmr) by 2002:a0c:d84d:: with SMTP id i13mr3839330qvj.32.1621493648268;
|
||||
Wed, 19 May 2021 23:54:08 -0700 (PDT)
|
||||
Date: Thu, 20 May 2021 00:53:46 -0600
|
||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
Message-Id: <20210520065355.2736558-6-yuzhao@google.com>
|
||||
Mime-Version: 1.0
|
||||
References: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
|
||||
Subject: [PATCH v3 05/14] mm/vmscan.c: refactor shrink_node()
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
To: linux-mm@kvack.org
|
||||
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
|
||||
Andrew Morton <akpm@linux-foundation.org>,
|
||||
Dave Chinner <david@fromorbit.com>,
|
||||
Dave Hansen <dave.hansen@linux.intel.com>,
|
||||
Donald Carr <sirspudd@gmail.com>,
|
||||
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
|
||||
Johannes Weiner <hannes@cmpxchg.org>,
|
||||
Jonathan Corbet <corbet@lwn.net>,
|
||||
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
|
||||
Konstantin Kharlamov <hi-angel@yandex.ru>,
|
||||
Marcus Seyfarth <m.seyfarth@gmail.com>,
|
||||
Matthew Wilcox <willy@infradead.org>,
|
||||
Mel Gorman <mgorman@suse.de>,
|
||||
Miaohe Lin <linmiaohe@huawei.com>,
|
||||
Michael Larabel <michael@michaellarabel.com>,
|
||||
Michal Hocko <mhocko@suse.com>,
|
||||
Michel Lespinasse <michel@lespinasse.org>,
|
||||
Rik van Riel <riel@surriel.com>,
|
||||
Roman Gushchin <guro@fb.com>,
|
||||
Tim Chen <tim.c.chen@linux.intel.com>,
|
||||
Vlastimil Babka <vbabka@suse.cz>,
|
||||
Yang Shi <shy828301@gmail.com>,
|
||||
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
|
||||
linux-kernel@vger.kernel.org, lkp@lists.01.org,
|
||||
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
|
||||
Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
Precedence: bulk
|
||||
List-ID: <linux-kernel.vger.kernel.org>
|
||||
X-Mailing-List: linux-kernel@vger.kernel.org
|
||||
List-Archive: <https://lore.kernel.org/lkml/>
|
||||
|
||||
Heuristics that determine scan balance between anon and file LRUs are
|
||||
rather independent. Move them into a separate function to improve
|
||||
readability.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
---
|
||||
mm/vmscan.c | 186 +++++++++++++++++++++++++++-------------------------
|
||||
1 file changed, 98 insertions(+), 88 deletions(-)
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 5199b9696bab..2339459c97d4 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -2421,6 +2421,103 @@ enum scan_balance {
|
||||
SCAN_FILE,
|
||||
};
|
||||
|
||||
+static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
|
||||
+{
|
||||
+ unsigned long file;
|
||||
+ struct lruvec *target_lruvec;
|
||||
+
|
||||
+ target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
|
||||
+
|
||||
+ /*
|
||||
+ * Determine the scan balance between anon and file LRUs.
|
||||
+ */
|
||||
+ spin_lock_irq(&target_lruvec->lru_lock);
|
||||
+ sc->anon_cost = target_lruvec->anon_cost;
|
||||
+ sc->file_cost = target_lruvec->file_cost;
|
||||
+ spin_unlock_irq(&target_lruvec->lru_lock);
|
||||
+
|
||||
+ /*
|
||||
+ * Target desirable inactive:active list ratios for the anon
|
||||
+ * and file LRU lists.
|
||||
+ */
|
||||
+ if (!sc->force_deactivate) {
|
||||
+ unsigned long refaults;
|
||||
+
|
||||
+ refaults = lruvec_page_state(target_lruvec,
|
||||
+ WORKINGSET_ACTIVATE_ANON);
|
||||
+ if (refaults != target_lruvec->refaults[0] ||
|
||||
+ inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
|
||||
+ sc->may_deactivate |= DEACTIVATE_ANON;
|
||||
+ else
|
||||
+ sc->may_deactivate &= ~DEACTIVATE_ANON;
|
||||
+
|
||||
+ /*
|
||||
+ * When refaults are being observed, it means a new
|
||||
+ * workingset is being established. Deactivate to get
|
||||
+ * rid of any stale active pages quickly.
|
||||
+ */
|
||||
+ refaults = lruvec_page_state(target_lruvec,
|
||||
+ WORKINGSET_ACTIVATE_FILE);
|
||||
+ if (refaults != target_lruvec->refaults[1] ||
|
||||
+ inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
|
||||
+ sc->may_deactivate |= DEACTIVATE_FILE;
|
||||
+ else
|
||||
+ sc->may_deactivate &= ~DEACTIVATE_FILE;
|
||||
+ } else
|
||||
+ sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
|
||||
+
|
||||
+ /*
|
||||
+ * If we have plenty of inactive file pages that aren't
|
||||
+ * thrashing, try to reclaim those first before touching
|
||||
+ * anonymous pages.
|
||||
+ */
|
||||
+ file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
|
||||
+ if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
|
||||
+ sc->cache_trim_mode = 1;
|
||||
+ else
|
||||
+ sc->cache_trim_mode = 0;
|
||||
+
|
||||
+ /*
|
||||
+ * Prevent the reclaimer from falling into the cache trap: as
|
||||
+ * cache pages start out inactive, every cache fault will tip
|
||||
+ * the scan balance towards the file LRU. And as the file LRU
|
||||
+ * shrinks, so does the window for rotation from references.
|
||||
+ * This means we have a runaway feedback loop where a tiny
|
||||
+ * thrashing file LRU becomes infinitely more attractive than
|
||||
+ * anon pages. Try to detect this based on file LRU size.
|
||||
+ */
|
||||
+ if (!cgroup_reclaim(sc)) {
|
||||
+ unsigned long total_high_wmark = 0;
|
||||
+ unsigned long free, anon;
|
||||
+ int z;
|
||||
+
|
||||
+ free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
|
||||
+ file = node_page_state(pgdat, NR_ACTIVE_FILE) +
|
||||
+ node_page_state(pgdat, NR_INACTIVE_FILE);
|
||||
+
|
||||
+ for (z = 0; z < MAX_NR_ZONES; z++) {
|
||||
+ struct zone *zone = &pgdat->node_zones[z];
|
||||
+
|
||||
+ if (!managed_zone(zone))
|
||||
+ continue;
|
||||
+
|
||||
+ total_high_wmark += high_wmark_pages(zone);
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
+ * Consider anon: if that's low too, this isn't a
|
||||
+ * runaway file reclaim problem, but rather just
|
||||
+ * extreme pressure. Reclaim as per usual then.
|
||||
+ */
|
||||
+ anon = node_page_state(pgdat, NR_INACTIVE_ANON);
|
||||
+
|
||||
+ sc->file_is_tiny =
|
||||
+ file + free <= total_high_wmark &&
|
||||
+ !(sc->may_deactivate & DEACTIVATE_ANON) &&
|
||||
+ anon >> sc->priority;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/*
|
||||
* Determine how aggressively the anon and file LRU lists should be
|
||||
* scanned. The relative value of each set of LRU lists is determined
|
||||
@@ -2866,7 +2963,6 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
|
||||
unsigned long nr_reclaimed, nr_scanned;
|
||||
struct lruvec *target_lruvec;
|
||||
bool reclaimable = false;
|
||||
- unsigned long file;
|
||||
|
||||
target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
|
||||
|
||||
@@ -2876,93 +2972,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
|
||||
nr_reclaimed = sc->nr_reclaimed;
|
||||
nr_scanned = sc->nr_scanned;
|
||||
|
||||
- /*
|
||||
- * Determine the scan balance between anon and file LRUs.
|
||||
- */
|
||||
- spin_lock_irq(&target_lruvec->lru_lock);
|
||||
- sc->anon_cost = target_lruvec->anon_cost;
|
||||
- sc->file_cost = target_lruvec->file_cost;
|
||||
- spin_unlock_irq(&target_lruvec->lru_lock);
|
||||
-
|
||||
- /*
|
||||
- * Target desirable inactive:active list ratios for the anon
|
||||
- * and file LRU lists.
|
||||
- */
|
||||
- if (!sc->force_deactivate) {
|
||||
- unsigned long refaults;
|
||||
-
|
||||
- refaults = lruvec_page_state(target_lruvec,
|
||||
- WORKINGSET_ACTIVATE_ANON);
|
||||
- if (refaults != target_lruvec->refaults[0] ||
|
||||
- inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
|
||||
- sc->may_deactivate |= DEACTIVATE_ANON;
|
||||
- else
|
||||
- sc->may_deactivate &= ~DEACTIVATE_ANON;
|
||||
-
|
||||
- /*
|
||||
- * When refaults are being observed, it means a new
|
||||
- * workingset is being established. Deactivate to get
|
||||
- * rid of any stale active pages quickly.
|
||||
- */
|
||||
- refaults = lruvec_page_state(target_lruvec,
|
||||
- WORKINGSET_ACTIVATE_FILE);
|
||||
- if (refaults != target_lruvec->refaults[1] ||
|
||||
- inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
|
||||
- sc->may_deactivate |= DEACTIVATE_FILE;
|
||||
- else
|
||||
- sc->may_deactivate &= ~DEACTIVATE_FILE;
|
||||
- } else
|
||||
- sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
|
||||
-
|
||||
- /*
|
||||
- * If we have plenty of inactive file pages that aren't
|
||||
- * thrashing, try to reclaim those first before touching
|
||||
- * anonymous pages.
|
||||
- */
|
||||
- file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
|
||||
- if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
|
||||
- sc->cache_trim_mode = 1;
|
||||
- else
|
||||
- sc->cache_trim_mode = 0;
|
||||
-
|
||||
- /*
|
||||
- * Prevent the reclaimer from falling into the cache trap: as
|
||||
- * cache pages start out inactive, every cache fault will tip
|
||||
- * the scan balance towards the file LRU. And as the file LRU
|
||||
- * shrinks, so does the window for rotation from references.
|
||||
- * This means we have a runaway feedback loop where a tiny
|
||||
- * thrashing file LRU becomes infinitely more attractive than
|
||||
- * anon pages. Try to detect this based on file LRU size.
|
||||
- */
|
||||
- if (!cgroup_reclaim(sc)) {
|
||||
- unsigned long total_high_wmark = 0;
|
||||
- unsigned long free, anon;
|
||||
- int z;
|
||||
-
|
||||
- free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
|
||||
- file = node_page_state(pgdat, NR_ACTIVE_FILE) +
|
||||
- node_page_state(pgdat, NR_INACTIVE_FILE);
|
||||
-
|
||||
- for (z = 0; z < MAX_NR_ZONES; z++) {
|
||||
- struct zone *zone = &pgdat->node_zones[z];
|
||||
- if (!managed_zone(zone))
|
||||
- continue;
|
||||
-
|
||||
- total_high_wmark += high_wmark_pages(zone);
|
||||
- }
|
||||
-
|
||||
- /*
|
||||
- * Consider anon: if that's low too, this isn't a
|
||||
- * runaway file reclaim problem, but rather just
|
||||
- * extreme pressure. Reclaim as per usual then.
|
||||
- */
|
||||
- anon = node_page_state(pgdat, NR_INACTIVE_ANON);
|
||||
-
|
||||
- sc->file_is_tiny =
|
||||
- file + free <= total_high_wmark &&
|
||||
- !(sc->may_deactivate & DEACTIVATE_ANON) &&
|
||||
- anon >> sc->priority;
|
||||
- }
|
||||
+ prepare_scan_count(pgdat, sc);
|
||||
|
||||
shrink_node_memcgs(pgdat, sc);
|
||||
|
||||
--
|
||||
2.31.1.751.gd2f1c929bd-goog
|
||||
|
||||
|
@ -0,0 +1,233 @@
|
||||
From mboxrd@z Thu Jan 1 00:00:00 1970
|
||||
Return-Path: <linux-kernel-owner@kernel.org>
|
||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
|
||||
aws-us-west-2-korg-lkml-1.web.codeaurora.org
|
||||
X-Spam-Level:
|
||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
|
||||
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
|
||||
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
|
||||
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
|
||||
version=3.4.0
|
||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
|
||||
by smtp.lore.kernel.org (Postfix) with ESMTP id 173FBC433B4
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:22 +0000 (UTC)
|
||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
|
||||
by mail.kernel.org (Postfix) with ESMTP id EA57F61261
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:21 +0000 (UTC)
|
||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
|
||||
id S230452AbhETGzl (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:41 -0400
|
||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37912 "EHLO
|
||||
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
|
||||
with ESMTP id S230466AbhETGzc (ORCPT
|
||||
<rfc822;linux-kernel@vger.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:32 -0400
|
||||
Received: from mail-qk1-x749.google.com (mail-qk1-x749.google.com [IPv6:2607:f8b0:4864:20::749])
|
||||
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C46CEC061574
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:10 -0700 (PDT)
|
||||
Received: by mail-qk1-x749.google.com with SMTP id d201-20020ae9efd20000b02902e9e9d8d9dcso11687575qkg.10
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:10 -0700 (PDT)
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=google.com; s=20161025;
|
||||
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
|
||||
:cc;
|
||||
bh=Yl+DjwsBODMfyiL17kQw/9BLrEC9zXFz8Mxu59WzP4Y=;
|
||||
b=Mrzy2M2k9QAzqf4Qlq2wFgC1uycH9/GSOy89uVYR+gUD3oaKMWpOn95M8hs2EzT1FG
|
||||
/N40V1/URD7mP48ZsP72lzG3rMvI0SepioQCu+0asiEBUJUrtY6kEz2CKTJEB4MwAGRU
|
||||
xnH/e/C5szSot199E2rMI+ZUJo/y8pBDfNIzZ7XzQ811Wxr6oM1C4DVA6DHQSWtdqS5J
|
||||
VMMjdjvsXW8hHCzs+5W06EYb73kJeqPHOFZ+XFMWXFrm8l/F2qujro4FMOgux0JB/XLW
|
||||
32qxH7ovQyCHL8Gg6vGigkolgFZhe6oag4JfCx0cj6eFlP+2j2w5ryU0kRvvHVILA9Xq
|
||||
e6Hg==
|
||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=1e100.net; s=20161025;
|
||||
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
|
||||
:references:subject:from:to:cc;
|
||||
bh=Yl+DjwsBODMfyiL17kQw/9BLrEC9zXFz8Mxu59WzP4Y=;
|
||||
b=Nel79d/ExWs1HdZZs12GqadGHxgI3W8FnqvRED1IBxJnCtiluti6ndyhp/JXtjDIct
|
||||
YtIRkMNVwmwMk6EFx5QOv0Br5VYQ/72hZsMd8kNj9z/m8CUtpMnJluKVNeyNT+Livkww
|
||||
y2hwgGJiuvWxBIy+ja/GH64SkCJTuttiOxpNFaRxB1STfhM2PjwwwiQG5GTxbqkkn3Dg
|
||||
fKBlHYI17sQ05tZRovJETs0f+1wBQPftjwjm6PJzpZ3ooNcBXdB6hRUrGZ6Pmyf2bRBR
|
||||
BptRYxbaQdSprCABGMW/2ySltaJaFitv6fShejQMxDX8xe+JDYJ0kEn9/3aMVEt+Vy+X
|
||||
rTAw==
|
||||
X-Gm-Message-State: AOAM532IBC74aEi91Xqgl2rYw1QINB7mrdZT3v/EvERcHHCbI6v9/2cU
|
||||
Qe2UGq6f4OIKykMrwADvTCo5whMI+DQ=
|
||||
X-Google-Smtp-Source: ABdhPJyyAX33aocRYynTtALpNyjv0w+Wa7lDS9awJiNK6me024wMLg+4FL2RHzwNDLwZg9DFBZ+B1LiDSsc=
|
||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
|
||||
(user=yuzhao job=sendgmr) by 2002:a05:6214:18d:: with SMTP id
|
||||
q13mr3804726qvr.60.1621493649877; Wed, 19 May 2021 23:54:09 -0700 (PDT)
|
||||
Date: Thu, 20 May 2021 00:53:47 -0600
|
||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
Message-Id: <20210520065355.2736558-7-yuzhao@google.com>
|
||||
Mime-Version: 1.0
|
||||
References: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
|
||||
Subject: [PATCH v3 06/14] mm/workingset.c: refactor pack_shadow() and unpack_shadow()
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
To: linux-mm@kvack.org
|
||||
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
|
||||
Andrew Morton <akpm@linux-foundation.org>,
|
||||
Dave Chinner <david@fromorbit.com>,
|
||||
Dave Hansen <dave.hansen@linux.intel.com>,
|
||||
Donald Carr <sirspudd@gmail.com>,
|
||||
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
|
||||
Johannes Weiner <hannes@cmpxchg.org>,
|
||||
Jonathan Corbet <corbet@lwn.net>,
|
||||
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
|
||||
Konstantin Kharlamov <hi-angel@yandex.ru>,
|
||||
Marcus Seyfarth <m.seyfarth@gmail.com>,
|
||||
Matthew Wilcox <willy@infradead.org>,
|
||||
Mel Gorman <mgorman@suse.de>,
|
||||
Miaohe Lin <linmiaohe@huawei.com>,
|
||||
Michael Larabel <michael@michaellarabel.com>,
|
||||
Michal Hocko <mhocko@suse.com>,
|
||||
Michel Lespinasse <michel@lespinasse.org>,
|
||||
Rik van Riel <riel@surriel.com>,
|
||||
Roman Gushchin <guro@fb.com>,
|
||||
Tim Chen <tim.c.chen@linux.intel.com>,
|
||||
Vlastimil Babka <vbabka@suse.cz>,
|
||||
Yang Shi <shy828301@gmail.com>,
|
||||
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
|
||||
linux-kernel@vger.kernel.org, lkp@lists.01.org,
|
||||
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
|
||||
Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
Precedence: bulk
|
||||
List-ID: <linux-kernel.vger.kernel.org>
|
||||
X-Mailing-List: linux-kernel@vger.kernel.org
|
||||
List-Archive: <https://lore.kernel.org/lkml/>
|
||||
|
||||
This patches moves the bucket order and PageWorkingset() out of
|
||||
pack_shadow() and unpack_shadow(). It has no merits on its own but
|
||||
makes the upcoming changes to mm/workingset.c less diffy.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
---
|
||||
mm/workingset.c | 53 ++++++++++++++++++++-----------------------------
|
||||
1 file changed, 22 insertions(+), 31 deletions(-)
|
||||
|
||||
diff --git a/mm/workingset.c b/mm/workingset.c
|
||||
index b7cdeca5a76d..edb8aed2587e 100644
|
||||
--- a/mm/workingset.c
|
||||
+++ b/mm/workingset.c
|
||||
@@ -168,9 +168,9 @@
|
||||
* refault distance will immediately activate the refaulting page.
|
||||
*/
|
||||
|
||||
-#define EVICTION_SHIFT ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \
|
||||
- 1 + NODES_SHIFT + MEM_CGROUP_ID_SHIFT)
|
||||
-#define EVICTION_MASK (~0UL >> EVICTION_SHIFT)
|
||||
+#define EVICTION_SHIFT (BITS_PER_XA_VALUE - MEM_CGROUP_ID_SHIFT - NODES_SHIFT)
|
||||
+#define EVICTION_MASK (BIT(EVICTION_SHIFT) - 1)
|
||||
+#define WORKINGSET_WIDTH 1
|
||||
|
||||
/*
|
||||
* Eviction timestamps need to be able to cover the full range of
|
||||
@@ -182,36 +182,23 @@
|
||||
*/
|
||||
static unsigned int bucket_order __read_mostly;
|
||||
|
||||
-static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction,
|
||||
- bool workingset)
|
||||
+static void *pack_shadow(int memcg_id, struct pglist_data *pgdat, unsigned long val)
|
||||
{
|
||||
- eviction >>= bucket_order;
|
||||
- eviction &= EVICTION_MASK;
|
||||
- eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
|
||||
- eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
|
||||
- eviction = (eviction << 1) | workingset;
|
||||
+ val = (val << MEM_CGROUP_ID_SHIFT) | memcg_id;
|
||||
+ val = (val << NODES_SHIFT) | pgdat->node_id;
|
||||
|
||||
- return xa_mk_value(eviction);
|
||||
+ return xa_mk_value(val);
|
||||
}
|
||||
|
||||
-static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
|
||||
- unsigned long *evictionp, bool *workingsetp)
|
||||
+static unsigned long unpack_shadow(void *shadow, int *memcg_id, struct pglist_data **pgdat)
|
||||
{
|
||||
- unsigned long entry = xa_to_value(shadow);
|
||||
- int memcgid, nid;
|
||||
- bool workingset;
|
||||
+ unsigned long val = xa_to_value(shadow);
|
||||
|
||||
- workingset = entry & 1;
|
||||
- entry >>= 1;
|
||||
- nid = entry & ((1UL << NODES_SHIFT) - 1);
|
||||
- entry >>= NODES_SHIFT;
|
||||
- memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
|
||||
- entry >>= MEM_CGROUP_ID_SHIFT;
|
||||
+ *pgdat = NODE_DATA(val & (BIT(NODES_SHIFT) - 1));
|
||||
+ val >>= NODES_SHIFT;
|
||||
+ *memcg_id = val & (BIT(MEM_CGROUP_ID_SHIFT) - 1);
|
||||
|
||||
- *memcgidp = memcgid;
|
||||
- *pgdat = NODE_DATA(nid);
|
||||
- *evictionp = entry << bucket_order;
|
||||
- *workingsetp = workingset;
|
||||
+ return val >> MEM_CGROUP_ID_SHIFT;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -266,8 +253,10 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
|
||||
/* XXX: target_memcg can be NULL, go through lruvec */
|
||||
memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
|
||||
eviction = atomic_long_read(&lruvec->nonresident_age);
|
||||
+ eviction >>= bucket_order;
|
||||
+ eviction = (eviction << WORKINGSET_WIDTH) | PageWorkingset(page);
|
||||
workingset_age_nonresident(lruvec, thp_nr_pages(page));
|
||||
- return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
|
||||
+ return pack_shadow(memcgid, pgdat, eviction);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -294,7 +283,7 @@ void workingset_refault(struct page *page, void *shadow)
|
||||
bool workingset;
|
||||
int memcgid;
|
||||
|
||||
- unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
|
||||
+ eviction = unpack_shadow(shadow, &memcgid, &pgdat);
|
||||
|
||||
rcu_read_lock();
|
||||
/*
|
||||
@@ -318,6 +307,8 @@ void workingset_refault(struct page *page, void *shadow)
|
||||
goto out;
|
||||
eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);
|
||||
refault = atomic_long_read(&eviction_lruvec->nonresident_age);
|
||||
+ workingset = eviction & (BIT(WORKINGSET_WIDTH) - 1);
|
||||
+ eviction = (eviction >> WORKINGSET_WIDTH) << bucket_order;
|
||||
|
||||
/*
|
||||
* Calculate the refault distance
|
||||
@@ -335,7 +326,7 @@ void workingset_refault(struct page *page, void *shadow)
|
||||
* longest time, so the occasional inappropriate activation
|
||||
* leading to pressure on the active list is not a problem.
|
||||
*/
|
||||
- refault_distance = (refault - eviction) & EVICTION_MASK;
|
||||
+ refault_distance = (refault - eviction) & (EVICTION_MASK >> WORKINGSET_WIDTH);
|
||||
|
||||
/*
|
||||
* The activation decision for this page is made at the level
|
||||
@@ -593,7 +584,7 @@ static int __init workingset_init(void)
|
||||
unsigned int max_order;
|
||||
int ret;
|
||||
|
||||
- BUILD_BUG_ON(BITS_PER_LONG < EVICTION_SHIFT);
|
||||
+ BUILD_BUG_ON(EVICTION_SHIFT < WORKINGSET_WIDTH);
|
||||
/*
|
||||
* Calculate the eviction bucket size to cover the longest
|
||||
* actionable refault distance, which is currently half of
|
||||
@@ -601,7 +592,7 @@ static int __init workingset_init(void)
|
||||
* some more pages at runtime, so keep working with up to
|
||||
* double the initial memory by using totalram_pages as-is.
|
||||
*/
|
||||
- timestamp_bits = BITS_PER_LONG - EVICTION_SHIFT;
|
||||
+ timestamp_bits = EVICTION_SHIFT - WORKINGSET_WIDTH;
|
||||
max_order = fls_long(totalram_pages() - 1);
|
||||
if (max_order > timestamp_bits)
|
||||
bucket_order = max_order - timestamp_bits;
|
||||
--
|
||||
2.31.1.751.gd2f1c929bd-goog
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,686 @@
|
||||
From mboxrd@z Thu Jan 1 00:00:00 1970
|
||||
Return-Path: <linux-kernel-owner@kernel.org>
|
||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
|
||||
aws-us-west-2-korg-lkml-1.web.codeaurora.org
|
||||
X-Spam-Level:
|
||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
|
||||
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
|
||||
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
|
||||
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
|
||||
version=3.4.0
|
||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
|
||||
by smtp.lore.kernel.org (Postfix) with ESMTP id AEEEDC43461
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:26 +0000 (UTC)
|
||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
|
||||
by mail.kernel.org (Postfix) with ESMTP id 4D74961186
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:26 +0000 (UTC)
|
||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
|
||||
id S231144AbhETGzp (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:45 -0400
|
||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37910 "EHLO
|
||||
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
|
||||
with ESMTP id S230478AbhETGzg (ORCPT
|
||||
<rfc822;linux-kernel@vger.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:36 -0400
|
||||
Received: from mail-qv1-xf4a.google.com (mail-qv1-xf4a.google.com [IPv6:2607:f8b0:4864:20::f4a])
|
||||
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5CD6FC061763
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:14 -0700 (PDT)
|
||||
Received: by mail-qv1-xf4a.google.com with SMTP id x2-20020a0cda020000b02901edb4c412fdso12424236qvj.11
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:14 -0700 (PDT)
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=google.com; s=20161025;
|
||||
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
|
||||
:cc;
|
||||
bh=Jb580jSe4IcT6fVqPR22jrL3z+VNcMEKM2UgbfL90k4=;
|
||||
b=rTxj5e7tRY5wx29jetDGP8dUly4vBHNX0SBJeZKRsCOEiHaQ+coy05du1f4bT6oCWw
|
||||
rJWrdbUyp5aci9MKmCQ2Z5qPBf7F+zDTL+8wpoufyGbRvdGkfwDkAgQV6LLsi9xZzdyr
|
||||
bpcyHItG1lIReRXOkR0GKWNz8GfEVNO7lE+G6Sc1sHPUEEfw3FF5Vl/Wta1OxKsGQQe4
|
||||
02oeo8STGdqGF0yOczRyqWZ/SBFcNGiPQ7nrGaWA3FguRBAwZ2dOrTrmM5ug10rbOQmf
|
||||
L/m3eja1mOwffFkrgumZ0Sm9KZ5sbKJNbLAjPYQAmAcoXhU/NVnLrMVtxSGppGFwdyOz
|
||||
NMsw==
|
||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=1e100.net; s=20161025;
|
||||
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
|
||||
:references:subject:from:to:cc;
|
||||
bh=Jb580jSe4IcT6fVqPR22jrL3z+VNcMEKM2UgbfL90k4=;
|
||||
b=i0Py5qwrQv4OOBWcpJcYxjG5lgHvV4Gq3X3fG5L0aB3lLLnS3mObKdM6XG+uYC1b1G
|
||||
z/Sfx4n/1+/0EPZnFoo80K1ry0Y7SD/W30OUEPR8PValuCLHEHzVeoVK2+TPI8DMEzz1
|
||||
r+jWpxZkah8B613QrPIvvcZSIb0lxcsV6JxpYjFixO/mizct7mrdls35j1Thb7ehgWtO
|
||||
W5aAGiMIxBprDhKHJ2D2Oz85hWRyQYND4jEA68bzh9ybz4cYMVIX3C+9uH+cVIhZ6JZL
|
||||
febwADPME4CsH8gMntK/GWzf5Yu+sdeBYn+6VJKrG/4c7dWi0xgFGWYrgCtSlk8kVgOe
|
||||
bH1w==
|
||||
X-Gm-Message-State: AOAM530OUX0GiyYChE/1C1GuJXPP4zDS9QrWZKB+3aIFDiz73ADIQaxu
|
||||
gJxNX12VvCvNdCSId0kuSWl88ETTfcg=
|
||||
X-Google-Smtp-Source: ABdhPJz4m/yxkWn5wBamzXd/wEoVvHq3AOPsnc1+c/ewg4oojPM6XcGKJYYybO2Mtsb6BDRPtu5ccAJRcHw=
|
||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
|
||||
(user=yuzhao job=sendgmr) by 2002:a0c:edcf:: with SMTP id i15mr4021372qvr.10.1621493653456;
|
||||
Wed, 19 May 2021 23:54:13 -0700 (PDT)
|
||||
Date: Thu, 20 May 2021 00:53:49 -0600
|
||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
Message-Id: <20210520065355.2736558-9-yuzhao@google.com>
|
||||
Mime-Version: 1.0
|
||||
References: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
|
||||
Subject: [PATCH v3 08/14] mm: multigenerational lru: activation
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
To: linux-mm@kvack.org
|
||||
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
|
||||
Andrew Morton <akpm@linux-foundation.org>,
|
||||
Dave Chinner <david@fromorbit.com>,
|
||||
Dave Hansen <dave.hansen@linux.intel.com>,
|
||||
Donald Carr <sirspudd@gmail.com>,
|
||||
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
|
||||
Johannes Weiner <hannes@cmpxchg.org>,
|
||||
Jonathan Corbet <corbet@lwn.net>,
|
||||
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
|
||||
Konstantin Kharlamov <hi-angel@yandex.ru>,
|
||||
Marcus Seyfarth <m.seyfarth@gmail.com>,
|
||||
Matthew Wilcox <willy@infradead.org>,
|
||||
Mel Gorman <mgorman@suse.de>,
|
||||
Miaohe Lin <linmiaohe@huawei.com>,
|
||||
Michael Larabel <michael@michaellarabel.com>,
|
||||
Michal Hocko <mhocko@suse.com>,
|
||||
Michel Lespinasse <michel@lespinasse.org>,
|
||||
Rik van Riel <riel@surriel.com>,
|
||||
Roman Gushchin <guro@fb.com>,
|
||||
Tim Chen <tim.c.chen@linux.intel.com>,
|
||||
Vlastimil Babka <vbabka@suse.cz>,
|
||||
Yang Shi <shy828301@gmail.com>,
|
||||
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
|
||||
linux-kernel@vger.kernel.org, lkp@lists.01.org,
|
||||
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
|
||||
Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
Precedence: bulk
|
||||
List-ID: <linux-kernel.vger.kernel.org>
|
||||
X-Mailing-List: linux-kernel@vger.kernel.org
|
||||
List-Archive: <https://lore.kernel.org/lkml/>
|
||||
|
||||
For pages accessed multiple times via file descriptors, instead of
|
||||
activating them upon the second access, we activate them based on the
|
||||
refault rates of their tiers. Each generation contains at most
|
||||
MAX_NR_TIERS tiers, and they require additional MAX_NR_TIERS-2 bits in
|
||||
page->flags. Pages accessed N times via file descriptors belong to
|
||||
tier order_base_2(N). Tier 0 is the base tier and it contains pages
|
||||
read ahead, accessed once via file descriptors and accessed only via
|
||||
page tables. Pages from the base tier are evicted regardless of the
|
||||
refault rate. Pages from upper tiers that have higher refault rates
|
||||
than the base tier will be moved to the next generation. A feedback
|
||||
loop modeled after the PID controller monitors refault rates across
|
||||
all tiers and decides when to activate pages from which upper tiers
|
||||
in the reclaim path. The advantages of this model are:
|
||||
1) It has a negligible cost in the buffered IO access path because
|
||||
activations are done optionally in the reclaim path.
|
||||
2) It takes mapped pages into account and avoids overprotecting
|
||||
pages accessed multiple times via file descriptors.
|
||||
3) More tiers offer better protection to pages accessed more than
|
||||
twice when workloads doing intensive buffered IO are under memory
|
||||
pressure.
|
||||
|
||||
For pages mapped upon page faults, the accessed bit is set during the
|
||||
initial faults. Ideally we add them to the per-zone lists index by
|
||||
max_seq, i.e., the youngest generation, so that eviction will not
|
||||
consider them before the aging has scanned them. For anon pages not in
|
||||
swap cache, this can be done easily in the page fault path: we rename
|
||||
lru_cache_add_inactive_or_unevictable() to lru_cache_add_page_vma()
|
||||
and add a new parameter, which is set to true for pages mapped upon
|
||||
page faults. For pages in page cache or swap cache, we cannot
|
||||
differentiate the page fault path from the read ahead path at the time
|
||||
we call lru_cache_add(). So we add them to the per-zone lists index by
|
||||
min_seq, i.e., the oldest generation, for now.
|
||||
|
||||
Finally, we need to make sure deactivation works when the
|
||||
multigenerational lru is enabled. We cannot use PageActive() because
|
||||
it is not set on pages from active generations, in order to spare the
|
||||
aging the trouble of clearing it when active generations become
|
||||
inactive. So we deactivate pages unconditionally since deactivation is
|
||||
not a hot code path worth additional optimizations.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
---
|
||||
include/linux/mm_inline.h | 40 ++++++++++++++
|
||||
include/linux/swap.h | 4 +-
|
||||
kernel/events/uprobes.c | 2 +-
|
||||
mm/huge_memory.c | 2 +-
|
||||
mm/khugepaged.c | 2 +-
|
||||
mm/memory.c | 10 ++--
|
||||
mm/migrate.c | 2 +-
|
||||
mm/swap.c | 22 +++++---
|
||||
mm/swapfile.c | 2 +-
|
||||
mm/userfaultfd.c | 2 +-
|
||||
mm/vmscan.c | 91 ++++++++++++++++++++++++++++++-
|
||||
mm/workingset.c | 112 ++++++++++++++++++++++++++++++++++++++
|
||||
12 files changed, 269 insertions(+), 22 deletions(-)
|
||||
|
||||
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
|
||||
index ae3e3826dd7f..f3b99f65a652 100644
|
||||
--- a/include/linux/mm_inline.h
|
||||
+++ b/include/linux/mm_inline.h
|
||||
@@ -103,6 +103,12 @@ static inline int lru_gen_from_seq(unsigned long seq)
|
||||
return seq % MAX_NR_GENS;
|
||||
}
|
||||
|
||||
+/* Convert the level of usage to a tier. See the comment on MAX_NR_TIERS. */
|
||||
+static inline int lru_tier_from_usage(int usage)
|
||||
+{
|
||||
+ return order_base_2(usage + 1);
|
||||
+}
|
||||
+
|
||||
/* Return a proper index regardless whether we keep a full history of stats. */
|
||||
static inline int hist_from_seq_or_gen(int seq_or_gen)
|
||||
{
|
||||
@@ -245,6 +251,36 @@ static inline bool lru_gen_deletion(struct page *page, struct lruvec *lruvec)
|
||||
return true;
|
||||
}
|
||||
|
||||
+/* Return the level of usage of a page. See the comment on MAX_NR_TIERS. */
|
||||
+static inline int page_tier_usage(struct page *page)
|
||||
+{
|
||||
+ unsigned long flags = READ_ONCE(page->flags);
|
||||
+
|
||||
+ return flags & BIT(PG_workingset) ?
|
||||
+ ((flags & LRU_USAGE_MASK) >> LRU_USAGE_PGOFF) + 1 : 0;
|
||||
+}
|
||||
+
|
||||
+/* Increment the usage counter after a page is accessed via file descriptors. */
|
||||
+static inline void page_inc_usage(struct page *page)
|
||||
+{
|
||||
+ unsigned long usage;
|
||||
+ unsigned long old_flags, new_flags;
|
||||
+
|
||||
+ do {
|
||||
+ old_flags = READ_ONCE(page->flags);
|
||||
+
|
||||
+ if (!(old_flags & BIT(PG_workingset))) {
|
||||
+ new_flags = old_flags | BIT(PG_workingset);
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ usage = (old_flags & LRU_USAGE_MASK) + BIT(LRU_USAGE_PGOFF);
|
||||
+
|
||||
+ new_flags = (old_flags & ~LRU_USAGE_MASK) | min(usage, LRU_USAGE_MASK);
|
||||
+ } while (new_flags != old_flags &&
|
||||
+ cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
|
||||
+}
|
||||
+
|
||||
#else /* CONFIG_LRU_GEN */
|
||||
|
||||
static inline bool lru_gen_enabled(void)
|
||||
@@ -262,6 +298,10 @@ static inline bool lru_gen_deletion(struct page *page, struct lruvec *lruvec)
|
||||
return false;
|
||||
}
|
||||
|
||||
+static inline void page_inc_usage(struct page *page)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
#endif /* CONFIG_LRU_GEN */
|
||||
|
||||
static __always_inline void add_page_to_lru_list(struct page *page,
|
||||
diff --git a/include/linux/swap.h b/include/linux/swap.h
|
||||
index 144727041e78..30b1f15f5c6e 100644
|
||||
--- a/include/linux/swap.h
|
||||
+++ b/include/linux/swap.h
|
||||
@@ -365,8 +365,8 @@ extern void deactivate_page(struct page *page);
|
||||
extern void mark_page_lazyfree(struct page *page);
|
||||
extern void swap_setup(void);
|
||||
|
||||
-extern void lru_cache_add_inactive_or_unevictable(struct page *page,
|
||||
- struct vm_area_struct *vma);
|
||||
+extern void lru_cache_add_page_vma(struct page *page, struct vm_area_struct *vma,
|
||||
+ bool faulting);
|
||||
|
||||
/* linux/mm/vmscan.c */
|
||||
extern unsigned long zone_reclaimable_pages(struct zone *zone);
|
||||
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
|
||||
index 6addc9780319..4e93e5602723 100644
|
||||
--- a/kernel/events/uprobes.c
|
||||
+++ b/kernel/events/uprobes.c
|
||||
@@ -184,7 +184,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
|
||||
if (new_page) {
|
||||
get_page(new_page);
|
||||
page_add_new_anon_rmap(new_page, vma, addr, false);
|
||||
- lru_cache_add_inactive_or_unevictable(new_page, vma);
|
||||
+ lru_cache_add_page_vma(new_page, vma, false);
|
||||
} else
|
||||
/* no new page, just dec_mm_counter for old_page */
|
||||
dec_mm_counter(mm, MM_ANONPAGES);
|
||||
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
|
||||
index 8ac9093e5a0d..681da4a3cf61 100644
|
||||
--- a/mm/huge_memory.c
|
||||
+++ b/mm/huge_memory.c
|
||||
@@ -636,7 +636,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
|
||||
entry = mk_huge_pmd(page, vma->vm_page_prot);
|
||||
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
|
||||
page_add_new_anon_rmap(page, vma, haddr, true);
|
||||
- lru_cache_add_inactive_or_unevictable(page, vma);
|
||||
+ lru_cache_add_page_vma(page, vma, true);
|
||||
pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
|
||||
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
|
||||
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
|
||||
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
|
||||
index 6c0185fdd815..09e5346c2754 100644
|
||||
--- a/mm/khugepaged.c
|
||||
+++ b/mm/khugepaged.c
|
||||
@@ -1198,7 +1198,7 @@ static void collapse_huge_page(struct mm_struct *mm,
|
||||
spin_lock(pmd_ptl);
|
||||
BUG_ON(!pmd_none(*pmd));
|
||||
page_add_new_anon_rmap(new_page, vma, address, true);
|
||||
- lru_cache_add_inactive_or_unevictable(new_page, vma);
|
||||
+ lru_cache_add_page_vma(new_page, vma, true);
|
||||
pgtable_trans_huge_deposit(mm, pmd, pgtable);
|
||||
set_pmd_at(mm, address, pmd, _pmd);
|
||||
update_mmu_cache_pmd(vma, address, pmd);
|
||||
diff --git a/mm/memory.c b/mm/memory.c
|
||||
index 730daa00952b..a76196885f92 100644
|
||||
--- a/mm/memory.c
|
||||
+++ b/mm/memory.c
|
||||
@@ -839,7 +839,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
|
||||
copy_user_highpage(new_page, page, addr, src_vma);
|
||||
__SetPageUptodate(new_page);
|
||||
page_add_new_anon_rmap(new_page, dst_vma, addr, false);
|
||||
- lru_cache_add_inactive_or_unevictable(new_page, dst_vma);
|
||||
+ lru_cache_add_page_vma(new_page, dst_vma, false);
|
||||
rss[mm_counter(new_page)]++;
|
||||
|
||||
/* All done, just insert the new page copy in the child */
|
||||
@@ -2950,7 +2950,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
|
||||
*/
|
||||
ptep_clear_flush_notify(vma, vmf->address, vmf->pte);
|
||||
page_add_new_anon_rmap(new_page, vma, vmf->address, false);
|
||||
- lru_cache_add_inactive_or_unevictable(new_page, vma);
|
||||
+ lru_cache_add_page_vma(new_page, vma, true);
|
||||
/*
|
||||
* We call the notify macro here because, when using secondary
|
||||
* mmu page tables (such as kvm shadow page tables), we want the
|
||||
@@ -3479,7 +3479,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
|
||||
/* ksm created a completely new copy */
|
||||
if (unlikely(page != swapcache && swapcache)) {
|
||||
page_add_new_anon_rmap(page, vma, vmf->address, false);
|
||||
- lru_cache_add_inactive_or_unevictable(page, vma);
|
||||
+ lru_cache_add_page_vma(page, vma, true);
|
||||
} else {
|
||||
do_page_add_anon_rmap(page, vma, vmf->address, exclusive);
|
||||
}
|
||||
@@ -3625,7 +3625,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
|
||||
|
||||
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
|
||||
page_add_new_anon_rmap(page, vma, vmf->address, false);
|
||||
- lru_cache_add_inactive_or_unevictable(page, vma);
|
||||
+ lru_cache_add_page_vma(page, vma, true);
|
||||
setpte:
|
||||
set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
|
||||
|
||||
@@ -3793,7 +3793,7 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
|
||||
if (write && !(vma->vm_flags & VM_SHARED)) {
|
||||
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
|
||||
page_add_new_anon_rmap(page, vma, addr, false);
|
||||
- lru_cache_add_inactive_or_unevictable(page, vma);
|
||||
+ lru_cache_add_page_vma(page, vma, true);
|
||||
} else {
|
||||
inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
|
||||
page_add_file_rmap(page, false);
|
||||
diff --git a/mm/migrate.c b/mm/migrate.c
|
||||
index b234c3f3acb7..d3307c9eced4 100644
|
||||
--- a/mm/migrate.c
|
||||
+++ b/mm/migrate.c
|
||||
@@ -2967,7 +2967,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
|
||||
inc_mm_counter(mm, MM_ANONPAGES);
|
||||
page_add_new_anon_rmap(page, vma, addr, false);
|
||||
if (!is_zone_device_page(page))
|
||||
- lru_cache_add_inactive_or_unevictable(page, vma);
|
||||
+ lru_cache_add_page_vma(page, vma, false);
|
||||
get_page(page);
|
||||
|
||||
if (flush) {
|
||||
diff --git a/mm/swap.c b/mm/swap.c
|
||||
index dfb48cf9c2c9..96ce95eeb2c9 100644
|
||||
--- a/mm/swap.c
|
||||
+++ b/mm/swap.c
|
||||
@@ -433,6 +433,8 @@ void mark_page_accessed(struct page *page)
|
||||
* this list is never rotated or maintained, so marking an
|
||||
* evictable page accessed has no effect.
|
||||
*/
|
||||
+ } else if (lru_gen_enabled()) {
|
||||
+ page_inc_usage(page);
|
||||
} else if (!PageActive(page)) {
|
||||
/*
|
||||
* If the page is on the LRU, queue it for activation via
|
||||
@@ -478,15 +480,14 @@ void lru_cache_add(struct page *page)
|
||||
EXPORT_SYMBOL(lru_cache_add);
|
||||
|
||||
/**
|
||||
- * lru_cache_add_inactive_or_unevictable
|
||||
+ * lru_cache_add_page_vma
|
||||
* @page: the page to be added to LRU
|
||||
* @vma: vma in which page is mapped for determining reclaimability
|
||||
*
|
||||
- * Place @page on the inactive or unevictable LRU list, depending on its
|
||||
- * evictability.
|
||||
+ * Place @page on an LRU list, depending on its evictability.
|
||||
*/
|
||||
-void lru_cache_add_inactive_or_unevictable(struct page *page,
|
||||
- struct vm_area_struct *vma)
|
||||
+void lru_cache_add_page_vma(struct page *page, struct vm_area_struct *vma,
|
||||
+ bool faulting)
|
||||
{
|
||||
bool unevictable;
|
||||
|
||||
@@ -503,6 +504,11 @@ void lru_cache_add_inactive_or_unevictable(struct page *page,
|
||||
__mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
|
||||
count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
|
||||
}
|
||||
+
|
||||
+ /* tell the multigenerational lru that the page is being faulted in */
|
||||
+ if (lru_gen_enabled() && !unevictable && faulting)
|
||||
+ SetPageActive(page);
|
||||
+
|
||||
lru_cache_add(page);
|
||||
}
|
||||
|
||||
@@ -529,7 +535,7 @@ void lru_cache_add_inactive_or_unevictable(struct page *page,
|
||||
*/
|
||||
static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
|
||||
{
|
||||
- bool active = PageActive(page);
|
||||
+ bool active = PageActive(page) || lru_gen_enabled();
|
||||
int nr_pages = thp_nr_pages(page);
|
||||
|
||||
if (PageUnevictable(page))
|
||||
@@ -569,7 +575,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
|
||||
|
||||
static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec)
|
||||
{
|
||||
- if (PageActive(page) && !PageUnevictable(page)) {
|
||||
+ if (!PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) {
|
||||
int nr_pages = thp_nr_pages(page);
|
||||
|
||||
del_page_from_lru_list(page, lruvec);
|
||||
@@ -684,7 +690,7 @@ void deactivate_file_page(struct page *page)
|
||||
*/
|
||||
void deactivate_page(struct page *page)
|
||||
{
|
||||
- if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
|
||||
+ if (PageLRU(page) && !PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) {
|
||||
struct pagevec *pvec;
|
||||
|
||||
local_lock(&lru_pvecs.lock);
|
||||
diff --git a/mm/swapfile.c b/mm/swapfile.c
|
||||
index 3598b668f533..549e94318b2f 100644
|
||||
--- a/mm/swapfile.c
|
||||
+++ b/mm/swapfile.c
|
||||
@@ -1936,7 +1936,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
|
||||
page_add_anon_rmap(page, vma, addr, false);
|
||||
} else { /* ksm created a completely new copy */
|
||||
page_add_new_anon_rmap(page, vma, addr, false);
|
||||
- lru_cache_add_inactive_or_unevictable(page, vma);
|
||||
+ lru_cache_add_page_vma(page, vma, false);
|
||||
}
|
||||
swap_free(entry);
|
||||
out:
|
||||
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
|
||||
index e14b3820c6a8..175d55b4f594 100644
|
||||
--- a/mm/userfaultfd.c
|
||||
+++ b/mm/userfaultfd.c
|
||||
@@ -123,7 +123,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
|
||||
|
||||
inc_mm_counter(dst_mm, MM_ANONPAGES);
|
||||
page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
|
||||
- lru_cache_add_inactive_or_unevictable(page, dst_vma);
|
||||
+ lru_cache_add_page_vma(page, dst_vma, true);
|
||||
|
||||
set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index f7bbfc0b1ebd..84d25079092e 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -1094,9 +1094,11 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
|
||||
|
||||
if (PageSwapCache(page)) {
|
||||
swp_entry_t swap = { .val = page_private(page) };
|
||||
- mem_cgroup_swapout(page, swap);
|
||||
+
|
||||
+ /* get a shadow entry before page_memcg() is cleared */
|
||||
if (reclaimed && !mapping_exiting(mapping))
|
||||
shadow = workingset_eviction(page, target_memcg);
|
||||
+ mem_cgroup_swapout(page, swap);
|
||||
__delete_from_swap_cache(page, swap, shadow);
|
||||
xa_unlock_irqrestore(&mapping->i_pages, flags);
|
||||
put_swap_page(page, swap);
|
||||
@@ -2780,6 +2782,93 @@ static bool __maybe_unused seq_is_valid(struct lruvec *lruvec)
|
||||
get_nr_gens(lruvec, 1) <= MAX_NR_GENS;
|
||||
}
|
||||
|
||||
+/******************************************************************************
|
||||
+ * refault feedback loop
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+/*
|
||||
+ * A feedback loop modeled after the PID controller. Currently supports the
|
||||
+ * proportional (P) and the integral (I) terms; the derivative (D) term can be
|
||||
+ * added if necessary. The setpoint (SP) is the desired position; the process
|
||||
+ * variable (PV) is the measured position. The error is the difference between
|
||||
+ * the SP and the PV. A positive error results in a positive control output
|
||||
+ * correction, which, in our case, is to allow eviction.
|
||||
+ *
|
||||
+ * The P term is the current refault rate refaulted/(evicted+activated), which
|
||||
+ * has a weight of 1. The I term is the arithmetic mean of the last N refault
|
||||
+ * rates, weighted by geometric series 1/2, 1/4, ..., 1/(1<<N).
|
||||
+ *
|
||||
+ * Our goal is to make sure upper tiers have similar refault rates as the base
|
||||
+ * tier. That is we try to be fair to all tiers by maintaining similar refault
|
||||
+ * rates across them.
|
||||
+ */
|
||||
+struct controller_pos {
|
||||
+ unsigned long refaulted;
|
||||
+ unsigned long total;
|
||||
+ int gain;
|
||||
+};
|
||||
+
|
||||
+static void read_controller_pos(struct controller_pos *pos, struct lruvec *lruvec,
|
||||
+ int type, int tier, int gain)
|
||||
+{
|
||||
+ struct lrugen *lrugen = &lruvec->evictable;
|
||||
+ int hist = hist_from_seq_or_gen(lrugen->min_seq[type]);
|
||||
+
|
||||
+ pos->refaulted = lrugen->avg_refaulted[type][tier] +
|
||||
+ atomic_long_read(&lrugen->refaulted[hist][type][tier]);
|
||||
+ pos->total = lrugen->avg_total[type][tier] +
|
||||
+ atomic_long_read(&lrugen->evicted[hist][type][tier]);
|
||||
+ if (tier)
|
||||
+ pos->total += lrugen->activated[hist][type][tier - 1];
|
||||
+ pos->gain = gain;
|
||||
+}
|
||||
+
|
||||
+static void reset_controller_pos(struct lruvec *lruvec, int gen, int type)
|
||||
+{
|
||||
+ int tier;
|
||||
+ int hist = hist_from_seq_or_gen(gen);
|
||||
+ struct lrugen *lrugen = &lruvec->evictable;
|
||||
+ bool carryover = gen == lru_gen_from_seq(lrugen->min_seq[type]);
|
||||
+
|
||||
+ if (!carryover && NR_STAT_GENS == 1)
|
||||
+ return;
|
||||
+
|
||||
+ for (tier = 0; tier < MAX_NR_TIERS; tier++) {
|
||||
+ if (carryover) {
|
||||
+ unsigned long sum;
|
||||
+
|
||||
+ sum = lrugen->avg_refaulted[type][tier] +
|
||||
+ atomic_long_read(&lrugen->refaulted[hist][type][tier]);
|
||||
+ WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2);
|
||||
+
|
||||
+ sum = lrugen->avg_total[type][tier] +
|
||||
+ atomic_long_read(&lrugen->evicted[hist][type][tier]);
|
||||
+ if (tier)
|
||||
+ sum += lrugen->activated[hist][type][tier - 1];
|
||||
+ WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2);
|
||||
+
|
||||
+ if (NR_STAT_GENS > 1)
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ atomic_long_set(&lrugen->refaulted[hist][type][tier], 0);
|
||||
+ atomic_long_set(&lrugen->evicted[hist][type][tier], 0);
|
||||
+ if (tier)
|
||||
+ WRITE_ONCE(lrugen->activated[hist][type][tier - 1], 0);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static bool positive_ctrl_err(struct controller_pos *sp, struct controller_pos *pv)
|
||||
+{
|
||||
+ /*
|
||||
+ * Allow eviction if the PV has a limited number of refaulted pages or a
|
||||
+ * lower refault rate than the SP.
|
||||
+ */
|
||||
+ return pv->refaulted < SWAP_CLUSTER_MAX ||
|
||||
+ pv->refaulted * max(sp->total, 1UL) * sp->gain <=
|
||||
+ sp->refaulted * max(pv->total, 1UL) * pv->gain;
|
||||
+}
|
||||
+
|
||||
/******************************************************************************
|
||||
* state change
|
||||
******************************************************************************/
|
||||
diff --git a/mm/workingset.c b/mm/workingset.c
|
||||
index edb8aed2587e..3f3f03d51ea7 100644
|
||||
--- a/mm/workingset.c
|
||||
+++ b/mm/workingset.c
|
||||
@@ -201,6 +201,110 @@ static unsigned long unpack_shadow(void *shadow, int *memcg_id, struct pglist_da
|
||||
return val >> MEM_CGROUP_ID_SHIFT;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+
|
||||
+#if LRU_GEN_SHIFT + LRU_USAGE_SHIFT >= EVICTION_SHIFT
|
||||
+#error "Please try smaller NODES_SHIFT, NR_LRU_GENS and TIERS_PER_GEN configurations"
|
||||
+#endif
|
||||
+
|
||||
+static void page_set_usage(struct page *page, int usage)
|
||||
+{
|
||||
+ unsigned long old_flags, new_flags;
|
||||
+
|
||||
+ VM_BUG_ON(usage > BIT(LRU_USAGE_WIDTH));
|
||||
+
|
||||
+ if (!usage)
|
||||
+ return;
|
||||
+
|
||||
+ do {
|
||||
+ old_flags = READ_ONCE(page->flags);
|
||||
+ new_flags = (old_flags & ~LRU_USAGE_MASK) | LRU_TIER_FLAGS |
|
||||
+ ((usage - 1UL) << LRU_USAGE_PGOFF);
|
||||
+ } while (new_flags != old_flags &&
|
||||
+ cmpxchg(&page->flags, old_flags, new_flags) != old_flags);
|
||||
+}
|
||||
+
|
||||
+/* Return a token to be stored in the shadow entry of a page being evicted. */
|
||||
+static void *lru_gen_eviction(struct page *page)
|
||||
+{
|
||||
+ int hist, tier;
|
||||
+ unsigned long token;
|
||||
+ unsigned long min_seq;
|
||||
+ struct lruvec *lruvec;
|
||||
+ struct lrugen *lrugen;
|
||||
+ int type = page_is_file_lru(page);
|
||||
+ int usage = page_tier_usage(page);
|
||||
+ struct mem_cgroup *memcg = page_memcg(page);
|
||||
+ struct pglist_data *pgdat = page_pgdat(page);
|
||||
+
|
||||
+ lruvec = mem_cgroup_lruvec(memcg, pgdat);
|
||||
+ lrugen = &lruvec->evictable;
|
||||
+ min_seq = READ_ONCE(lrugen->min_seq[type]);
|
||||
+ token = (min_seq << LRU_USAGE_SHIFT) | usage;
|
||||
+
|
||||
+ hist = hist_from_seq_or_gen(min_seq);
|
||||
+ tier = lru_tier_from_usage(usage);
|
||||
+ atomic_long_add(thp_nr_pages(page), &lrugen->evicted[hist][type][tier]);
|
||||
+
|
||||
+ return pack_shadow(mem_cgroup_id(memcg), pgdat, token);
|
||||
+}
|
||||
+
|
||||
+/* Account a refaulted page based on the token stored in its shadow entry. */
|
||||
+static void lru_gen_refault(struct page *page, void *shadow)
|
||||
+{
|
||||
+ int hist, tier, usage;
|
||||
+ int memcg_id;
|
||||
+ unsigned long token;
|
||||
+ unsigned long min_seq;
|
||||
+ struct lruvec *lruvec;
|
||||
+ struct lrugen *lrugen;
|
||||
+ struct pglist_data *pgdat;
|
||||
+ struct mem_cgroup *memcg;
|
||||
+ int type = page_is_file_lru(page);
|
||||
+
|
||||
+ token = unpack_shadow(shadow, &memcg_id, &pgdat);
|
||||
+ if (page_pgdat(page) != pgdat)
|
||||
+ return;
|
||||
+
|
||||
+ rcu_read_lock();
|
||||
+ memcg = page_memcg_rcu(page);
|
||||
+ if (mem_cgroup_id(memcg) != memcg_id)
|
||||
+ goto unlock;
|
||||
+
|
||||
+ usage = token & (BIT(LRU_USAGE_SHIFT) - 1);
|
||||
+ token >>= LRU_USAGE_SHIFT;
|
||||
+
|
||||
+ lruvec = mem_cgroup_lruvec(memcg, pgdat);
|
||||
+ lrugen = &lruvec->evictable;
|
||||
+ min_seq = READ_ONCE(lrugen->min_seq[type]);
|
||||
+ if (token != (min_seq & (EVICTION_MASK >> LRU_USAGE_SHIFT)))
|
||||
+ goto unlock;
|
||||
+
|
||||
+ page_set_usage(page, usage);
|
||||
+
|
||||
+ hist = hist_from_seq_or_gen(min_seq);
|
||||
+ tier = lru_tier_from_usage(usage);
|
||||
+ atomic_long_add(thp_nr_pages(page), &lrugen->refaulted[hist][type][tier]);
|
||||
+ inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + type);
|
||||
+ if (tier)
|
||||
+ inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type);
|
||||
+unlock:
|
||||
+ rcu_read_unlock();
|
||||
+}
|
||||
+
|
||||
+#else /* CONFIG_LRU_GEN */
|
||||
+
|
||||
+static void *lru_gen_eviction(struct page *page)
|
||||
+{
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_refault(struct page *page, void *shadow)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+#endif /* CONFIG_LRU_GEN */
|
||||
+
|
||||
/**
|
||||
* workingset_age_nonresident - age non-resident entries as LRU ages
|
||||
* @lruvec: the lruvec that was aged
|
||||
@@ -249,6 +353,9 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
|
||||
VM_BUG_ON_PAGE(page_count(page), page);
|
||||
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
||||
|
||||
+ if (lru_gen_enabled())
|
||||
+ return lru_gen_eviction(page);
|
||||
+
|
||||
lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
|
||||
/* XXX: target_memcg can be NULL, go through lruvec */
|
||||
memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
|
||||
@@ -283,6 +390,11 @@ void workingset_refault(struct page *page, void *shadow)
|
||||
bool workingset;
|
||||
int memcgid;
|
||||
|
||||
+ if (lru_gen_enabled()) {
|
||||
+ lru_gen_refault(page, shadow);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
eviction = unpack_shadow(shadow, &memcgid, &pgdat);
|
||||
|
||||
rcu_read_lock();
|
||||
--
|
||||
2.31.1.751.gd2f1c929bd-goog
|
||||
|
||||
|
@ -0,0 +1,789 @@
|
||||
From mboxrd@z Thu Jan 1 00:00:00 1970
|
||||
Return-Path: <linux-kernel-owner@kernel.org>
|
||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
|
||||
aws-us-west-2-korg-lkml-1.web.codeaurora.org
|
||||
X-Spam-Level:
|
||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
|
||||
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
|
||||
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
|
||||
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
|
||||
version=3.4.0
|
||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
|
||||
by smtp.lore.kernel.org (Postfix) with ESMTP id 658D8C433B4
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:28 +0000 (UTC)
|
||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
|
||||
by mail.kernel.org (Postfix) with ESMTP id 477CA611BE
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:28 +0000 (UTC)
|
||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
|
||||
id S231165AbhETGzr (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:47 -0400
|
||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37908 "EHLO
|
||||
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
|
||||
with ESMTP id S230424AbhETGzh (ORCPT
|
||||
<rfc822;linux-kernel@vger.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:37 -0400
|
||||
Received: from mail-qv1-xf4a.google.com (mail-qv1-xf4a.google.com [IPv6:2607:f8b0:4864:20::f4a])
|
||||
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id EAF95C061574
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:15 -0700 (PDT)
|
||||
Received: by mail-qv1-xf4a.google.com with SMTP id r11-20020a0cb28b0000b02901c87a178503so12393761qve.22
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:15 -0700 (PDT)
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=google.com; s=20161025;
|
||||
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
|
||||
:cc;
|
||||
bh=P78haeNjzr5Qg1JjQymXtCqtqXQumRFjJWFx1f2kmKM=;
|
||||
b=Tjsj7/GeS8mUtREXLxPPRM0sVotzXnOQ/Dq8MvDajXLm9nT1QjyleqN5ONXOxfHJSb
|
||||
gOKQ1YJhBwyuC3HCKJXdOCqgqOmQbjJGjOkM9uXhZa9/9W+Bvnszx1RDX4YRwIqqWgFX
|
||||
flJvQvCE2SODYJwvTs6wKWKKQlvvw9WY05ct8oakXuEPnAOblfqTR+pbk7GoCJo67kNf
|
||||
enTegbyR2yRwGi9N5coUMJM8TYP+BoBWQaHNTVR3nL7a6nEjAg1IrL1w4WaZ+/fsdDdF
|
||||
6FlorKJ31sPCd2wxkCOnn+o98vuymHUDmyr+h9KxZtecLKHCkTsolSRuLiyHQvlzqY3q
|
||||
md3Q==
|
||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=1e100.net; s=20161025;
|
||||
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
|
||||
:references:subject:from:to:cc;
|
||||
bh=P78haeNjzr5Qg1JjQymXtCqtqXQumRFjJWFx1f2kmKM=;
|
||||
b=oK3flk/MdWi/bqnKFxC7O7BqH1b1apkGTQgT4OLVuSurUs5o7HcTTMvjXuljN/KmMh
|
||||
/OGEWIkS+BHD6OkEE9W7Q/5GoGXL7Np1sLByjbiNrfCNZHtmEvYLHtP9lYulkcWaLTgA
|
||||
XEr3n9zWofP9Jw0bPM24RW8jqzAlzld2tkrpDSgnfmMEpyzmjuFEURnKsx/ubUbuQ8Vd
|
||||
rkIngqIt1YDBI+x6EZEdq4OpP+8H9TDr8KZBjUVfzpvASnMYn2y9gZX4Obd5/t+wys2m
|
||||
zn5+4aqeR8mtxQVzHwPM48LG5wPqbTtMF0+Mhoba0Enk55ZL29+xKT00ltswnvHNJDj9
|
||||
UduQ==
|
||||
X-Gm-Message-State: AOAM5324lhHETXZQ7vXVsQ3UhfF140iLgXV/soebRFc0ECp355pnwH5X
|
||||
pEYaLnlH20Lc9hBvEeYp/HXipMEwsdE=
|
||||
X-Google-Smtp-Source: ABdhPJxAkOjDRLTPPi669WBE6Bb6QiyW8Wr0JRRG09c2L2y7UvYt7Th6JQxML99ZXqbjrM7T5yJPx76NwGo=
|
||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
|
||||
(user=yuzhao job=sendgmr) by 2002:a05:6214:76b:: with SMTP id
|
||||
f11mr3992753qvz.8.1621493655061; Wed, 19 May 2021 23:54:15 -0700 (PDT)
|
||||
Date: Thu, 20 May 2021 00:53:50 -0600
|
||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
Message-Id: <20210520065355.2736558-10-yuzhao@google.com>
|
||||
Mime-Version: 1.0
|
||||
References: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
|
||||
Subject: [PATCH v3 09/14] mm: multigenerational lru: mm_struct list
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
To: linux-mm@kvack.org
|
||||
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
|
||||
Andrew Morton <akpm@linux-foundation.org>,
|
||||
Dave Chinner <david@fromorbit.com>,
|
||||
Dave Hansen <dave.hansen@linux.intel.com>,
|
||||
Donald Carr <sirspudd@gmail.com>,
|
||||
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
|
||||
Johannes Weiner <hannes@cmpxchg.org>,
|
||||
Jonathan Corbet <corbet@lwn.net>,
|
||||
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
|
||||
Konstantin Kharlamov <hi-angel@yandex.ru>,
|
||||
Marcus Seyfarth <m.seyfarth@gmail.com>,
|
||||
Matthew Wilcox <willy@infradead.org>,
|
||||
Mel Gorman <mgorman@suse.de>,
|
||||
Miaohe Lin <linmiaohe@huawei.com>,
|
||||
Michael Larabel <michael@michaellarabel.com>,
|
||||
Michal Hocko <mhocko@suse.com>,
|
||||
Michel Lespinasse <michel@lespinasse.org>,
|
||||
Rik van Riel <riel@surriel.com>,
|
||||
Roman Gushchin <guro@fb.com>,
|
||||
Tim Chen <tim.c.chen@linux.intel.com>,
|
||||
Vlastimil Babka <vbabka@suse.cz>,
|
||||
Yang Shi <shy828301@gmail.com>,
|
||||
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
|
||||
linux-kernel@vger.kernel.org, lkp@lists.01.org,
|
||||
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
|
||||
Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
Precedence: bulk
|
||||
List-ID: <linux-kernel.vger.kernel.org>
|
||||
X-Mailing-List: linux-kernel@vger.kernel.org
|
||||
List-Archive: <https://lore.kernel.org/lkml/>
|
||||
|
||||
In order to scan page tables, we add an infrastructure to maintain
|
||||
either a system-wide mm_struct list or per-memcg mm_struct lists, and
|
||||
track whether an mm_struct is being used or has been used since the
|
||||
last scan.
|
||||
|
||||
Multiple threads can concurrently work on the same mm_struct list, and
|
||||
each of them will be given a different mm_struct belonging to a
|
||||
process that has been scheduled since the last scan.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
---
|
||||
fs/exec.c | 2 +
|
||||
include/linux/memcontrol.h | 6 +
|
||||
include/linux/mm_types.h | 107 ++++++++++++
|
||||
kernel/exit.c | 1 +
|
||||
kernel/fork.c | 10 ++
|
||||
kernel/kthread.c | 1 +
|
||||
kernel/sched/core.c | 2 +
|
||||
mm/memcontrol.c | 28 ++++
|
||||
mm/vmscan.c | 324 +++++++++++++++++++++++++++++++++++++
|
||||
9 files changed, 481 insertions(+)
|
||||
|
||||
diff --git a/fs/exec.c b/fs/exec.c
|
||||
index 18594f11c31f..c691d4d7720c 100644
|
||||
--- a/fs/exec.c
|
||||
+++ b/fs/exec.c
|
||||
@@ -1008,6 +1008,7 @@ static int exec_mmap(struct mm_struct *mm)
|
||||
active_mm = tsk->active_mm;
|
||||
tsk->active_mm = mm;
|
||||
tsk->mm = mm;
|
||||
+ lru_gen_add_mm(mm);
|
||||
/*
|
||||
* This prevents preemption while active_mm is being loaded and
|
||||
* it and mm are being updated, which could cause problems for
|
||||
@@ -1018,6 +1019,7 @@ static int exec_mmap(struct mm_struct *mm)
|
||||
if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
|
||||
local_irq_enable();
|
||||
activate_mm(active_mm, mm);
|
||||
+ lru_gen_switch_mm(active_mm, mm);
|
||||
if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
|
||||
local_irq_enable();
|
||||
tsk->mm->vmacache_seqnum = 0;
|
||||
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
|
||||
index 6bcac3d91dd1..60601a997433 100644
|
||||
--- a/include/linux/memcontrol.h
|
||||
+++ b/include/linux/memcontrol.h
|
||||
@@ -230,6 +230,8 @@ struct obj_cgroup {
|
||||
};
|
||||
};
|
||||
|
||||
+struct lru_gen_mm_list;
|
||||
+
|
||||
/*
|
||||
* The memory controller data structure. The memory controller controls both
|
||||
* page cache and RSS per cgroup. We would eventually like to provide
|
||||
@@ -349,6 +351,10 @@ struct mem_cgroup {
|
||||
struct deferred_split deferred_split_queue;
|
||||
#endif
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+ struct lru_gen_mm_list *mm_list;
|
||||
+#endif
|
||||
+
|
||||
struct mem_cgroup_per_node *nodeinfo[0];
|
||||
/* WARNING: nodeinfo must be the last member here */
|
||||
};
|
||||
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
|
||||
index 5aacc1c10a45..b0f662555eae 100644
|
||||
--- a/include/linux/mm_types.h
|
||||
+++ b/include/linux/mm_types.h
|
||||
@@ -15,6 +15,8 @@
|
||||
#include <linux/page-flags-layout.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/seqlock.h>
|
||||
+#include <linux/nodemask.h>
|
||||
+#include <linux/mmdebug.h>
|
||||
|
||||
#include <asm/mmu.h>
|
||||
|
||||
@@ -561,6 +563,22 @@ struct mm_struct {
|
||||
|
||||
#ifdef CONFIG_IOMMU_SUPPORT
|
||||
u32 pasid;
|
||||
+#endif
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+ struct {
|
||||
+ /* the node of a global or per-memcg mm_struct list */
|
||||
+ struct list_head list;
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+ /* points to the memcg of the owner task above */
|
||||
+ struct mem_cgroup *memcg;
|
||||
+#endif
|
||||
+ /* whether this mm_struct has been used since the last walk */
|
||||
+ nodemask_t nodes;
|
||||
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
|
||||
+ /* the number of CPUs using this mm_struct */
|
||||
+ atomic_t nr_cpus;
|
||||
+#endif
|
||||
+ } lrugen;
|
||||
#endif
|
||||
} __randomize_layout;
|
||||
|
||||
@@ -588,6 +606,95 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
|
||||
return (struct cpumask *)&mm->cpu_bitmap;
|
||||
}
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+
|
||||
+void lru_gen_init_mm(struct mm_struct *mm);
|
||||
+void lru_gen_add_mm(struct mm_struct *mm);
|
||||
+void lru_gen_del_mm(struct mm_struct *mm);
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+int lru_gen_alloc_mm_list(struct mem_cgroup *memcg);
|
||||
+void lru_gen_free_mm_list(struct mem_cgroup *memcg);
|
||||
+void lru_gen_migrate_mm(struct mm_struct *mm);
|
||||
+#endif
|
||||
+
|
||||
+/* Track the usage of each mm_struct so that we can skip inactive ones. */
|
||||
+static inline void lru_gen_switch_mm(struct mm_struct *old, struct mm_struct *new)
|
||||
+{
|
||||
+ /* exclude init_mm, efi_mm, etc. */
|
||||
+ if (!core_kernel_data((unsigned long)old)) {
|
||||
+ VM_BUG_ON(old == &init_mm);
|
||||
+
|
||||
+ nodes_setall(old->lrugen.nodes);
|
||||
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
|
||||
+ atomic_dec(&old->lrugen.nr_cpus);
|
||||
+ VM_BUG_ON_MM(atomic_read(&old->lrugen.nr_cpus) < 0, old);
|
||||
+#endif
|
||||
+ } else
|
||||
+ VM_BUG_ON_MM(READ_ONCE(old->lrugen.list.prev) ||
|
||||
+ READ_ONCE(old->lrugen.list.next), old);
|
||||
+
|
||||
+ if (!core_kernel_data((unsigned long)new)) {
|
||||
+ VM_BUG_ON(new == &init_mm);
|
||||
+
|
||||
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
|
||||
+ atomic_inc(&new->lrugen.nr_cpus);
|
||||
+ VM_BUG_ON_MM(atomic_read(&new->lrugen.nr_cpus) < 0, new);
|
||||
+#endif
|
||||
+ } else
|
||||
+ VM_BUG_ON_MM(READ_ONCE(new->lrugen.list.prev) ||
|
||||
+ READ_ONCE(new->lrugen.list.next), new);
|
||||
+}
|
||||
+
|
||||
+/* Return whether this mm_struct is being used on any CPUs. */
|
||||
+static inline bool lru_gen_mm_is_active(struct mm_struct *mm)
|
||||
+{
|
||||
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
|
||||
+ return !cpumask_empty(mm_cpumask(mm));
|
||||
+#else
|
||||
+ return atomic_read(&mm->lrugen.nr_cpus);
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
+#else /* CONFIG_LRU_GEN */
|
||||
+
|
||||
+static inline void lru_gen_init_mm(struct mm_struct *mm)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static inline void lru_gen_add_mm(struct mm_struct *mm)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static inline void lru_gen_del_mm(struct mm_struct *mm)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+static inline int lru_gen_alloc_mm_list(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static inline void lru_gen_free_mm_list(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static inline void lru_gen_migrate_mm(struct mm_struct *mm)
|
||||
+{
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+static inline void lru_gen_switch_mm(struct mm_struct *old, struct mm_struct *new)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static inline bool lru_gen_mm_is_active(struct mm_struct *mm)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+#endif /* CONFIG_LRU_GEN */
|
||||
+
|
||||
struct mmu_gather;
|
||||
extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm);
|
||||
extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm);
|
||||
diff --git a/kernel/exit.c b/kernel/exit.c
|
||||
index fd1c04193e18..b362179852f1 100644
|
||||
--- a/kernel/exit.c
|
||||
+++ b/kernel/exit.c
|
||||
@@ -423,6 +423,7 @@ void mm_update_next_owner(struct mm_struct *mm)
|
||||
goto retry;
|
||||
}
|
||||
WRITE_ONCE(mm->owner, c);
|
||||
+ lru_gen_migrate_mm(mm);
|
||||
task_unlock(c);
|
||||
put_task_struct(c);
|
||||
}
|
||||
diff --git a/kernel/fork.c b/kernel/fork.c
|
||||
index dc06afd725cb..2fd7dae9afcb 100644
|
||||
--- a/kernel/fork.c
|
||||
+++ b/kernel/fork.c
|
||||
@@ -669,6 +669,7 @@ static void check_mm(struct mm_struct *mm)
|
||||
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
|
||||
VM_BUG_ON_MM(mm->pmd_huge_pte, mm);
|
||||
#endif
|
||||
+ VM_BUG_ON_MM(lru_gen_mm_is_active(mm), mm);
|
||||
}
|
||||
|
||||
#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
|
||||
@@ -1061,6 +1062,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
|
||||
goto fail_nocontext;
|
||||
|
||||
mm->user_ns = get_user_ns(user_ns);
|
||||
+ lru_gen_init_mm(mm);
|
||||
return mm;
|
||||
|
||||
fail_nocontext:
|
||||
@@ -1103,6 +1105,7 @@ static inline void __mmput(struct mm_struct *mm)
|
||||
}
|
||||
if (mm->binfmt)
|
||||
module_put(mm->binfmt->module);
|
||||
+ lru_gen_del_mm(mm);
|
||||
mmdrop(mm);
|
||||
}
|
||||
|
||||
@@ -2524,6 +2527,13 @@ pid_t kernel_clone(struct kernel_clone_args *args)
|
||||
get_task_struct(p);
|
||||
}
|
||||
|
||||
+ if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) {
|
||||
+ /* lock the task to synchronize with memcg migration */
|
||||
+ task_lock(p);
|
||||
+ lru_gen_add_mm(p->mm);
|
||||
+ task_unlock(p);
|
||||
+ }
|
||||
+
|
||||
wake_up_new_task(p);
|
||||
|
||||
/* forking complete and child started to run, tell ptracer */
|
||||
diff --git a/kernel/kthread.c b/kernel/kthread.c
|
||||
index fe3f2a40d61e..b81e49ed31a7 100644
|
||||
--- a/kernel/kthread.c
|
||||
+++ b/kernel/kthread.c
|
||||
@@ -1325,6 +1325,7 @@ void kthread_use_mm(struct mm_struct *mm)
|
||||
tsk->mm = mm;
|
||||
membarrier_update_current_mm(mm);
|
||||
switch_mm_irqs_off(active_mm, mm, tsk);
|
||||
+ lru_gen_switch_mm(active_mm, mm);
|
||||
local_irq_enable();
|
||||
task_unlock(tsk);
|
||||
#ifdef finish_arch_post_lock_switch
|
||||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
|
||||
index 5226cc26a095..2d4b77f173db 100644
|
||||
--- a/kernel/sched/core.c
|
||||
+++ b/kernel/sched/core.c
|
||||
@@ -4323,6 +4323,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
|
||||
* finish_task_switch()'s mmdrop().
|
||||
*/
|
||||
switch_mm_irqs_off(prev->active_mm, next->mm, next);
|
||||
+ lru_gen_switch_mm(prev->active_mm, next->mm);
|
||||
|
||||
if (!prev->mm) { // from kernel
|
||||
/* will mmdrop() in finish_task_switch(). */
|
||||
@@ -7603,6 +7604,7 @@ void idle_task_exit(void)
|
||||
|
||||
if (mm != &init_mm) {
|
||||
switch_mm(mm, &init_mm, current);
|
||||
+ lru_gen_switch_mm(mm, &init_mm);
|
||||
finish_arch_post_lock_switch();
|
||||
}
|
||||
|
||||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
|
||||
index 64ada9e650a5..58b610ffa0e0 100644
|
||||
--- a/mm/memcontrol.c
|
||||
+++ b/mm/memcontrol.c
|
||||
@@ -4981,6 +4981,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
|
||||
for_each_node(node)
|
||||
free_mem_cgroup_per_node_info(memcg, node);
|
||||
free_percpu(memcg->vmstats_percpu);
|
||||
+ lru_gen_free_mm_list(memcg);
|
||||
kfree(memcg);
|
||||
}
|
||||
|
||||
@@ -5030,6 +5031,9 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
|
||||
if (alloc_mem_cgroup_per_node_info(memcg, node))
|
||||
goto fail;
|
||||
|
||||
+ if (lru_gen_alloc_mm_list(memcg))
|
||||
+ goto fail;
|
||||
+
|
||||
if (memcg_wb_domain_init(memcg, GFP_KERNEL))
|
||||
goto fail;
|
||||
|
||||
@@ -5991,6 +5995,29 @@ static void mem_cgroup_move_task(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+static void mem_cgroup_attach(struct cgroup_taskset *tset)
|
||||
+{
|
||||
+ struct cgroup_subsys_state *css;
|
||||
+ struct task_struct *task = NULL;
|
||||
+
|
||||
+ cgroup_taskset_for_each_leader(task, css, tset)
|
||||
+ ;
|
||||
+
|
||||
+ if (!task)
|
||||
+ return;
|
||||
+
|
||||
+ task_lock(task);
|
||||
+ if (task->mm && task->mm->owner == task)
|
||||
+ lru_gen_migrate_mm(task->mm);
|
||||
+ task_unlock(task);
|
||||
+}
|
||||
+#else
|
||||
+static void mem_cgroup_attach(struct cgroup_taskset *tset)
|
||||
+{
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
|
||||
{
|
||||
if (value == PAGE_COUNTER_MAX)
|
||||
@@ -6332,6 +6359,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
|
||||
.css_reset = mem_cgroup_css_reset,
|
||||
.css_rstat_flush = mem_cgroup_css_rstat_flush,
|
||||
.can_attach = mem_cgroup_can_attach,
|
||||
+ .attach = mem_cgroup_attach,
|
||||
.cancel_attach = mem_cgroup_cancel_attach,
|
||||
.post_attach = mem_cgroup_move_task,
|
||||
.dfl_cftypes = memory_files,
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 84d25079092e..d93d2272e475 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -2869,6 +2869,323 @@ static bool positive_ctrl_err(struct controller_pos *sp, struct controller_pos *
|
||||
sp->refaulted * max(pv->total, 1UL) * pv->gain;
|
||||
}
|
||||
|
||||
+/******************************************************************************
|
||||
+ * mm_struct list
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+enum {
|
||||
+ MM_SCHED_ACTIVE, /* running processes */
|
||||
+ MM_SCHED_INACTIVE, /* sleeping processes */
|
||||
+ MM_LOCK_CONTENTION, /* lock contentions */
|
||||
+ MM_VMA_INTERVAL, /* VMAs within the range of each PUD/PMD/PTE */
|
||||
+ MM_LEAF_OTHER_NODE, /* entries not from the node under reclaim */
|
||||
+ MM_LEAF_OTHER_MEMCG, /* entries not from the memcg under reclaim */
|
||||
+ MM_LEAF_OLD, /* old entries */
|
||||
+ MM_LEAF_YOUNG, /* young entries */
|
||||
+ MM_LEAF_DIRTY, /* dirty entries */
|
||||
+ MM_LEAF_HOLE, /* non-present entries */
|
||||
+ MM_NONLEAF_OLD, /* old non-leaf PMD entries */
|
||||
+ MM_NONLEAF_YOUNG, /* young non-leaf PMD entries */
|
||||
+ NR_MM_STATS
|
||||
+};
|
||||
+
|
||||
+/* mnemonic codes for the stats above */
|
||||
+#define MM_STAT_CODES "aicvnmoydhlu"
|
||||
+
|
||||
+struct lru_gen_mm_list {
|
||||
+ /* the head of a global or per-memcg mm_struct list */
|
||||
+ struct list_head head;
|
||||
+ /* protects the list */
|
||||
+ spinlock_t lock;
|
||||
+ struct {
|
||||
+ /* set to max_seq after each round of walk */
|
||||
+ unsigned long cur_seq;
|
||||
+ /* the next mm on the list to walk */
|
||||
+ struct list_head *iter;
|
||||
+ /* to wait for the last worker to finish */
|
||||
+ struct wait_queue_head wait;
|
||||
+ /* the number of concurrent workers */
|
||||
+ int nr_workers;
|
||||
+ /* stats for debugging */
|
||||
+ unsigned long stats[NR_STAT_GENS][NR_MM_STATS];
|
||||
+ } nodes[0];
|
||||
+};
|
||||
+
|
||||
+static struct lru_gen_mm_list *global_mm_list;
|
||||
+
|
||||
+static struct lru_gen_mm_list *alloc_mm_list(void)
|
||||
+{
|
||||
+ int nid;
|
||||
+ struct lru_gen_mm_list *mm_list;
|
||||
+
|
||||
+ mm_list = kzalloc(struct_size(mm_list, nodes, nr_node_ids), GFP_KERNEL);
|
||||
+ if (!mm_list)
|
||||
+ return NULL;
|
||||
+
|
||||
+ INIT_LIST_HEAD(&mm_list->head);
|
||||
+ spin_lock_init(&mm_list->lock);
|
||||
+
|
||||
+ for_each_node(nid) {
|
||||
+ mm_list->nodes[nid].cur_seq = MIN_NR_GENS;
|
||||
+ mm_list->nodes[nid].iter = &mm_list->head;
|
||||
+ init_waitqueue_head(&mm_list->nodes[nid].wait);
|
||||
+ }
|
||||
+
|
||||
+ return mm_list;
|
||||
+}
|
||||
+
|
||||
+static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+ if (!mem_cgroup_disabled())
|
||||
+ return memcg ? memcg->mm_list : root_mem_cgroup->mm_list;
|
||||
+#endif
|
||||
+ VM_BUG_ON(memcg);
|
||||
+
|
||||
+ return global_mm_list;
|
||||
+}
|
||||
+
|
||||
+void lru_gen_init_mm(struct mm_struct *mm)
|
||||
+{
|
||||
+ INIT_LIST_HEAD(&mm->lrugen.list);
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+ mm->lrugen.memcg = NULL;
|
||||
+#endif
|
||||
+#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
|
||||
+ atomic_set(&mm->lrugen.nr_cpus, 0);
|
||||
+#endif
|
||||
+ nodes_clear(mm->lrugen.nodes);
|
||||
+}
|
||||
+
|
||||
+void lru_gen_add_mm(struct mm_struct *mm)
|
||||
+{
|
||||
+ struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
|
||||
+ struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
|
||||
+
|
||||
+ VM_BUG_ON_MM(!list_empty(&mm->lrugen.list), mm);
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+ VM_BUG_ON_MM(mm->lrugen.memcg, mm);
|
||||
+ WRITE_ONCE(mm->lrugen.memcg, memcg);
|
||||
+#endif
|
||||
+ spin_lock(&mm_list->lock);
|
||||
+ list_add_tail(&mm->lrugen.list, &mm_list->head);
|
||||
+ spin_unlock(&mm_list->lock);
|
||||
+}
|
||||
+
|
||||
+void lru_gen_del_mm(struct mm_struct *mm)
|
||||
+{
|
||||
+ int nid;
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+ struct lru_gen_mm_list *mm_list = get_mm_list(mm->lrugen.memcg);
|
||||
+#else
|
||||
+ struct lru_gen_mm_list *mm_list = get_mm_list(NULL);
|
||||
+#endif
|
||||
+
|
||||
+ spin_lock(&mm_list->lock);
|
||||
+
|
||||
+ for_each_node(nid) {
|
||||
+ if (mm_list->nodes[nid].iter != &mm->lrugen.list)
|
||||
+ continue;
|
||||
+
|
||||
+ mm_list->nodes[nid].iter = mm_list->nodes[nid].iter->next;
|
||||
+ if (mm_list->nodes[nid].iter == &mm_list->head)
|
||||
+ WRITE_ONCE(mm_list->nodes[nid].cur_seq,
|
||||
+ mm_list->nodes[nid].cur_seq + 1);
|
||||
+ }
|
||||
+
|
||||
+ list_del_init(&mm->lrugen.list);
|
||||
+
|
||||
+ spin_unlock(&mm_list->lock);
|
||||
+
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+ mem_cgroup_put(mm->lrugen.memcg);
|
||||
+ WRITE_ONCE(mm->lrugen.memcg, NULL);
|
||||
+#endif
|
||||
+}
|
||||
+
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+int lru_gen_alloc_mm_list(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+ if (mem_cgroup_disabled())
|
||||
+ return 0;
|
||||
+
|
||||
+ memcg->mm_list = alloc_mm_list();
|
||||
+
|
||||
+ return memcg->mm_list ? 0 : -ENOMEM;
|
||||
+}
|
||||
+
|
||||
+void lru_gen_free_mm_list(struct mem_cgroup *memcg)
|
||||
+{
|
||||
+ kfree(memcg->mm_list);
|
||||
+ memcg->mm_list = NULL;
|
||||
+}
|
||||
+
|
||||
+void lru_gen_migrate_mm(struct mm_struct *mm)
|
||||
+{
|
||||
+ struct mem_cgroup *memcg;
|
||||
+
|
||||
+ lockdep_assert_held(&mm->owner->alloc_lock);
|
||||
+
|
||||
+ if (mem_cgroup_disabled())
|
||||
+ return;
|
||||
+
|
||||
+ rcu_read_lock();
|
||||
+ memcg = mem_cgroup_from_task(mm->owner);
|
||||
+ rcu_read_unlock();
|
||||
+ if (memcg == mm->lrugen.memcg)
|
||||
+ return;
|
||||
+
|
||||
+ VM_BUG_ON_MM(!mm->lrugen.memcg, mm);
|
||||
+ VM_BUG_ON_MM(list_empty(&mm->lrugen.list), mm);
|
||||
+
|
||||
+ lru_gen_del_mm(mm);
|
||||
+ lru_gen_add_mm(mm);
|
||||
+}
|
||||
+
|
||||
+static bool mm_has_migrated(struct mm_struct *mm, struct mem_cgroup *memcg)
|
||||
+{
|
||||
+ return READ_ONCE(mm->lrugen.memcg) != memcg;
|
||||
+}
|
||||
+#else
|
||||
+static bool mm_has_migrated(struct mm_struct *mm, struct mem_cgroup *memcg)
|
||||
+{
|
||||
+ return false;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+struct mm_walk_args {
|
||||
+ struct mem_cgroup *memcg;
|
||||
+ unsigned long max_seq;
|
||||
+ unsigned long start_pfn;
|
||||
+ unsigned long end_pfn;
|
||||
+ unsigned long next_addr;
|
||||
+ int node_id;
|
||||
+ int swappiness;
|
||||
+ int batch_size;
|
||||
+ int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES];
|
||||
+ int mm_stats[NR_MM_STATS];
|
||||
+ unsigned long bitmap[0];
|
||||
+};
|
||||
+
|
||||
+static int size_of_mm_walk_args(void)
|
||||
+{
|
||||
+ int size = sizeof(struct mm_walk_args);
|
||||
+
|
||||
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ||
|
||||
+ IS_ENABLED(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG))
|
||||
+ size += sizeof(unsigned long) * BITS_TO_LONGS(PTRS_PER_PMD);
|
||||
+
|
||||
+ return size;
|
||||
+}
|
||||
+
|
||||
+static void reset_mm_stats(struct lru_gen_mm_list *mm_list, bool last,
|
||||
+ struct mm_walk_args *args)
|
||||
+{
|
||||
+ int i;
|
||||
+ int nid = args->node_id;
|
||||
+ int hist = hist_from_seq_or_gen(args->max_seq);
|
||||
+
|
||||
+ lockdep_assert_held(&mm_list->lock);
|
||||
+
|
||||
+ for (i = 0; i < NR_MM_STATS; i++) {
|
||||
+ WRITE_ONCE(mm_list->nodes[nid].stats[hist][i],
|
||||
+ mm_list->nodes[nid].stats[hist][i] + args->mm_stats[i]);
|
||||
+ args->mm_stats[i] = 0;
|
||||
+ }
|
||||
+
|
||||
+ if (!last || NR_STAT_GENS == 1)
|
||||
+ return;
|
||||
+
|
||||
+ hist = hist_from_seq_or_gen(args->max_seq + 1);
|
||||
+ for (i = 0; i < NR_MM_STATS; i++)
|
||||
+ WRITE_ONCE(mm_list->nodes[nid].stats[hist][i], 0);
|
||||
+}
|
||||
+
|
||||
+static bool should_skip_mm(struct mm_struct *mm, struct mm_walk_args *args)
|
||||
+{
|
||||
+ int type;
|
||||
+ unsigned long size = 0;
|
||||
+
|
||||
+ if (!lru_gen_mm_is_active(mm) && !node_isset(args->node_id, mm->lrugen.nodes))
|
||||
+ return true;
|
||||
+
|
||||
+ if (mm_is_oom_victim(mm))
|
||||
+ return true;
|
||||
+
|
||||
+ for (type = !args->swappiness; type < ANON_AND_FILE; type++) {
|
||||
+ size += type ? get_mm_counter(mm, MM_FILEPAGES) :
|
||||
+ get_mm_counter(mm, MM_ANONPAGES) +
|
||||
+ get_mm_counter(mm, MM_SHMEMPAGES);
|
||||
+ }
|
||||
+
|
||||
+ /* leave the legwork to the rmap if mappings are too sparse */
|
||||
+ if (size < max(SWAP_CLUSTER_MAX, mm_pgtables_bytes(mm) / PAGE_SIZE))
|
||||
+ return true;
|
||||
+
|
||||
+ return !mmget_not_zero(mm);
|
||||
+}
|
||||
+
|
||||
+/* To support multiple workers that concurrently walk an mm_struct list. */
|
||||
+static bool get_next_mm(struct mm_walk_args *args, struct mm_struct **iter)
|
||||
+{
|
||||
+ bool last = true;
|
||||
+ struct mm_struct *mm = NULL;
|
||||
+ int nid = args->node_id;
|
||||
+ struct lru_gen_mm_list *mm_list = get_mm_list(args->memcg);
|
||||
+
|
||||
+ if (*iter)
|
||||
+ mmput_async(*iter);
|
||||
+ else if (args->max_seq <= READ_ONCE(mm_list->nodes[nid].cur_seq))
|
||||
+ return false;
|
||||
+
|
||||
+ spin_lock(&mm_list->lock);
|
||||
+
|
||||
+ VM_BUG_ON(args->max_seq > mm_list->nodes[nid].cur_seq + 1);
|
||||
+ VM_BUG_ON(*iter && args->max_seq < mm_list->nodes[nid].cur_seq);
|
||||
+ VM_BUG_ON(*iter && !mm_list->nodes[nid].nr_workers);
|
||||
+
|
||||
+ if (args->max_seq <= mm_list->nodes[nid].cur_seq) {
|
||||
+ last = *iter;
|
||||
+ goto done;
|
||||
+ }
|
||||
+
|
||||
+ if (mm_list->nodes[nid].iter == &mm_list->head) {
|
||||
+ VM_BUG_ON(*iter || mm_list->nodes[nid].nr_workers);
|
||||
+ mm_list->nodes[nid].iter = mm_list->nodes[nid].iter->next;
|
||||
+ }
|
||||
+
|
||||
+ while (!mm && mm_list->nodes[nid].iter != &mm_list->head) {
|
||||
+ mm = list_entry(mm_list->nodes[nid].iter, struct mm_struct, lrugen.list);
|
||||
+ mm_list->nodes[nid].iter = mm_list->nodes[nid].iter->next;
|
||||
+ if (should_skip_mm(mm, args))
|
||||
+ mm = NULL;
|
||||
+
|
||||
+ args->mm_stats[mm ? MM_SCHED_ACTIVE : MM_SCHED_INACTIVE]++;
|
||||
+ }
|
||||
+
|
||||
+ if (mm_list->nodes[nid].iter == &mm_list->head)
|
||||
+ WRITE_ONCE(mm_list->nodes[nid].cur_seq,
|
||||
+ mm_list->nodes[nid].cur_seq + 1);
|
||||
+done:
|
||||
+ if (*iter && !mm)
|
||||
+ mm_list->nodes[nid].nr_workers--;
|
||||
+ if (!*iter && mm)
|
||||
+ mm_list->nodes[nid].nr_workers++;
|
||||
+
|
||||
+ last = last && !mm_list->nodes[nid].nr_workers &&
|
||||
+ mm_list->nodes[nid].iter == &mm_list->head;
|
||||
+
|
||||
+ reset_mm_stats(mm_list, last, args);
|
||||
+
|
||||
+ spin_unlock(&mm_list->lock);
|
||||
+
|
||||
+ *iter = mm;
|
||||
+ if (mm)
|
||||
+ node_clear(nid, mm->lrugen.nodes);
|
||||
+
|
||||
+ return last;
|
||||
+}
|
||||
+
|
||||
/******************************************************************************
|
||||
* state change
|
||||
******************************************************************************/
|
||||
@@ -3096,6 +3413,13 @@ static int __init init_lru_gen(void)
|
||||
{
|
||||
BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS);
|
||||
BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS);
|
||||
+ BUILD_BUG_ON(sizeof(MM_STAT_CODES) != NR_MM_STATS + 1);
|
||||
+
|
||||
+ if (mem_cgroup_disabled()) {
|
||||
+ global_mm_list = alloc_mm_list();
|
||||
+ if (WARN_ON_ONCE(!global_mm_list))
|
||||
+ return -ENOMEM;
|
||||
+ }
|
||||
|
||||
if (hotplug_memory_notifier(lru_gen_online_mem, 0))
|
||||
pr_err("lru_gen: failed to subscribe hotplug notifications\n");
|
||||
--
|
||||
2.31.1.751.gd2f1c929bd-goog
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,768 @@
|
||||
From mboxrd@z Thu Jan 1 00:00:00 1970
|
||||
Return-Path: <linux-kernel-owner@kernel.org>
|
||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
|
||||
aws-us-west-2-korg-lkml-1.web.codeaurora.org
|
||||
X-Spam-Level:
|
||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
|
||||
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
|
||||
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
|
||||
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
|
||||
version=3.4.0
|
||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
|
||||
by smtp.lore.kernel.org (Postfix) with ESMTP id A01B6C433B4
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:39 +0000 (UTC)
|
||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
|
||||
by mail.kernel.org (Postfix) with ESMTP id 7B61B6108C
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:39 +0000 (UTC)
|
||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
|
||||
id S231207AbhETGz5 (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:57 -0400
|
||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37946 "EHLO
|
||||
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
|
||||
with ESMTP id S230519AbhETGzl (ORCPT
|
||||
<rfc822;linux-kernel@vger.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:41 -0400
|
||||
Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49])
|
||||
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 2E0BFC06175F
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:19 -0700 (PDT)
|
||||
Received: by mail-yb1-xb49.google.com with SMTP id o6-20020a5b06460000b02905004326697dso21269948ybq.22
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:19 -0700 (PDT)
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=google.com; s=20161025;
|
||||
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
|
||||
:cc;
|
||||
bh=u1VH9oezkgLqdxJ2J45QA+bE6HFSfI1t2pM/Z9SSfcw=;
|
||||
b=ZCwqDV3PtHM/LJ1Jk3mVLSR0meKIBgFwo1J8fy1XCqpRUSN2IaxDKRl6kQ+Kr5x6il
|
||||
ONEGQ71NTF3X5YriYi1HDhha3PmMaPofh1moI1cvhXBQ3BC7QtVM3R2+bDqRzq1heN8I
|
||||
AIXSKXUdwikQDrunmGAxvTK29DMwl/KeHCe+4v24DaVODm4+A+McG4cMpvigEHvQjTyF
|
||||
v8VcycT2kwKRw3j6yPu6tWP+l/IwnXQiY+KsQ1ti1IgPSlH/WyvqWlUCVB7h2C+o5ZS+
|
||||
/wKVmM36EtyVbHuHWwWCJkvkjGaJnzvDjISmaVK9XCh1D8kFXjAL3uXkcExirtkdXQBN
|
||||
na+A==
|
||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=1e100.net; s=20161025;
|
||||
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
|
||||
:references:subject:from:to:cc;
|
||||
bh=u1VH9oezkgLqdxJ2J45QA+bE6HFSfI1t2pM/Z9SSfcw=;
|
||||
b=LJDAHspg6fnPnue5XfzSf/BNfnVkyVlvJxf0/6UoekRIfXnLhw2K1izgIrVveHAz3I
|
||||
J/NCoJhs8jZ/aPXP/cQXIGGSdtjJW5eDfEf4zm2qn/9oNaQnLZ7BV6aCDANBoPUCqa/r
|
||||
AJIKGX9sXqevfdwgMdyFFNCF5HROG3lCnszQrQm+Y91p8HixZQRUngPI+mUlfY2VvvbM
|
||||
MK5IMhmus/o35uuc/UPt0oRdz7fAgWg0WMJL5aZJMbFtZ9K8x8KpzSQqTf5tF9cNg3jZ
|
||||
+0F1HWd4vCibOEBYJ0aENh+LFGxZoC+et9tQi5mJM42r+AlcYVzilVzRLzpjG0KKaHNi
|
||||
FKBQ==
|
||||
X-Gm-Message-State: AOAM5318NWZEOBXD8F42C3giE31Ee6cXk+kglz/8je4dTQkxkTvmIP2F
|
||||
D7kBlXDwCzr93Jm/4pjbod1wRW/kBWo=
|
||||
X-Google-Smtp-Source: ABdhPJxfQJokAdXpqmzcdeUweiPSNNLZPWEWbQ8Rs0Vczp0sf0utxd24KmzGgE8wHfpeqesf2U2Y+MnqF+Q=
|
||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
|
||||
(user=yuzhao job=sendgmr) by 2002:a25:b3c3:: with SMTP id x3mr5173887ybf.334.1621493658204;
|
||||
Wed, 19 May 2021 23:54:18 -0700 (PDT)
|
||||
Date: Thu, 20 May 2021 00:53:52 -0600
|
||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
Message-Id: <20210520065355.2736558-12-yuzhao@google.com>
|
||||
Mime-Version: 1.0
|
||||
References: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
|
||||
Subject: [PATCH v3 11/14] mm: multigenerational lru: eviction
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
To: linux-mm@kvack.org
|
||||
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
|
||||
Andrew Morton <akpm@linux-foundation.org>,
|
||||
Dave Chinner <david@fromorbit.com>,
|
||||
Dave Hansen <dave.hansen@linux.intel.com>,
|
||||
Donald Carr <sirspudd@gmail.com>,
|
||||
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
|
||||
Johannes Weiner <hannes@cmpxchg.org>,
|
||||
Jonathan Corbet <corbet@lwn.net>,
|
||||
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
|
||||
Konstantin Kharlamov <hi-angel@yandex.ru>,
|
||||
Marcus Seyfarth <m.seyfarth@gmail.com>,
|
||||
Matthew Wilcox <willy@infradead.org>,
|
||||
Mel Gorman <mgorman@suse.de>,
|
||||
Miaohe Lin <linmiaohe@huawei.com>,
|
||||
Michael Larabel <michael@michaellarabel.com>,
|
||||
Michal Hocko <mhocko@suse.com>,
|
||||
Michel Lespinasse <michel@lespinasse.org>,
|
||||
Rik van Riel <riel@surriel.com>,
|
||||
Roman Gushchin <guro@fb.com>,
|
||||
Tim Chen <tim.c.chen@linux.intel.com>,
|
||||
Vlastimil Babka <vbabka@suse.cz>,
|
||||
Yang Shi <shy828301@gmail.com>,
|
||||
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
|
||||
linux-kernel@vger.kernel.org, lkp@lists.01.org,
|
||||
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
|
||||
Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
Precedence: bulk
|
||||
List-ID: <linux-kernel.vger.kernel.org>
|
||||
X-Mailing-List: linux-kernel@vger.kernel.org
|
||||
List-Archive: <https://lore.kernel.org/lkml/>
|
||||
|
||||
The eviction consumes old generations. Given an lruvec, the eviction
|
||||
scans the pages on the per-zone lists indexed by either of min_seq[2].
|
||||
It first tries to select a type based on the values of min_seq[2].
|
||||
When anon and file types are both available from the same generation,
|
||||
it selects the one that has a lower refault rate.
|
||||
|
||||
During a scan, the eviction sorts pages according to their new
|
||||
generation numbers, if the aging has found them referenced. It also
|
||||
moves pages from the tiers that have higher refault rates than tier 0
|
||||
to the next generation. When it finds all the per-zone lists of a
|
||||
selected type are empty, the eviction increments min_seq[2] indexed by
|
||||
this selected type.
|
||||
|
||||
With the aging and the eviction in place, we can build page reclaim in
|
||||
a straightforward manner:
|
||||
1) In order to reduce the latency, direct reclaim only invokes the
|
||||
aging when both min_seq[2] reaches max_seq-1; otherwise it invokes
|
||||
the eviction.
|
||||
2) In order to avoid the aging in the direct reclaim path, kswapd
|
||||
does the background aging. It invokes the aging when either of
|
||||
min_seq[2] reaches max_seq-1; otherwise it invokes the eviction.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
---
|
||||
include/linux/mmzone.h | 5 +
|
||||
mm/vmscan.c | 540 +++++++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 545 insertions(+)
|
||||
|
||||
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
|
||||
index 38de59fcbe54..ded72f44d7e7 100644
|
||||
--- a/include/linux/mmzone.h
|
||||
+++ b/include/linux/mmzone.h
|
||||
@@ -863,6 +863,8 @@ struct deferred_split {
|
||||
};
|
||||
#endif
|
||||
|
||||
+struct mm_walk_args;
|
||||
+
|
||||
/*
|
||||
* On NUMA machines, each NUMA node would have a pg_data_t to describe
|
||||
* it's memory layout. On UMA machines there is a single pglist_data which
|
||||
@@ -968,6 +970,9 @@ typedef struct pglist_data {
|
||||
|
||||
unsigned long flags;
|
||||
|
||||
+#ifdef CONFIG_LRU_GEN
|
||||
+ struct mm_walk_args *mm_walk_args;
|
||||
+#endif
|
||||
ZONE_PADDING(_pad2_)
|
||||
|
||||
/* Per-node vmstats */
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 837d5e6a821e..2f86dcc04c56 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -1311,6 +1311,11 @@ static unsigned int shrink_page_list(struct list_head *page_list,
|
||||
if (!sc->may_unmap && page_mapped(page))
|
||||
goto keep_locked;
|
||||
|
||||
+ /* in case the page was found accessed by lru_gen_scan_around() */
|
||||
+ if (lru_gen_enabled() && !ignore_references &&
|
||||
+ page_mapped(page) && PageReferenced(page))
|
||||
+ goto keep_locked;
|
||||
+
|
||||
may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
|
||||
(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
|
||||
|
||||
@@ -2431,6 +2436,9 @@ static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc)
|
||||
unsigned long file;
|
||||
struct lruvec *target_lruvec;
|
||||
|
||||
+ if (lru_gen_enabled())
|
||||
+ return;
|
||||
+
|
||||
target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
|
||||
|
||||
/*
|
||||
@@ -3970,6 +3978,489 @@ void lru_gen_scan_around(struct page_vma_mapped_walk *pvmw)
|
||||
set_page_dirty(pte_page(pte[i]));
|
||||
}
|
||||
|
||||
+/******************************************************************************
|
||||
+ * the eviction
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+static bool should_skip_page(struct page *page, struct scan_control *sc)
|
||||
+{
|
||||
+ if (!sc->may_unmap && page_mapped(page))
|
||||
+ return true;
|
||||
+
|
||||
+ if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) &&
|
||||
+ (PageDirty(page) || (PageAnon(page) && !PageSwapCache(page))))
|
||||
+ return true;
|
||||
+
|
||||
+ if (!get_page_unless_zero(page))
|
||||
+ return true;
|
||||
+
|
||||
+ if (!TestClearPageLRU(page)) {
|
||||
+ put_page(page);
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static bool sort_page(struct page *page, struct lruvec *lruvec, int tier_to_isolate)
|
||||
+{
|
||||
+ bool success;
|
||||
+ int gen = page_lru_gen(page);
|
||||
+ int type = page_is_file_lru(page);
|
||||
+ int zone = page_zonenum(page);
|
||||
+ int tier = lru_tier_from_usage(page_tier_usage(page));
|
||||
+ struct lrugen *lrugen = &lruvec->evictable;
|
||||
+
|
||||
+ VM_BUG_ON_PAGE(gen == -1, page);
|
||||
+ VM_BUG_ON_PAGE(tier_to_isolate < 0, page);
|
||||
+
|
||||
+ /* a lazy-free page that has been written into? */
|
||||
+ if (type && PageDirty(page) && PageAnon(page)) {
|
||||
+ success = lru_gen_deletion(page, lruvec);
|
||||
+ VM_BUG_ON_PAGE(!success, page);
|
||||
+ SetPageSwapBacked(page);
|
||||
+ add_page_to_lru_list_tail(page, lruvec);
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ /* page_update_gen() has updated the gen #? */
|
||||
+ if (gen != lru_gen_from_seq(lrugen->min_seq[type])) {
|
||||
+ list_move(&page->lru, &lrugen->lists[gen][type][zone]);
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ /* activate this page if its tier has a higher refault rate */
|
||||
+ if (tier_to_isolate < tier) {
|
||||
+ int hist = hist_from_seq_or_gen(gen);
|
||||
+
|
||||
+ page_inc_gen(page, lruvec, false);
|
||||
+ WRITE_ONCE(lrugen->activated[hist][type][tier - 1],
|
||||
+ lrugen->activated[hist][type][tier - 1] + thp_nr_pages(page));
|
||||
+ inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type);
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ /* mark this page for reclaim if it's pending writeback */
|
||||
+ if (PageWriteback(page) || (type && PageDirty(page))) {
|
||||
+ page_inc_gen(page, lruvec, true);
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+static void isolate_page(struct page *page, struct lruvec *lruvec)
|
||||
+{
|
||||
+ bool success;
|
||||
+
|
||||
+ success = lru_gen_deletion(page, lruvec);
|
||||
+ VM_BUG_ON_PAGE(!success, page);
|
||||
+
|
||||
+ if (PageActive(page)) {
|
||||
+ ClearPageActive(page);
|
||||
+ /* make sure shrink_page_list() rejects this page */
|
||||
+ SetPageReferenced(page);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* make sure shrink_page_list() doesn't try to write this page */
|
||||
+ ClearPageReclaim(page);
|
||||
+ /* make sure shrink_page_list() doesn't reject this page */
|
||||
+ ClearPageReferenced(page);
|
||||
+}
|
||||
+
|
||||
+static int scan_pages(struct lruvec *lruvec, struct scan_control *sc, long *nr_to_scan,
|
||||
+ int type, int tier, struct list_head *list)
|
||||
+{
|
||||
+ bool success;
|
||||
+ int gen, zone;
|
||||
+ enum vm_event_item item;
|
||||
+ int sorted = 0;
|
||||
+ int scanned = 0;
|
||||
+ int isolated = 0;
|
||||
+ int batch_size = 0;
|
||||
+ struct lrugen *lrugen = &lruvec->evictable;
|
||||
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
+
|
||||
+ VM_BUG_ON(!list_empty(list));
|
||||
+
|
||||
+ if (get_nr_gens(lruvec, type) == MIN_NR_GENS)
|
||||
+ return -ENOENT;
|
||||
+
|
||||
+ gen = lru_gen_from_seq(lrugen->min_seq[type]);
|
||||
+
|
||||
+ for (zone = sc->reclaim_idx; zone >= 0; zone--) {
|
||||
+ LIST_HEAD(moved);
|
||||
+ int skipped = 0;
|
||||
+ struct list_head *head = &lrugen->lists[gen][type][zone];
|
||||
+
|
||||
+ while (!list_empty(head)) {
|
||||
+ struct page *page = lru_to_page(head);
|
||||
+ int delta = thp_nr_pages(page);
|
||||
+
|
||||
+ VM_BUG_ON_PAGE(PageTail(page), page);
|
||||
+ VM_BUG_ON_PAGE(PageUnevictable(page), page);
|
||||
+ VM_BUG_ON_PAGE(PageActive(page), page);
|
||||
+ VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page);
|
||||
+ VM_BUG_ON_PAGE(page_zonenum(page) != zone, page);
|
||||
+
|
||||
+ prefetchw_prev_lru_page(page, head, flags);
|
||||
+
|
||||
+ scanned += delta;
|
||||
+
|
||||
+ if (sort_page(page, lruvec, tier))
|
||||
+ sorted += delta;
|
||||
+ else if (should_skip_page(page, sc)) {
|
||||
+ list_move(&page->lru, &moved);
|
||||
+ skipped += delta;
|
||||
+ } else {
|
||||
+ isolate_page(page, lruvec);
|
||||
+ list_add(&page->lru, list);
|
||||
+ isolated += delta;
|
||||
+ }
|
||||
+
|
||||
+ if (scanned >= *nr_to_scan || isolated >= SWAP_CLUSTER_MAX ||
|
||||
+ ++batch_size == MAX_BATCH_SIZE)
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ list_splice(&moved, head);
|
||||
+ __count_zid_vm_events(PGSCAN_SKIP, zone, skipped);
|
||||
+
|
||||
+ if (scanned >= *nr_to_scan || isolated >= SWAP_CLUSTER_MAX ||
|
||||
+ batch_size == MAX_BATCH_SIZE)
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ success = try_inc_min_seq(lruvec, type);
|
||||
+
|
||||
+ item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
|
||||
+ if (!cgroup_reclaim(sc)) {
|
||||
+ __count_vm_events(item, scanned);
|
||||
+ __count_vm_events(PGREFILL, sorted);
|
||||
+ }
|
||||
+ __count_memcg_events(memcg, item, scanned);
|
||||
+ __count_memcg_events(memcg, PGREFILL, sorted);
|
||||
+ __count_vm_events(PGSCAN_ANON + type, scanned);
|
||||
+
|
||||
+ *nr_to_scan -= scanned;
|
||||
+
|
||||
+ if (*nr_to_scan <= 0 || success || isolated)
|
||||
+ return isolated;
|
||||
+ /*
|
||||
+ * We may have trouble finding eligible pages due to reclaim_idx,
|
||||
+ * may_unmap and may_writepage. The following check makes sure we won't
|
||||
+ * be stuck if we aren't making enough progress.
|
||||
+ */
|
||||
+ return batch_size == MAX_BATCH_SIZE && sorted >= SWAP_CLUSTER_MAX ? 0 : -ENOENT;
|
||||
+}
|
||||
+
|
||||
+static int get_tier_to_isolate(struct lruvec *lruvec, int type)
|
||||
+{
|
||||
+ int tier;
|
||||
+ struct controller_pos sp, pv;
|
||||
+
|
||||
+ /*
|
||||
+ * Ideally we don't want to evict upper tiers that have higher refault
|
||||
+ * rates. However, we need to leave a margin for the fluctuations in
|
||||
+ * refault rates. So we use a larger gain factor to make sure upper
|
||||
+ * tiers are indeed more active. We choose 2 because the lowest upper
|
||||
+ * tier would have twice of the refault rate of the base tier, according
|
||||
+ * to their numbers of accesses.
|
||||
+ */
|
||||
+ read_controller_pos(&sp, lruvec, type, 0, 1);
|
||||
+ for (tier = 1; tier < MAX_NR_TIERS; tier++) {
|
||||
+ read_controller_pos(&pv, lruvec, type, tier, 2);
|
||||
+ if (!positive_ctrl_err(&sp, &pv))
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ return tier - 1;
|
||||
+}
|
||||
+
|
||||
+static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_to_isolate)
|
||||
+{
|
||||
+ int type, tier;
|
||||
+ struct controller_pos sp, pv;
|
||||
+ int gain[ANON_AND_FILE] = { swappiness, 200 - swappiness };
|
||||
+
|
||||
+ /*
|
||||
+ * Compare the refault rates between the base tiers of anon and file to
|
||||
+ * determine which type to evict. Also need to compare the refault rates
|
||||
+ * of the upper tiers of the selected type with that of the base tier of
|
||||
+ * the other type to determine which tier of the selected type to evict.
|
||||
+ */
|
||||
+ read_controller_pos(&sp, lruvec, 0, 0, gain[0]);
|
||||
+ read_controller_pos(&pv, lruvec, 1, 0, gain[1]);
|
||||
+ type = positive_ctrl_err(&sp, &pv);
|
||||
+
|
||||
+ read_controller_pos(&sp, lruvec, !type, 0, gain[!type]);
|
||||
+ for (tier = 1; tier < MAX_NR_TIERS; tier++) {
|
||||
+ read_controller_pos(&pv, lruvec, type, tier, gain[type]);
|
||||
+ if (!positive_ctrl_err(&sp, &pv))
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ *tier_to_isolate = tier - 1;
|
||||
+
|
||||
+ return type;
|
||||
+}
|
||||
+
|
||||
+static int isolate_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
|
||||
+ long *nr_to_scan, int *type_to_scan, struct list_head *list)
|
||||
+{
|
||||
+ int i;
|
||||
+ int type;
|
||||
+ int isolated;
|
||||
+ int tier = -1;
|
||||
+ DEFINE_MAX_SEQ();
|
||||
+ DEFINE_MIN_SEQ();
|
||||
+
|
||||
+ VM_BUG_ON(!seq_is_valid(lruvec));
|
||||
+
|
||||
+ if (max_nr_gens(max_seq, min_seq, swappiness) == MIN_NR_GENS)
|
||||
+ return 0;
|
||||
+ /*
|
||||
+ * Try to select a type based on generations and swappiness, and if that
|
||||
+ * fails, fall back to get_type_to_scan(). When anon and file are both
|
||||
+ * available from the same generation, swappiness 200 is interpreted as
|
||||
+ * anon first and swappiness 1 is interpreted as file first.
|
||||
+ */
|
||||
+ type = !swappiness || min_seq[0] > min_seq[1] ||
|
||||
+ (min_seq[0] == min_seq[1] && swappiness != 200 &&
|
||||
+ (swappiness == 1 || get_type_to_scan(lruvec, swappiness, &tier)));
|
||||
+
|
||||
+ if (tier == -1)
|
||||
+ tier = get_tier_to_isolate(lruvec, type);
|
||||
+
|
||||
+ for (i = !swappiness; i < ANON_AND_FILE; i++) {
|
||||
+ isolated = scan_pages(lruvec, sc, nr_to_scan, type, tier, list);
|
||||
+ if (isolated >= 0)
|
||||
+ break;
|
||||
+
|
||||
+ type = !type;
|
||||
+ tier = get_tier_to_isolate(lruvec, type);
|
||||
+ }
|
||||
+
|
||||
+ if (isolated < 0)
|
||||
+ isolated = *nr_to_scan = 0;
|
||||
+
|
||||
+ *type_to_scan = type;
|
||||
+
|
||||
+ return isolated;
|
||||
+}
|
||||
+
|
||||
+/* Main function used by the foreground, the background and the user-triggered eviction. */
|
||||
+static bool evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness,
|
||||
+ long *nr_to_scan)
|
||||
+{
|
||||
+ int type;
|
||||
+ int isolated;
|
||||
+ int reclaimed;
|
||||
+ LIST_HEAD(list);
|
||||
+ struct page *page;
|
||||
+ enum vm_event_item item;
|
||||
+ struct reclaim_stat stat;
|
||||
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
|
||||
+
|
||||
+ spin_lock_irq(&lruvec->lru_lock);
|
||||
+
|
||||
+ isolated = isolate_pages(lruvec, sc, swappiness, nr_to_scan, &type, &list);
|
||||
+ VM_BUG_ON(list_empty(&list) == !!isolated);
|
||||
+
|
||||
+ if (isolated)
|
||||
+ __mod_node_page_state(pgdat, NR_ISOLATED_ANON + type, isolated);
|
||||
+
|
||||
+ spin_unlock_irq(&lruvec->lru_lock);
|
||||
+
|
||||
+ if (!isolated)
|
||||
+ goto done;
|
||||
+
|
||||
+ reclaimed = shrink_page_list(&list, pgdat, sc, &stat, false);
|
||||
+ /*
|
||||
+ * We need to prevent rejected pages from being added back to the same
|
||||
+ * lists they were isolated from. Otherwise we may risk looping on them
|
||||
+ * forever. We use PageActive() or !PageReferenced() && PageWorkingset()
|
||||
+ * to tell lru_gen_addition() not to add them to the oldest generation.
|
||||
+ */
|
||||
+ list_for_each_entry(page, &list, lru) {
|
||||
+ if (PageMlocked(page))
|
||||
+ continue;
|
||||
+
|
||||
+ if (page_mapped(page) && PageReferenced(page))
|
||||
+ SetPageActive(page);
|
||||
+ else {
|
||||
+ ClearPageActive(page);
|
||||
+ SetPageWorkingset(page);
|
||||
+ }
|
||||
+ ClearPageReferenced(page);
|
||||
+ }
|
||||
+
|
||||
+ spin_lock_irq(&lruvec->lru_lock);
|
||||
+
|
||||
+ move_pages_to_lru(lruvec, &list);
|
||||
+
|
||||
+ __mod_node_page_state(pgdat, NR_ISOLATED_ANON + type, -isolated);
|
||||
+
|
||||
+ item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
|
||||
+ if (!cgroup_reclaim(sc))
|
||||
+ __count_vm_events(item, reclaimed);
|
||||
+ __count_memcg_events(lruvec_memcg(lruvec), item, reclaimed);
|
||||
+ __count_vm_events(PGSTEAL_ANON + type, reclaimed);
|
||||
+
|
||||
+ spin_unlock_irq(&lruvec->lru_lock);
|
||||
+
|
||||
+ mem_cgroup_uncharge_list(&list);
|
||||
+ free_unref_page_list(&list);
|
||||
+
|
||||
+ sc->nr_reclaimed += reclaimed;
|
||||
+done:
|
||||
+ return *nr_to_scan > 0 && sc->nr_reclaimed < sc->nr_to_reclaim;
|
||||
+}
|
||||
+
|
||||
+/******************************************************************************
|
||||
+ * page reclaim
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+static int get_swappiness(struct lruvec *lruvec)
|
||||
+{
|
||||
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
+ int swappiness = mem_cgroup_get_nr_swap_pages(memcg) >= (long)SWAP_CLUSTER_MAX ?
|
||||
+ mem_cgroup_swappiness(memcg) : 0;
|
||||
+
|
||||
+ VM_BUG_ON(swappiness > 200U);
|
||||
+
|
||||
+ return swappiness;
|
||||
+}
|
||||
+
|
||||
+static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
|
||||
+ int swappiness)
|
||||
+{
|
||||
+ int gen, type, zone;
|
||||
+ long nr_to_scan = 0;
|
||||
+ struct lrugen *lrugen = &lruvec->evictable;
|
||||
+ DEFINE_MAX_SEQ();
|
||||
+ DEFINE_MIN_SEQ();
|
||||
+
|
||||
+ lru_add_drain();
|
||||
+
|
||||
+ for (type = !swappiness; type < ANON_AND_FILE; type++) {
|
||||
+ unsigned long seq;
|
||||
+
|
||||
+ for (seq = min_seq[type]; seq <= max_seq; seq++) {
|
||||
+ gen = lru_gen_from_seq(seq);
|
||||
+
|
||||
+ for (zone = 0; zone <= sc->reclaim_idx; zone++)
|
||||
+ nr_to_scan += READ_ONCE(lrugen->sizes[gen][type][zone]);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ nr_to_scan = max(nr_to_scan, 0L);
|
||||
+ nr_to_scan = round_up(nr_to_scan >> sc->priority, SWAP_CLUSTER_MAX);
|
||||
+
|
||||
+ if (max_nr_gens(max_seq, min_seq, swappiness) > MIN_NR_GENS)
|
||||
+ return nr_to_scan;
|
||||
+
|
||||
+ /* kswapd uses lru_gen_age_node() */
|
||||
+ if (current_is_kswapd())
|
||||
+ return 0;
|
||||
+
|
||||
+ return walk_mm_list(lruvec, max_seq, sc, swappiness, NULL) ? nr_to_scan : 0;
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
+{
|
||||
+ struct blk_plug plug;
|
||||
+ unsigned long scanned = 0;
|
||||
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
+
|
||||
+ blk_start_plug(&plug);
|
||||
+
|
||||
+ while (true) {
|
||||
+ long nr_to_scan;
|
||||
+ int swappiness = sc->may_swap ? get_swappiness(lruvec) : 0;
|
||||
+
|
||||
+ nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness) - scanned;
|
||||
+ if (nr_to_scan < (long)SWAP_CLUSTER_MAX)
|
||||
+ break;
|
||||
+
|
||||
+ scanned += nr_to_scan;
|
||||
+
|
||||
+ if (!evict_pages(lruvec, sc, swappiness, &nr_to_scan))
|
||||
+ break;
|
||||
+
|
||||
+ scanned -= nr_to_scan;
|
||||
+
|
||||
+ if (mem_cgroup_below_min(memcg) ||
|
||||
+ (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
|
||||
+ break;
|
||||
+
|
||||
+ cond_resched();
|
||||
+ }
|
||||
+
|
||||
+ blk_finish_plug(&plug);
|
||||
+}
|
||||
+
|
||||
+/******************************************************************************
|
||||
+ * the background aging
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+static int lru_gen_spread = MIN_NR_GENS;
|
||||
+
|
||||
+static void try_walk_mm_list(struct lruvec *lruvec, struct scan_control *sc)
|
||||
+{
|
||||
+ int gen, type, zone;
|
||||
+ long old_and_young[2] = {};
|
||||
+ int spread = READ_ONCE(lru_gen_spread);
|
||||
+ int swappiness = get_swappiness(lruvec);
|
||||
+ struct lrugen *lrugen = &lruvec->evictable;
|
||||
+ struct pglist_data *pgdat = lruvec_pgdat(lruvec);
|
||||
+ DEFINE_MAX_SEQ();
|
||||
+ DEFINE_MIN_SEQ();
|
||||
+
|
||||
+ lru_add_drain();
|
||||
+
|
||||
+ for (type = !swappiness; type < ANON_AND_FILE; type++) {
|
||||
+ unsigned long seq;
|
||||
+
|
||||
+ for (seq = min_seq[type]; seq <= max_seq; seq++) {
|
||||
+ gen = lru_gen_from_seq(seq);
|
||||
+
|
||||
+ for (zone = 0; zone < MAX_NR_ZONES; zone++)
|
||||
+ old_and_young[seq == max_seq] +=
|
||||
+ READ_ONCE(lrugen->sizes[gen][type][zone]);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ old_and_young[0] = max(old_and_young[0], 0L);
|
||||
+ old_and_young[1] = max(old_and_young[1], 0L);
|
||||
+
|
||||
+ /* try to spread pages out across spread+1 generations */
|
||||
+ if (old_and_young[0] >= old_and_young[1] * spread &&
|
||||
+ min_nr_gens(max_seq, min_seq, swappiness) > max(spread, MIN_NR_GENS))
|
||||
+ return;
|
||||
+
|
||||
+ walk_mm_list(lruvec, max_seq, sc, swappiness, pgdat->mm_walk_args);
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
|
||||
+{
|
||||
+ struct mem_cgroup *memcg;
|
||||
+
|
||||
+ VM_BUG_ON(!current_is_kswapd());
|
||||
+
|
||||
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
|
||||
+ do {
|
||||
+ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
|
||||
+
|
||||
+ if (!mem_cgroup_below_min(memcg) &&
|
||||
+ (!mem_cgroup_below_low(memcg) || sc->memcg_low_reclaim))
|
||||
+ try_walk_mm_list(lruvec, sc);
|
||||
+
|
||||
+ cond_resched();
|
||||
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
|
||||
+}
|
||||
+
|
||||
/******************************************************************************
|
||||
* state change
|
||||
******************************************************************************/
|
||||
@@ -4172,6 +4663,21 @@ static int __meminit __maybe_unused lru_gen_online_mem(struct notifier_block *se
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
+static void lru_gen_start_kswapd(int nid)
|
||||
+{
|
||||
+ struct pglist_data *pgdat = NODE_DATA(nid);
|
||||
+
|
||||
+ pgdat->mm_walk_args = kvzalloc_node(size_of_mm_walk_args(), GFP_KERNEL, nid);
|
||||
+ WARN_ON_ONCE(!pgdat->mm_walk_args);
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_stop_kswapd(int nid)
|
||||
+{
|
||||
+ struct pglist_data *pgdat = NODE_DATA(nid);
|
||||
+
|
||||
+ kvfree(pgdat->mm_walk_args);
|
||||
+}
|
||||
+
|
||||
/******************************************************************************
|
||||
* initialization
|
||||
******************************************************************************/
|
||||
@@ -4220,6 +4726,24 @@ static int __init init_lru_gen(void)
|
||||
*/
|
||||
arch_initcall(init_lru_gen);
|
||||
|
||||
+#else /* CONFIG_LRU_GEN */
|
||||
+
|
||||
+static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_start_kswapd(int nid)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_stop_kswapd(int nid)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
#endif /* CONFIG_LRU_GEN */
|
||||
|
||||
static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
@@ -4233,6 +4757,11 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
|
||||
struct blk_plug plug;
|
||||
bool scan_adjusted;
|
||||
|
||||
+ if (lru_gen_enabled()) {
|
||||
+ lru_gen_shrink_lruvec(lruvec, sc);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
get_scan_count(lruvec, sc, nr);
|
||||
|
||||
/* Record the original scan target for proportional adjustments later */
|
||||
@@ -4699,6 +5228,9 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat)
|
||||
struct lruvec *target_lruvec;
|
||||
unsigned long refaults;
|
||||
|
||||
+ if (lru_gen_enabled())
|
||||
+ return;
|
||||
+
|
||||
target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
|
||||
refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON);
|
||||
target_lruvec->refaults[0] = refaults;
|
||||
@@ -5073,6 +5605,11 @@ static void age_active_anon(struct pglist_data *pgdat,
|
||||
struct mem_cgroup *memcg;
|
||||
struct lruvec *lruvec;
|
||||
|
||||
+ if (lru_gen_enabled()) {
|
||||
+ lru_gen_age_node(pgdat, sc);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
if (!total_swap_pages)
|
||||
return;
|
||||
|
||||
@@ -5753,6 +6290,8 @@ int kswapd_run(int nid)
|
||||
if (pgdat->kswapd)
|
||||
return 0;
|
||||
|
||||
+ lru_gen_start_kswapd(nid);
|
||||
+
|
||||
pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
|
||||
if (IS_ERR(pgdat->kswapd)) {
|
||||
/* failure at boot is fatal */
|
||||
@@ -5775,6 +6314,7 @@ void kswapd_stop(int nid)
|
||||
if (kswapd) {
|
||||
kthread_stop(kswapd);
|
||||
NODE_DATA(nid)->kswapd = NULL;
|
||||
+ lru_gen_stop_kswapd(nid);
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.31.1.751.gd2f1c929bd-goog
|
||||
|
||||
|
@ -0,0 +1,572 @@
|
||||
From mboxrd@z Thu Jan 1 00:00:00 1970
|
||||
Return-Path: <linux-kernel-owner@kernel.org>
|
||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
|
||||
aws-us-west-2-korg-lkml-1.web.codeaurora.org
|
||||
X-Spam-Level:
|
||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
|
||||
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
|
||||
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
|
||||
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
|
||||
version=3.4.0
|
||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
|
||||
by smtp.lore.kernel.org (Postfix) with ESMTP id 17ED0C43460
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:41 +0000 (UTC)
|
||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
|
||||
by mail.kernel.org (Postfix) with ESMTP id E04C861184
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:40 +0000 (UTC)
|
||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
|
||||
id S231223AbhETG4A (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
|
||||
Thu, 20 May 2021 02:56:00 -0400
|
||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37944 "EHLO
|
||||
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
|
||||
with ESMTP id S230473AbhETGzl (ORCPT
|
||||
<rfc822;linux-kernel@vger.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:41 -0400
|
||||
Received: from mail-qv1-xf49.google.com (mail-qv1-xf49.google.com [IPv6:2607:f8b0:4864:20::f49])
|
||||
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C0A0DC061761
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:20 -0700 (PDT)
|
||||
Received: by mail-qv1-xf49.google.com with SMTP id d9-20020a0ce4490000b02901f0bee07112so6151672qvm.7
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:20 -0700 (PDT)
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=google.com; s=20161025;
|
||||
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
|
||||
:cc;
|
||||
bh=D+kCP8KjWdhzq6b9AfWqzFHrIcC1HBgTAlg7o1thC8s=;
|
||||
b=Ao3JFmOKgU6GUK7wOdKwO7smRq1lLjob3ltec82Ju9mPzN+QmdjLHzBqk1xnUggESF
|
||||
TqhhI3jybr858NfIj3PCXK9+qR3zojc5Pd/Quyp44VSHbor2BjBUQqP/t8M487uM4XwV
|
||||
WngIjYnvrYzwh9qjiSWbyBv7yV1ee386Z4r6QxKE99zk0yauu04cnFkSyQcJzvL7ST9Y
|
||||
gunIrZGlwh/QB3VgMvJBx8LLRtENwU2C6hFb2JqIhNx7ECiYmfTdxZ3hqTeciT6fp1mo
|
||||
VJhTuLMD0zN+BmbL7udJFNaRaLEzDq8aaX3Qgn7+HzfVXcaIkWuHdLfLiqx6NOEuXJPh
|
||||
aFOw==
|
||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=1e100.net; s=20161025;
|
||||
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
|
||||
:references:subject:from:to:cc;
|
||||
bh=D+kCP8KjWdhzq6b9AfWqzFHrIcC1HBgTAlg7o1thC8s=;
|
||||
b=Gjw4qCU2aoOaAwRGh+lY4+hcMXHU7TPGrsgdc0GeQGBjEbSelYAeLx6lfapzEMs4gS
|
||||
OINghBuL7TEDPHzWY92K4Snh4Pm597qGEmIgplE4cMHoWrN8rxc/C+gB/gsW/UgvllX2
|
||||
o0zgNR9ve4/y3vOdD7xGYl0wDq608mGKsoRKDgVf/SEkDCldm1xmB/MaJihWPSw4niAH
|
||||
KRDP84OugEgIRgRj3MrqoREu5cyjw4ClxbQ8HeaRnw1wt6isXGqlXjiBVveyjSbrV+Q/
|
||||
luG3YEEGwMlCYbMovQTSmBB7n0pN8Ihg0qVPmr6GcmbpcwYKQWv1tIves1vbV0kdb4aN
|
||||
u9HQ==
|
||||
X-Gm-Message-State: AOAM530vodqQk47GPTvruvJy4njfXeKD7559Rhxl39MVYv2cMgQ/XPuI
|
||||
uuzSatZDrJOCQGdTQCyuRTP/IMipOlM=
|
||||
X-Google-Smtp-Source: ABdhPJwiri0QbWt8YjsEa+N+Ooz0Ku0LVYpwKy1ZvcZJzOwoHQf1X931BLtxTF10spH4XfRsXE5x6SYzq+w=
|
||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
|
||||
(user=yuzhao job=sendgmr) by 2002:ad4:5767:: with SMTP id r7mr3879143qvx.1.1621493659852;
|
||||
Wed, 19 May 2021 23:54:19 -0700 (PDT)
|
||||
Date: Thu, 20 May 2021 00:53:53 -0600
|
||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
Message-Id: <20210520065355.2736558-13-yuzhao@google.com>
|
||||
Mime-Version: 1.0
|
||||
References: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
|
||||
Subject: [PATCH v3 12/14] mm: multigenerational lru: user interface
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
To: linux-mm@kvack.org
|
||||
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
|
||||
Andrew Morton <akpm@linux-foundation.org>,
|
||||
Dave Chinner <david@fromorbit.com>,
|
||||
Dave Hansen <dave.hansen@linux.intel.com>,
|
||||
Donald Carr <sirspudd@gmail.com>,
|
||||
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
|
||||
Johannes Weiner <hannes@cmpxchg.org>,
|
||||
Jonathan Corbet <corbet@lwn.net>,
|
||||
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
|
||||
Konstantin Kharlamov <hi-angel@yandex.ru>,
|
||||
Marcus Seyfarth <m.seyfarth@gmail.com>,
|
||||
Matthew Wilcox <willy@infradead.org>,
|
||||
Mel Gorman <mgorman@suse.de>,
|
||||
Miaohe Lin <linmiaohe@huawei.com>,
|
||||
Michael Larabel <michael@michaellarabel.com>,
|
||||
Michal Hocko <mhocko@suse.com>,
|
||||
Michel Lespinasse <michel@lespinasse.org>,
|
||||
Rik van Riel <riel@surriel.com>,
|
||||
Roman Gushchin <guro@fb.com>,
|
||||
Tim Chen <tim.c.chen@linux.intel.com>,
|
||||
Vlastimil Babka <vbabka@suse.cz>,
|
||||
Yang Shi <shy828301@gmail.com>,
|
||||
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
|
||||
linux-kernel@vger.kernel.org, lkp@lists.01.org,
|
||||
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
|
||||
Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
Precedence: bulk
|
||||
List-ID: <linux-kernel.vger.kernel.org>
|
||||
X-Mailing-List: linux-kernel@vger.kernel.org
|
||||
List-Archive: <https://lore.kernel.org/lkml/>
|
||||
|
||||
Add a sysfs file /sys/kernel/mm/lru_gen/enabled to enable and disable
|
||||
the multigenerational lru at runtime.
|
||||
|
||||
Add a sysfs file /sys/kernel/mm/lru_gen/spread to optionally spread
|
||||
pages out across more than three generations. More generations make
|
||||
the background aging more aggressive.
|
||||
|
||||
Add a debugfs file /sys/kernel/debug/lru_gen to monitor the
|
||||
multigenerational lru and trigger the aging and the eviction. This
|
||||
file has the following output:
|
||||
memcg memcg_id memcg_path
|
||||
node node_id
|
||||
min_gen birth_time anon_size file_size
|
||||
...
|
||||
max_gen birth_time anon_size file_size
|
||||
|
||||
Given a memcg and a node, "min_gen" is the oldest generation (number)
|
||||
and "max_gen" is the youngest. Birth time is in milliseconds. The
|
||||
sizes of anon and file types are in pages.
|
||||
|
||||
This file takes the following input:
|
||||
+ memcg_id node_id gen [swappiness]
|
||||
- memcg_id node_id gen [swappiness] [nr_to_reclaim]
|
||||
|
||||
The first command line accounts referenced pages to generation
|
||||
"max_gen" and creates the next generation "max_gen"+1. In this case,
|
||||
"gen" should be equal to "max_gen". A swap file and a non-zero
|
||||
"swappiness" are required to scan anon type. If swapping is not
|
||||
desired, set vm.swappiness to 0. The second command line evicts
|
||||
generations less than or equal to "gen". In this case, "gen" should be
|
||||
less than "max_gen"-1 as "max_gen" and "max_gen"-1 are active
|
||||
generations and therefore protected from the eviction. Use
|
||||
"nr_to_reclaim" to limit the number of pages to evict. Multiple
|
||||
command lines are supported, so does concatenation with delimiters ","
|
||||
and ";".
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
---
|
||||
mm/vmscan.c | 403 ++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 403 insertions(+)
|
||||
|
||||
diff --git a/mm/vmscan.c b/mm/vmscan.c
|
||||
index 2f86dcc04c56..ff2deec24c64 100644
|
||||
--- a/mm/vmscan.c
|
||||
+++ b/mm/vmscan.c
|
||||
@@ -52,6 +52,8 @@
|
||||
#include <linux/memory.h>
|
||||
#include <linux/pagewalk.h>
|
||||
#include <linux/shmem_fs.h>
|
||||
+#include <linux/ctype.h>
|
||||
+#include <linux/debugfs.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/div64.h>
|
||||
@@ -4678,6 +4680,401 @@ static void lru_gen_stop_kswapd(int nid)
|
||||
kvfree(pgdat->mm_walk_args);
|
||||
}
|
||||
|
||||
+/******************************************************************************
|
||||
+ * sysfs interface
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+static ssize_t show_lru_gen_spread(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
+ char *buf)
|
||||
+{
|
||||
+ return sprintf(buf, "%d\n", READ_ONCE(lru_gen_spread));
|
||||
+}
|
||||
+
|
||||
+static ssize_t store_lru_gen_spread(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
+ const char *buf, size_t len)
|
||||
+{
|
||||
+ int spread;
|
||||
+
|
||||
+ if (kstrtoint(buf, 10, &spread) || spread >= MAX_NR_GENS)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ WRITE_ONCE(lru_gen_spread, spread);
|
||||
+
|
||||
+ return len;
|
||||
+}
|
||||
+
|
||||
+static struct kobj_attribute lru_gen_spread_attr = __ATTR(
|
||||
+ spread, 0644, show_lru_gen_spread, store_lru_gen_spread
|
||||
+);
|
||||
+
|
||||
+static ssize_t show_lru_gen_enabled(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
+ char *buf)
|
||||
+{
|
||||
+ return snprintf(buf, PAGE_SIZE, "%d\n", lru_gen_enabled());
|
||||
+}
|
||||
+
|
||||
+static ssize_t store_lru_gen_enabled(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
+ const char *buf, size_t len)
|
||||
+{
|
||||
+ int enable;
|
||||
+
|
||||
+ if (kstrtoint(buf, 10, &enable))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ lru_gen_set_state(enable, true, false);
|
||||
+
|
||||
+ return len;
|
||||
+}
|
||||
+
|
||||
+static struct kobj_attribute lru_gen_enabled_attr = __ATTR(
|
||||
+ enabled, 0644, show_lru_gen_enabled, store_lru_gen_enabled
|
||||
+);
|
||||
+
|
||||
+static struct attribute *lru_gen_attrs[] = {
|
||||
+ &lru_gen_spread_attr.attr,
|
||||
+ &lru_gen_enabled_attr.attr,
|
||||
+ NULL
|
||||
+};
|
||||
+
|
||||
+static struct attribute_group lru_gen_attr_group = {
|
||||
+ .name = "lru_gen",
|
||||
+ .attrs = lru_gen_attrs,
|
||||
+};
|
||||
+
|
||||
+/******************************************************************************
|
||||
+ * debugfs interface
|
||||
+ ******************************************************************************/
|
||||
+
|
||||
+static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos)
|
||||
+{
|
||||
+ struct mem_cgroup *memcg;
|
||||
+ loff_t nr_to_skip = *pos;
|
||||
+
|
||||
+ m->private = kzalloc(PATH_MAX, GFP_KERNEL);
|
||||
+ if (!m->private)
|
||||
+ return ERR_PTR(-ENOMEM);
|
||||
+
|
||||
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
|
||||
+ do {
|
||||
+ int nid;
|
||||
+
|
||||
+ for_each_node_state(nid, N_MEMORY) {
|
||||
+ if (!nr_to_skip--)
|
||||
+ return mem_cgroup_lruvec(memcg, NODE_DATA(nid));
|
||||
+ }
|
||||
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
|
||||
+
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_seq_stop(struct seq_file *m, void *v)
|
||||
+{
|
||||
+ if (!IS_ERR_OR_NULL(v))
|
||||
+ mem_cgroup_iter_break(NULL, lruvec_memcg(v));
|
||||
+
|
||||
+ kfree(m->private);
|
||||
+ m->private = NULL;
|
||||
+}
|
||||
+
|
||||
+static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
+{
|
||||
+ int nid = lruvec_pgdat(v)->node_id;
|
||||
+ struct mem_cgroup *memcg = lruvec_memcg(v);
|
||||
+
|
||||
+ ++*pos;
|
||||
+
|
||||
+ nid = next_memory_node(nid);
|
||||
+ if (nid == MAX_NUMNODES) {
|
||||
+ memcg = mem_cgroup_iter(NULL, memcg, NULL);
|
||||
+ if (!memcg)
|
||||
+ return NULL;
|
||||
+
|
||||
+ nid = first_memory_node;
|
||||
+ }
|
||||
+
|
||||
+ return mem_cgroup_lruvec(memcg, NODE_DATA(nid));
|
||||
+}
|
||||
+
|
||||
+static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
|
||||
+ unsigned long max_seq, unsigned long *min_seq,
|
||||
+ unsigned long seq)
|
||||
+{
|
||||
+ int i;
|
||||
+ int type, tier;
|
||||
+ int hist = hist_from_seq_or_gen(seq);
|
||||
+ struct lrugen *lrugen = &lruvec->evictable;
|
||||
+ int nid = lruvec_pgdat(lruvec)->node_id;
|
||||
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
+ struct lru_gen_mm_list *mm_list = get_mm_list(memcg);
|
||||
+
|
||||
+ for (tier = 0; tier < MAX_NR_TIERS; tier++) {
|
||||
+ seq_printf(m, " %10d", tier);
|
||||
+ for (type = 0; type < ANON_AND_FILE; type++) {
|
||||
+ unsigned long n[3] = {};
|
||||
+
|
||||
+ if (seq == max_seq) {
|
||||
+ n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]);
|
||||
+ n[1] = READ_ONCE(lrugen->avg_total[type][tier]);
|
||||
+
|
||||
+ seq_printf(m, " %10luR %10luT %10lu ", n[0], n[1], n[2]);
|
||||
+ } else if (seq == min_seq[type] || NR_STAT_GENS > 1) {
|
||||
+ n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]);
|
||||
+ n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]);
|
||||
+ if (tier)
|
||||
+ n[2] = READ_ONCE(lrugen->activated[hist][type][tier - 1]);
|
||||
+
|
||||
+ seq_printf(m, " %10lur %10lue %10lua", n[0], n[1], n[2]);
|
||||
+ } else
|
||||
+ seq_puts(m, " 0 0 0 ");
|
||||
+ }
|
||||
+ seq_putc(m, '\n');
|
||||
+ }
|
||||
+
|
||||
+ seq_puts(m, " ");
|
||||
+ for (i = 0; i < NR_MM_STATS; i++) {
|
||||
+ if (seq == max_seq && NR_STAT_GENS == 1)
|
||||
+ seq_printf(m, " %10lu%c", READ_ONCE(mm_list->nodes[nid].stats[hist][i]),
|
||||
+ toupper(MM_STAT_CODES[i]));
|
||||
+ else if (seq != max_seq && NR_STAT_GENS > 1)
|
||||
+ seq_printf(m, " %10lu%c", READ_ONCE(mm_list->nodes[nid].stats[hist][i]),
|
||||
+ MM_STAT_CODES[i]);
|
||||
+ else
|
||||
+ seq_puts(m, " 0 ");
|
||||
+ }
|
||||
+ seq_putc(m, '\n');
|
||||
+}
|
||||
+
|
||||
+static int lru_gen_seq_show(struct seq_file *m, void *v)
|
||||
+{
|
||||
+ unsigned long seq;
|
||||
+ bool full = !debugfs_real_fops(m->file)->write;
|
||||
+ struct lruvec *lruvec = v;
|
||||
+ struct lrugen *lrugen = &lruvec->evictable;
|
||||
+ int nid = lruvec_pgdat(lruvec)->node_id;
|
||||
+ struct mem_cgroup *memcg = lruvec_memcg(lruvec);
|
||||
+ DEFINE_MAX_SEQ();
|
||||
+ DEFINE_MIN_SEQ();
|
||||
+
|
||||
+ if (nid == first_memory_node) {
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+ if (memcg)
|
||||
+ cgroup_path(memcg->css.cgroup, m->private, PATH_MAX);
|
||||
+#endif
|
||||
+ seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), (char *)m->private);
|
||||
+ }
|
||||
+
|
||||
+ seq_printf(m, " node %5d\n", nid);
|
||||
+
|
||||
+ seq = full ? (max_seq < MAX_NR_GENS ? 0 : max_seq - MAX_NR_GENS + 1) :
|
||||
+ min(min_seq[0], min_seq[1]);
|
||||
+
|
||||
+ for (; seq <= max_seq; seq++) {
|
||||
+ int gen, type, zone;
|
||||
+ unsigned int msecs;
|
||||
+
|
||||
+ gen = lru_gen_from_seq(seq);
|
||||
+ msecs = jiffies_to_msecs(jiffies - READ_ONCE(lrugen->timestamps[gen]));
|
||||
+
|
||||
+ seq_printf(m, " %10lu %10u", seq, msecs);
|
||||
+
|
||||
+ for (type = 0; type < ANON_AND_FILE; type++) {
|
||||
+ long size = 0;
|
||||
+
|
||||
+ if (seq < min_seq[type]) {
|
||||
+ seq_puts(m, " -0 ");
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ for (zone = 0; zone < MAX_NR_ZONES; zone++)
|
||||
+ size += READ_ONCE(lrugen->sizes[gen][type][zone]);
|
||||
+
|
||||
+ seq_printf(m, " %10lu ", max(size, 0L));
|
||||
+ }
|
||||
+
|
||||
+ seq_putc(m, '\n');
|
||||
+
|
||||
+ if (full)
|
||||
+ lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq);
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static const struct seq_operations lru_gen_seq_ops = {
|
||||
+ .start = lru_gen_seq_start,
|
||||
+ .stop = lru_gen_seq_stop,
|
||||
+ .next = lru_gen_seq_next,
|
||||
+ .show = lru_gen_seq_show,
|
||||
+};
|
||||
+
|
||||
+static int advance_max_seq(struct lruvec *lruvec, unsigned long seq, int swappiness)
|
||||
+{
|
||||
+ struct scan_control sc = {
|
||||
+ .target_mem_cgroup = lruvec_memcg(lruvec),
|
||||
+ };
|
||||
+ DEFINE_MAX_SEQ();
|
||||
+
|
||||
+ if (seq == max_seq)
|
||||
+ walk_mm_list(lruvec, max_seq, &sc, swappiness, NULL);
|
||||
+
|
||||
+ return seq > max_seq ? -EINVAL : 0;
|
||||
+}
|
||||
+
|
||||
+static int advance_min_seq(struct lruvec *lruvec, unsigned long seq, int swappiness,
|
||||
+ unsigned long nr_to_reclaim)
|
||||
+{
|
||||
+ struct blk_plug plug;
|
||||
+ int err = -EINTR;
|
||||
+ long nr_to_scan = LONG_MAX;
|
||||
+ struct scan_control sc = {
|
||||
+ .nr_to_reclaim = nr_to_reclaim,
|
||||
+ .target_mem_cgroup = lruvec_memcg(lruvec),
|
||||
+ .may_writepage = 1,
|
||||
+ .may_unmap = 1,
|
||||
+ .may_swap = 1,
|
||||
+ .reclaim_idx = MAX_NR_ZONES - 1,
|
||||
+ .gfp_mask = GFP_KERNEL,
|
||||
+ };
|
||||
+ DEFINE_MAX_SEQ();
|
||||
+
|
||||
+ if (seq >= max_seq - 1)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ blk_start_plug(&plug);
|
||||
+
|
||||
+ while (!signal_pending(current)) {
|
||||
+ DEFINE_MIN_SEQ();
|
||||
+
|
||||
+ if (seq < min(min_seq[!swappiness], min_seq[swappiness < 200]) ||
|
||||
+ !evict_pages(lruvec, &sc, swappiness, &nr_to_scan)) {
|
||||
+ err = 0;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ cond_resched();
|
||||
+ }
|
||||
+
|
||||
+ blk_finish_plug(&plug);
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static int advance_seq(char cmd, int memcg_id, int nid, unsigned long seq,
|
||||
+ int swappiness, unsigned long nr_to_reclaim)
|
||||
+{
|
||||
+ struct lruvec *lruvec;
|
||||
+ int err = -EINVAL;
|
||||
+ struct mem_cgroup *memcg = NULL;
|
||||
+
|
||||
+ if (!mem_cgroup_disabled()) {
|
||||
+ rcu_read_lock();
|
||||
+ memcg = mem_cgroup_from_id(memcg_id);
|
||||
+#ifdef CONFIG_MEMCG
|
||||
+ if (memcg && !css_tryget(&memcg->css))
|
||||
+ memcg = NULL;
|
||||
+#endif
|
||||
+ rcu_read_unlock();
|
||||
+
|
||||
+ if (!memcg)
|
||||
+ goto done;
|
||||
+ }
|
||||
+ if (memcg_id != mem_cgroup_id(memcg))
|
||||
+ goto done;
|
||||
+
|
||||
+ if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY))
|
||||
+ goto done;
|
||||
+
|
||||
+ lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
|
||||
+
|
||||
+ if (swappiness == -1)
|
||||
+ swappiness = get_swappiness(lruvec);
|
||||
+ else if (swappiness > 200U)
|
||||
+ goto done;
|
||||
+
|
||||
+ switch (cmd) {
|
||||
+ case '+':
|
||||
+ err = advance_max_seq(lruvec, seq, swappiness);
|
||||
+ break;
|
||||
+ case '-':
|
||||
+ err = advance_min_seq(lruvec, seq, swappiness, nr_to_reclaim);
|
||||
+ break;
|
||||
+ }
|
||||
+done:
|
||||
+ mem_cgroup_put(memcg);
|
||||
+
|
||||
+ return err;
|
||||
+}
|
||||
+
|
||||
+static ssize_t lru_gen_seq_write(struct file *file, const char __user *src,
|
||||
+ size_t len, loff_t *pos)
|
||||
+{
|
||||
+ void *buf;
|
||||
+ char *cur, *next;
|
||||
+ int err = 0;
|
||||
+
|
||||
+ buf = kvmalloc(len + 1, GFP_USER);
|
||||
+ if (!buf)
|
||||
+ return -ENOMEM;
|
||||
+
|
||||
+ if (copy_from_user(buf, src, len)) {
|
||||
+ kvfree(buf);
|
||||
+ return -EFAULT;
|
||||
+ }
|
||||
+
|
||||
+ next = buf;
|
||||
+ next[len] = '\0';
|
||||
+
|
||||
+ while ((cur = strsep(&next, ",;\n"))) {
|
||||
+ int n;
|
||||
+ int end;
|
||||
+ char cmd;
|
||||
+ unsigned int memcg_id;
|
||||
+ unsigned int nid;
|
||||
+ unsigned long seq;
|
||||
+ unsigned int swappiness = -1;
|
||||
+ unsigned long nr_to_reclaim = -1;
|
||||
+
|
||||
+ cur = skip_spaces(cur);
|
||||
+ if (!*cur)
|
||||
+ continue;
|
||||
+
|
||||
+ n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid,
|
||||
+ &seq, &end, &swappiness, &end, &nr_to_reclaim, &end);
|
||||
+ if (n < 4 || cur[end]) {
|
||||
+ err = -EINVAL;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ err = advance_seq(cmd, memcg_id, nid, seq, swappiness, nr_to_reclaim);
|
||||
+ if (err)
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ kvfree(buf);
|
||||
+
|
||||
+ return err ? : len;
|
||||
+}
|
||||
+
|
||||
+static int lru_gen_seq_open(struct inode *inode, struct file *file)
|
||||
+{
|
||||
+ return seq_open(file, &lru_gen_seq_ops);
|
||||
+}
|
||||
+
|
||||
+static const struct file_operations lru_gen_rw_fops = {
|
||||
+ .open = lru_gen_seq_open,
|
||||
+ .read = seq_read,
|
||||
+ .write = lru_gen_seq_write,
|
||||
+ .llseek = seq_lseek,
|
||||
+ .release = seq_release,
|
||||
+};
|
||||
+
|
||||
+static const struct file_operations lru_gen_ro_fops = {
|
||||
+ .open = lru_gen_seq_open,
|
||||
+ .read = seq_read,
|
||||
+ .llseek = seq_lseek,
|
||||
+ .release = seq_release,
|
||||
+};
|
||||
+
|
||||
/******************************************************************************
|
||||
* initialization
|
||||
******************************************************************************/
|
||||
@@ -4718,6 +5115,12 @@ static int __init init_lru_gen(void)
|
||||
if (hotplug_memory_notifier(lru_gen_online_mem, 0))
|
||||
pr_err("lru_gen: failed to subscribe hotplug notifications\n");
|
||||
|
||||
+ if (sysfs_create_group(mm_kobj, &lru_gen_attr_group))
|
||||
+ pr_err("lru_gen: failed to create sysfs group\n");
|
||||
+
|
||||
+ debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops);
|
||||
+ debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops);
|
||||
+
|
||||
return 0;
|
||||
};
|
||||
/*
|
||||
--
|
||||
2.31.1.751.gd2f1c929bd-goog
|
||||
|
||||
|
@ -0,0 +1,177 @@
|
||||
From mboxrd@z Thu Jan 1 00:00:00 1970
|
||||
Return-Path: <linux-kernel-owner@kernel.org>
|
||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
|
||||
aws-us-west-2-korg-lkml-1.web.codeaurora.org
|
||||
X-Spam-Level:
|
||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
|
||||
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
|
||||
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
|
||||
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
|
||||
version=3.4.0
|
||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
|
||||
by smtp.lore.kernel.org (Postfix) with ESMTP id 1B6E6C433B4
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:47 +0000 (UTC)
|
||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
|
||||
by mail.kernel.org (Postfix) with ESMTP id 01DA3613BA
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:46 +0000 (UTC)
|
||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
|
||||
id S231250AbhETG4G (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
|
||||
Thu, 20 May 2021 02:56:06 -0400
|
||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37952 "EHLO
|
||||
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
|
||||
with ESMTP id S231130AbhETGzn (ORCPT
|
||||
<rfc822;linux-kernel@vger.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:43 -0400
|
||||
Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49])
|
||||
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 38C6EC061574
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:22 -0700 (PDT)
|
||||
Received: by mail-yb1-xb49.google.com with SMTP id e138-20020a25e7900000b029050df4b648fcso15235225ybh.7
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:22 -0700 (PDT)
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=google.com; s=20161025;
|
||||
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
|
||||
:cc;
|
||||
bh=RJZRi3A5hgCw5vurGae674wlMdAubVgl39rxTDtDyVU=;
|
||||
b=JLF6ekZqpQB+K5YXwsizPGhNysHViGntJ8r9yptD6ne6XqGKoI0Wr7dT82jfftMw45
|
||||
KOQGieGgRV+BKuZtTbu4cD96tkttpjfquqm9xT5G1x+H3lcXyVbFnA/e0Iz2BGxOx/oJ
|
||||
BP1OK9ib2xvMirn2ogaiMLLuQUMqUVLP4SPszQpLdYUpmsyvtcBjEJsyZiQXMIULinqi
|
||||
S2oaVggbJoWpCxB/3pF4W62fMm5D/LXGAxEWoOTfyY0Ng+NdQ206TROqcoNsbbncUKfa
|
||||
mpyuoyCTOYlALfoNN1kP2lNPrNTUz+UQK31nuEwEnfTBGdsmWTsTelrdxl+7zutfQ7Vh
|
||||
E6Ag==
|
||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=1e100.net; s=20161025;
|
||||
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
|
||||
:references:subject:from:to:cc;
|
||||
bh=RJZRi3A5hgCw5vurGae674wlMdAubVgl39rxTDtDyVU=;
|
||||
b=I4OJloc251SYLj8eJFPP7eJ7uA2r6NXUj5S3hGT6Cv3INg/pbFfz6U56wMbDKeUUx2
|
||||
PFZUxNbewINRi+Xyu0XORumSFYK8cRNAA2xJjsiB/Mi20wJutqQp8eggHjJ4klnT3Arg
|
||||
fC/Qi7JDEKR9akyObrL1SszlU1EyBRMRlSuA56tL8Ayw3KSXAha5WNL73FfjPvnDX4Jn
|
||||
bmGmhKmr4OxIJyYH+35RFfAEzVEoAkRi3miAuWb8eWC6T+GXdpovk6EqvlqSAS4RZyph
|
||||
hWXp4amXtIPmK3meD1g4aF/hJ6IDATp3RWD8SfDV+tNQHU5Wvz3exLpVhhmIWwqq93UL
|
||||
V4jg==
|
||||
X-Gm-Message-State: AOAM532JcEmtEayMiu5r4FpU7325mYFfTWklXmMaEvDn+KHT0zqmqSZv
|
||||
de4I079gO1eY+8FLxSzEABlOF2R4isA=
|
||||
X-Google-Smtp-Source: ABdhPJxxpSuYeOHDU9e04y/REOjF/gJdL+d+nc25sd9W9QvVBy/CaC3vEGm8uybifzDpCi76iP47Kw7dnfI=
|
||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
|
||||
(user=yuzhao job=sendgmr) by 2002:a05:6902:4b3:: with SMTP id
|
||||
r19mr5173987ybs.290.1621493661349; Wed, 19 May 2021 23:54:21 -0700 (PDT)
|
||||
Date: Thu, 20 May 2021 00:53:54 -0600
|
||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
Message-Id: <20210520065355.2736558-14-yuzhao@google.com>
|
||||
Mime-Version: 1.0
|
||||
References: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
|
||||
Subject: [PATCH v3 13/14] mm: multigenerational lru: Kconfig
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
To: linux-mm@kvack.org
|
||||
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
|
||||
Andrew Morton <akpm@linux-foundation.org>,
|
||||
Dave Chinner <david@fromorbit.com>,
|
||||
Dave Hansen <dave.hansen@linux.intel.com>,
|
||||
Donald Carr <sirspudd@gmail.com>,
|
||||
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
|
||||
Johannes Weiner <hannes@cmpxchg.org>,
|
||||
Jonathan Corbet <corbet@lwn.net>,
|
||||
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
|
||||
Konstantin Kharlamov <hi-angel@yandex.ru>,
|
||||
Marcus Seyfarth <m.seyfarth@gmail.com>,
|
||||
Matthew Wilcox <willy@infradead.org>,
|
||||
Mel Gorman <mgorman@suse.de>,
|
||||
Miaohe Lin <linmiaohe@huawei.com>,
|
||||
Michael Larabel <michael@michaellarabel.com>,
|
||||
Michal Hocko <mhocko@suse.com>,
|
||||
Michel Lespinasse <michel@lespinasse.org>,
|
||||
Rik van Riel <riel@surriel.com>,
|
||||
Roman Gushchin <guro@fb.com>,
|
||||
Tim Chen <tim.c.chen@linux.intel.com>,
|
||||
Vlastimil Babka <vbabka@suse.cz>,
|
||||
Yang Shi <shy828301@gmail.com>,
|
||||
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
|
||||
linux-kernel@vger.kernel.org, lkp@lists.01.org,
|
||||
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
|
||||
Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
Precedence: bulk
|
||||
List-ID: <linux-kernel.vger.kernel.org>
|
||||
X-Mailing-List: linux-kernel@vger.kernel.org
|
||||
List-Archive: <https://lore.kernel.org/lkml/>
|
||||
|
||||
Add configuration options for the multigenerational lru.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
---
|
||||
mm/Kconfig | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 58 insertions(+)
|
||||
|
||||
diff --git a/mm/Kconfig b/mm/Kconfig
|
||||
index 02d44e3420f5..da125f145bc4 100644
|
||||
--- a/mm/Kconfig
|
||||
+++ b/mm/Kconfig
|
||||
@@ -901,4 +901,62 @@ config KMAP_LOCAL
|
||||
# struct io_mapping based helper. Selected by drivers that need them
|
||||
config IO_MAPPING
|
||||
bool
|
||||
+
|
||||
+# the multigenerational lru {
|
||||
+config LRU_GEN
|
||||
+ bool "Multigenerational LRU"
|
||||
+ depends on MMU
|
||||
+ help
|
||||
+ A high performance LRU implementation to heavily overcommit workloads
|
||||
+ that are not IO bound. See Documentation/vm/multigen_lru.rst for
|
||||
+ details.
|
||||
+
|
||||
+ Warning: do not enable this option unless you plan to use it because
|
||||
+ it introduces a small per-process and per-memcg and per-node memory
|
||||
+ overhead.
|
||||
+
|
||||
+config LRU_GEN_ENABLED
|
||||
+ bool "Turn on by default"
|
||||
+ depends on LRU_GEN
|
||||
+ help
|
||||
+ The default value of /sys/kernel/mm/lru_gen/enabled is 0. This option
|
||||
+ changes it to 1.
|
||||
+
|
||||
+ Warning: the default value is the fast path. See
|
||||
+ Documentation/static-keys.txt for details.
|
||||
+
|
||||
+config LRU_GEN_STATS
|
||||
+ bool "Full stats for debugging"
|
||||
+ depends on LRU_GEN
|
||||
+ help
|
||||
+ This option keeps full stats for each generation, which can be read
|
||||
+ from /sys/kernel/debug/lru_gen_full.
|
||||
+
|
||||
+ Warning: do not enable this option unless you plan to use it because
|
||||
+ it introduces an additional small per-process and per-memcg and
|
||||
+ per-node memory overhead.
|
||||
+
|
||||
+config NR_LRU_GENS
|
||||
+ int "Max number of generations"
|
||||
+ depends on LRU_GEN
|
||||
+ range 4 31
|
||||
+ default 7
|
||||
+ help
|
||||
+ This will use order_base_2(N+1) spare bits from page flags.
|
||||
+
|
||||
+ Warning: do not use numbers larger than necessary because each
|
||||
+ generation introduces a small per-node and per-memcg memory overhead.
|
||||
+
|
||||
+config TIERS_PER_GEN
|
||||
+ int "Number of tiers per generation"
|
||||
+ depends on LRU_GEN
|
||||
+ range 2 5
|
||||
+ default 4
|
||||
+ help
|
||||
+ This will use N-2 spare bits from page flags.
|
||||
+
|
||||
+ Larger values generally offer better protection to active pages under
|
||||
+ heavy buffered I/O workloads.
|
||||
+# }
|
||||
+
|
||||
endmenu
|
||||
--
|
||||
2.31.1.751.gd2f1c929bd-goog
|
||||
|
||||
|
@ -0,0 +1,273 @@
|
||||
From mboxrd@z Thu Jan 1 00:00:00 1970
|
||||
Return-Path: <linux-kernel-owner@kernel.org>
|
||||
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
|
||||
aws-us-west-2-korg-lkml-1.web.codeaurora.org
|
||||
X-Spam-Level:
|
||||
X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED,
|
||||
DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS,
|
||||
INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,
|
||||
USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no
|
||||
version=3.4.0
|
||||
Received: from mail.kernel.org (mail.kernel.org [198.145.29.99])
|
||||
by smtp.lore.kernel.org (Postfix) with ESMTP id 10B58C433ED
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:48 +0000 (UTC)
|
||||
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
|
||||
by mail.kernel.org (Postfix) with ESMTP id E99D16108C
|
||||
for <linux-kernel@archiver.kernel.org>; Thu, 20 May 2021 06:54:47 +0000 (UTC)
|
||||
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
|
||||
id S231251AbhETG4H (ORCPT <rfc822;linux-kernel@archiver.kernel.org>);
|
||||
Thu, 20 May 2021 02:56:07 -0400
|
||||
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:37970 "EHLO
|
||||
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
|
||||
with ESMTP id S231152AbhETGzq (ORCPT
|
||||
<rfc822;linux-kernel@vger.kernel.org>);
|
||||
Thu, 20 May 2021 02:55:46 -0400
|
||||
Received: from mail-qt1-x84a.google.com (mail-qt1-x84a.google.com [IPv6:2607:f8b0:4864:20::84a])
|
||||
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C8635C06175F
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:23 -0700 (PDT)
|
||||
Received: by mail-qt1-x84a.google.com with SMTP id x9-20020ac84a090000b0290203194f1f86so3499707qtq.13
|
||||
for <linux-kernel@vger.kernel.org>; Wed, 19 May 2021 23:54:23 -0700 (PDT)
|
||||
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=google.com; s=20161025;
|
||||
h=date:in-reply-to:message-id:mime-version:references:subject:from:to
|
||||
:cc;
|
||||
bh=tmnYUMpAe2KoFw1JK5DEOLa6QKjWz+/jEuUps2TjE0M=;
|
||||
b=DnoKJgXGcZrakGIsy2wdggTSzr8gNr5Cga30A6c8a6Hf9x2dffeKxupvvvPjuu1gFH
|
||||
aGdEv0BQdUdQtd0c3PTB1yYrqJsJcPp5S6L8/JeU1mBsAkTgRAJC+WwYC2oJaN+K/+rh
|
||||
m7SHkphIH6F6L72NTt2b96CmRop8AS7h70mGFoqBtxgJZEEG0JjTr93/mLmeGl1DrblN
|
||||
ViY8g/jh939e21AJjULOIlpeBbxplek6u+fXKVxsYdCV2JKDsA0LwaCxMlx08fCc/j9n
|
||||
pt2cBRltMZSTctDaJlkHWcEOuGP8bGJA/JzG0MeUfva0r9KcYGAVy5zcvXU4Mkz8AXA/
|
||||
v3JQ==
|
||||
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
|
||||
d=1e100.net; s=20161025;
|
||||
h=x-gm-message-state:date:in-reply-to:message-id:mime-version
|
||||
:references:subject:from:to:cc;
|
||||
bh=tmnYUMpAe2KoFw1JK5DEOLa6QKjWz+/jEuUps2TjE0M=;
|
||||
b=eSQlkp99GhbOJbbfWHaqWXYyj8f2uV+mVQE23pf6QSUOoTukthTWydqV3fgiwXFIDZ
|
||||
SDohHvXcyn6N5BbFXVm6CtNfXtb315OJsMSEplLbhXduGrLKjsp7Zfpa0MW/pBEJOfNH
|
||||
/go5cnOUxmpFFo2+nAoIm8Xug3YYddsalK9BH0YMXpESvTCgOPpHU8wev9wLTU4zDG2s
|
||||
NSpyxsj72ahnHDJFkm3eEio8zmWqdEa9MYXuSU+QTZ/HJ0OwLb4BOdRwaOx/GeFoWGTu
|
||||
We7/PREhKWf+7tUeB8o2wbzSdGaKSWLh2SOQR0Ydr269QRIv3J4q6e/zT85DkE6XOcYE
|
||||
ziTg==
|
||||
X-Gm-Message-State: AOAM530QSwtf5Kda/I2DOnicxCl9Xadwo6H9cConTRpJ9+gh6AP7aLlU
|
||||
qfB+G5KAp3JrJXlL4Qf1Gmbl32aZCU0=
|
||||
X-Google-Smtp-Source: ABdhPJyfR8302KuxyD/mIOKCO+jxW1RXoZnlJejF8SLfwvo9YuRoFSL43tZzQ7DdKcZXlLzVckFytBbp+9s=
|
||||
X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:595d:62ee:f08:8e83])
|
||||
(user=yuzhao job=sendgmr) by 2002:ad4:5a52:: with SMTP id ej18mr3968319qvb.31.1621493662894;
|
||||
Wed, 19 May 2021 23:54:22 -0700 (PDT)
|
||||
Date: Thu, 20 May 2021 00:53:55 -0600
|
||||
In-Reply-To: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
Message-Id: <20210520065355.2736558-15-yuzhao@google.com>
|
||||
Mime-Version: 1.0
|
||||
References: <20210520065355.2736558-1-yuzhao@google.com>
|
||||
X-Mailer: git-send-email 2.31.1.751.gd2f1c929bd-goog
|
||||
Subject: [PATCH v3 14/14] mm: multigenerational lru: documentation
|
||||
From: Yu Zhao <yuzhao@google.com>
|
||||
To: linux-mm@kvack.org
|
||||
Cc: Alex Shi <alexs@kernel.org>, Andi Kleen <ak@linux.intel.com>,
|
||||
Andrew Morton <akpm@linux-foundation.org>,
|
||||
Dave Chinner <david@fromorbit.com>,
|
||||
Dave Hansen <dave.hansen@linux.intel.com>,
|
||||
Donald Carr <sirspudd@gmail.com>,
|
||||
Hillf Danton <hdanton@sina.com>, Jens Axboe <axboe@kernel.dk>,
|
||||
Johannes Weiner <hannes@cmpxchg.org>,
|
||||
Jonathan Corbet <corbet@lwn.net>,
|
||||
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
|
||||
Konstantin Kharlamov <hi-angel@yandex.ru>,
|
||||
Marcus Seyfarth <m.seyfarth@gmail.com>,
|
||||
Matthew Wilcox <willy@infradead.org>,
|
||||
Mel Gorman <mgorman@suse.de>,
|
||||
Miaohe Lin <linmiaohe@huawei.com>,
|
||||
Michael Larabel <michael@michaellarabel.com>,
|
||||
Michal Hocko <mhocko@suse.com>,
|
||||
Michel Lespinasse <michel@lespinasse.org>,
|
||||
Rik van Riel <riel@surriel.com>,
|
||||
Roman Gushchin <guro@fb.com>,
|
||||
Tim Chen <tim.c.chen@linux.intel.com>,
|
||||
Vlastimil Babka <vbabka@suse.cz>,
|
||||
Yang Shi <shy828301@gmail.com>,
|
||||
Ying Huang <ying.huang@intel.com>, Zi Yan <ziy@nvidia.com>,
|
||||
linux-kernel@vger.kernel.org, lkp@lists.01.org,
|
||||
page-reclaim@google.com, Yu Zhao <yuzhao@google.com>,
|
||||
Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
Content-Type: text/plain; charset="UTF-8"
|
||||
Precedence: bulk
|
||||
List-ID: <linux-kernel.vger.kernel.org>
|
||||
X-Mailing-List: linux-kernel@vger.kernel.org
|
||||
List-Archive: <https://lore.kernel.org/lkml/>
|
||||
|
||||
Add Documentation/vm/multigen_lru.rst.
|
||||
|
||||
Signed-off-by: Yu Zhao <yuzhao@google.com>
|
||||
Tested-by: Konstantin Kharlamov <Hi-Angel@yandex.ru>
|
||||
---
|
||||
Documentation/vm/index.rst | 1 +
|
||||
Documentation/vm/multigen_lru.rst | 143 ++++++++++++++++++++++++++++++
|
||||
2 files changed, 144 insertions(+)
|
||||
create mode 100644 Documentation/vm/multigen_lru.rst
|
||||
|
||||
diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst
|
||||
index eff5fbd492d0..c353b3f55924 100644
|
||||
--- a/Documentation/vm/index.rst
|
||||
+++ b/Documentation/vm/index.rst
|
||||
@@ -17,6 +17,7 @@ various features of the Linux memory management
|
||||
|
||||
swap_numa
|
||||
zswap
|
||||
+ multigen_lru
|
||||
|
||||
Kernel developers MM documentation
|
||||
==================================
|
||||
diff --git a/Documentation/vm/multigen_lru.rst b/Documentation/vm/multigen_lru.rst
|
||||
new file mode 100644
|
||||
index 000000000000..a18416ed7e92
|
||||
--- /dev/null
|
||||
+++ b/Documentation/vm/multigen_lru.rst
|
||||
@@ -0,0 +1,143 @@
|
||||
+.. SPDX-License-Identifier: GPL-2.0
|
||||
+
|
||||
+=====================
|
||||
+Multigenerational LRU
|
||||
+=====================
|
||||
+
|
||||
+Quick Start
|
||||
+===========
|
||||
+Build Options
|
||||
+-------------
|
||||
+:Required: Set ``CONFIG_LRU_GEN=y``.
|
||||
+
|
||||
+:Optional: Set ``CONFIG_LRU_GEN_ENABLED=y`` to turn the feature on by
|
||||
+ default.
|
||||
+
|
||||
+:Optional: Change ``CONFIG_NR_LRU_GENS`` to a number ``X`` to support
|
||||
+ a maximum of ``X`` generations.
|
||||
+
|
||||
+:Optional: Change ``CONFIG_TIERS_PER_GEN`` to a number ``Y`` to
|
||||
+ support a maximum of ``Y`` tiers per generation.
|
||||
+
|
||||
+Runtime Options
|
||||
+---------------
|
||||
+:Required: Write ``1`` to ``/sys/kernel/mm/lru_gen/enable`` if the
|
||||
+ feature was not turned on by default.
|
||||
+
|
||||
+:Optional: Change ``/sys/kernel/mm/lru_gen/spread`` to a number ``N``
|
||||
+ to spread pages out across ``N+1`` generations. ``N`` should be less
|
||||
+ than ``X``. Larger values make the background aging more aggressive.
|
||||
+
|
||||
+:Optional: Read ``/sys/kernel/debug/lru_gen`` to verify the feature.
|
||||
+ This file has the following output:
|
||||
+
|
||||
+::
|
||||
+
|
||||
+ memcg memcg_id memcg_path
|
||||
+ node node_id
|
||||
+ min_gen birth_time anon_size file_size
|
||||
+ ...
|
||||
+ max_gen birth_time anon_size file_size
|
||||
+
|
||||
+Given a memcg and a node, ``min_gen`` is the oldest generation
|
||||
+(number) and ``max_gen`` is the youngest. Birth time is in
|
||||
+milliseconds. The sizes of anon and file types are in pages.
|
||||
+
|
||||
+Recipes
|
||||
+-------
|
||||
+:Android on ARMv8.1+: ``X=4``, ``Y=3`` and ``N=0``.
|
||||
+
|
||||
+:Android on pre-ARMv8.1 CPUs: Not recommended due to the lack of
|
||||
+ ``ARM64_HW_AFDBM``.
|
||||
+
|
||||
+:Laptops and workstations running Chrome on x86_64: Use the default
|
||||
+ values.
|
||||
+
|
||||
+:Working set estimation: Write ``+ memcg_id node_id gen [swappiness]``
|
||||
+ to ``/sys/kernel/debug/lru_gen`` to account referenced pages to
|
||||
+ generation ``max_gen`` and create the next generation ``max_gen+1``.
|
||||
+ ``gen`` should be equal to ``max_gen``. A swap file and a non-zero
|
||||
+ ``swappiness`` are required to scan anon type. If swapping is not
|
||||
+ desired, set ``vm.swappiness`` to ``0``.
|
||||
+
|
||||
+:Proactive reclaim: Write ``- memcg_id node_id gen [swappiness]
|
||||
+ [nr_to_reclaim]`` to ``/sys/kernel/debug/lru_gen`` to evict
|
||||
+ generations less than or equal to ``gen``. ``gen`` should be less
|
||||
+ than ``max_gen-1`` as ``max_gen`` and ``max_gen-1`` are active
|
||||
+ generations and therefore protected from the eviction. Use
|
||||
+ ``nr_to_reclaim`` to limit the number of pages to evict. Multiple
|
||||
+ command lines are supported, so does concatenation with delimiters
|
||||
+ ``,`` and ``;``.
|
||||
+
|
||||
+Framework
|
||||
+=========
|
||||
+For each ``lruvec``, evictable pages are divided into multiple
|
||||
+generations. The youngest generation number is stored in ``max_seq``
|
||||
+for both anon and file types as they are aged on an equal footing. The
|
||||
+oldest generation numbers are stored in ``min_seq[2]`` separately for
|
||||
+anon and file types as clean file pages can be evicted regardless of
|
||||
+swap and write-back constraints. These three variables are
|
||||
+monotonically increasing. Generation numbers are truncated into
|
||||
+``order_base_2(CONFIG_NR_LRU_GENS+1)`` bits in order to fit into
|
||||
+``page->flags``. The sliding window technique is used to prevent
|
||||
+truncated generation numbers from overlapping. Each truncated
|
||||
+generation number is an index to an array of per-type and per-zone
|
||||
+lists. Evictable pages are added to the per-zone lists indexed by
|
||||
+``max_seq`` or ``min_seq[2]`` (modulo ``CONFIG_NR_LRU_GENS``),
|
||||
+depending on their types.
|
||||
+
|
||||
+Each generation is then divided into multiple tiers. Tiers represent
|
||||
+levels of usage from file descriptors only. Pages accessed N times via
|
||||
+file descriptors belong to tier order_base_2(N). Each generation
|
||||
+contains at most CONFIG_TIERS_PER_GEN tiers, and they require
|
||||
+additional CONFIG_TIERS_PER_GEN-2 bits in page->flags. In contrast to
|
||||
+moving across generations which requires the lru lock for the list
|
||||
+operations, moving across tiers only involves an atomic operation on
|
||||
+``page->flags`` and therefore has a negligible cost. A feedback loop
|
||||
+modeled after the PID controller monitors the refault rates across all
|
||||
+tiers and decides when to activate pages from which tiers in the
|
||||
+reclaim path.
|
||||
+
|
||||
+The framework comprises two conceptually independent components: the
|
||||
+aging and the eviction, which can be invoked separately from user
|
||||
+space for the purpose of working set estimation and proactive reclaim.
|
||||
+
|
||||
+Aging
|
||||
+-----
|
||||
+The aging produces young generations. Given an ``lruvec``, the aging
|
||||
+scans page tables for referenced pages of this ``lruvec``. Upon
|
||||
+finding one, the aging updates its generation number to ``max_seq``.
|
||||
+After each round of scan, the aging increments ``max_seq``.
|
||||
+
|
||||
+The aging maintains either a system-wide ``mm_struct`` list or
|
||||
+per-memcg ``mm_struct`` lists, and it only scans page tables of
|
||||
+processes that have been scheduled since the last scan.
|
||||
+
|
||||
+The aging is due when both of ``min_seq[2]`` reaches ``max_seq-1``,
|
||||
+assuming both anon and file types are reclaimable.
|
||||
+
|
||||
+Eviction
|
||||
+--------
|
||||
+The eviction consumes old generations. Given an ``lruvec``, the
|
||||
+eviction scans the pages on the per-zone lists indexed by either of
|
||||
+``min_seq[2]``. It first tries to select a type based on the values of
|
||||
+``min_seq[2]``. When anon and file types are both available from the
|
||||
+same generation, it selects the one that has a lower refault rate.
|
||||
+
|
||||
+During a scan, the eviction sorts pages according to their new
|
||||
+generation numbers, if the aging has found them referenced. It also
|
||||
+moves pages from the tiers that have higher refault rates than tier 0
|
||||
+to the next generation.
|
||||
+
|
||||
+When it finds all the per-zone lists of a selected type are empty, the
|
||||
+eviction increments ``min_seq[2]`` indexed by this selected type.
|
||||
+
|
||||
+To-do List
|
||||
+==========
|
||||
+KVM Optimization
|
||||
+----------------
|
||||
+Support shadow page table scanning.
|
||||
+
|
||||
+NUMA Optimization
|
||||
+-----------------
|
||||
+Optimize page table scan for NUMA.
|
||||
--
|
||||
2.31.1.751.gd2f1c929bd-goog
|
||||
|
||||
|
6531
sys-kernel/pinephone-sources/files/config-5.13.5
Normal file
6531
sys-kernel/pinephone-sources/files/config-5.13.5
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,9 @@
|
||||
post_upgrade() {
|
||||
if findmnt --fstab -uno SOURCE /boot &>/dev/null && ! mountpoint -q /boot; then
|
||||
echo "WARNING: /boot appears to be a separate partition but is not mounted."
|
||||
fi
|
||||
}
|
||||
|
||||
post_remove() {
|
||||
rm -f boot/initramfs-linux.img
|
||||
}
|
11
sys-kernel/pinephone-sources/files/linux.preset
Normal file
11
sys-kernel/pinephone-sources/files/linux.preset
Normal file
@ -0,0 +1,11 @@
|
||||
# mkinitcpio preset file for the '%PKGBASE%' package
|
||||
|
||||
ALL_config="/etc/mkinitcpio.conf"
|
||||
ALL_kver="%KERNVER%"
|
||||
|
||||
PRESETS=('default')
|
||||
|
||||
#default_config="/etc/mkinitcpio.conf"
|
||||
default_image="/boot/initramfs-linux.img"
|
||||
#default_options=""
|
||||
|
@ -0,0 +1,409 @@
|
||||
From f062022f2a2781d6b8ca63c460b0e72ebac30870 Mon Sep 17 00:00:00 2001
|
||||
From: Martijn Braam <martijn@brixit.nl>
|
||||
Date: Mon, 28 Sep 2020 14:26:11 +0200
|
||||
Subject: [PATCH] media: ov5640: Implement autofocus
|
||||
|
||||
The autofocus functionality needs a firmware blob loaded into the
|
||||
internal microcontroller.
|
||||
|
||||
V4L2 doesn't have an api to control all autofocus functionality, but
|
||||
this at least makes it possible to focus on the center of the sensor.
|
||||
|
||||
Signed-off-by: Martijn Braam <martijn@brixit.nl>
|
||||
---
|
||||
drivers/media/i2c/ov5640.c | 254 +++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 254 insertions(+)
|
||||
|
||||
diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c
|
||||
index df0a507c211f..08a5304c0e95 100644
|
||||
--- a/drivers/media/i2c/ov5640.c
|
||||
+++ b/drivers/media/i2c/ov5640.c
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <linux/clkdev.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/delay.h>
|
||||
+#include <linux/firmware.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/gpio/consumer.h>
|
||||
#include <linux/i2c.h>
|
||||
@@ -31,7 +32,11 @@
|
||||
|
||||
#define OV5640_DEFAULT_SLAVE_ID 0x3c
|
||||
|
||||
+#define OV5640_REG_SYS_RESET00 0x3000
|
||||
+#define OV5640_REG_SYS_RESET01 0x3001
|
||||
#define OV5640_REG_SYS_RESET02 0x3002
|
||||
+#define OV5640_REG_SYS_CLOCK_ENABLE00 0x3004
|
||||
+#define OV5640_REG_SYS_CLOCK_ENABLE01 0x3005
|
||||
#define OV5640_REG_SYS_CLOCK_ENABLE02 0x3006
|
||||
#define OV5640_REG_SYS_CTRL0 0x3008
|
||||
#define OV5640_REG_SYS_CTRL0_SW_PWDN 0x42
|
||||
@@ -41,6 +46,14 @@
|
||||
#define OV5640_REG_PAD_OUTPUT_ENABLE01 0x3017
|
||||
#define OV5640_REG_PAD_OUTPUT_ENABLE02 0x3018
|
||||
#define OV5640_REG_PAD_OUTPUT00 0x3019
|
||||
+#define OV5640_REG_FW_CMD_MAIN 0x3022
|
||||
+#define OV5640_REG_FW_CMD_ACK 0x3023
|
||||
+#define OV5640_REG_FW_CMD_PARA0 0x3024
|
||||
+#define OV5640_REG_FW_CMD_PARA1 0x3025
|
||||
+#define OV5640_REG_FW_CMD_PARA2 0x3026
|
||||
+#define OV5640_REG_FW_CMD_PARA3 0x3027
|
||||
+#define OV5640_REG_FW_CMD_PARA4 0x3028
|
||||
+#define OV5640_REG_FW_STATUS 0x3029
|
||||
#define OV5640_REG_SYSTEM_CONTROL1 0x302e
|
||||
#define OV5640_REG_SC_PLL_CTRL0 0x3034
|
||||
#define OV5640_REG_SC_PLL_CTRL1 0x3035
|
||||
@@ -59,6 +72,7 @@
|
||||
#define OV5640_REG_AEC_PK_MANUAL 0x3503
|
||||
#define OV5640_REG_AEC_PK_REAL_GAIN 0x350a
|
||||
#define OV5640_REG_AEC_PK_VTS 0x350c
|
||||
+#define OV5640_REG_VCM_CONTROL4 0x3606
|
||||
#define OV5640_REG_TIMING_DVPHO 0x3808
|
||||
#define OV5640_REG_TIMING_DVPVO 0x380a
|
||||
#define OV5640_REG_TIMING_HTS 0x380c
|
||||
@@ -95,6 +109,20 @@
|
||||
#define OV5640_REG_SDE_CTRL4 0x5584
|
||||
#define OV5640_REG_SDE_CTRL5 0x5585
|
||||
#define OV5640_REG_AVG_READOUT 0x56a1
|
||||
+#define OV5640_REG_FIRMWARE_BASE 0x8000
|
||||
+
|
||||
+#define OV5640_FW_STATUS_S_FIRMWARE 0x7f
|
||||
+#define OV5640_FW_STATUS_S_STARTUP 0x7e
|
||||
+#define OV5640_FW_STATUS_S_IDLE 0x70
|
||||
+#define OV5640_FW_STATUS_S_FOCUSING 0x00
|
||||
+#define OV5640_FW_STATUS_S_FOCUSED 0x10
|
||||
+
|
||||
+#define OV5640_FW_CMD_TRIGGER_FOCUS 0x03
|
||||
+#define OV5640_FW_CMD_CONTINUOUS_FOCUS 0x04
|
||||
+#define OV5640_FW_CMD_GET_FOCUS_RESULT 0x07
|
||||
+#define OV5640_FW_CMD_RELEASE_FOCUS 0x08
|
||||
+#define OV5640_FW_CMD_ZONE_CONFIG 0x12
|
||||
+#define OV5640_FW_CMD_DEFAULT_ZONES 0x80
|
||||
|
||||
enum ov5640_mode_id {
|
||||
OV5640_MODE_QCIF_176_144 = 0,
|
||||
@@ -218,6 +246,12 @@ struct ov5640_ctrls {
|
||||
struct v4l2_ctrl *auto_gain;
|
||||
struct v4l2_ctrl *gain;
|
||||
};
|
||||
+ struct {
|
||||
+ struct v4l2_ctrl *focus_auto;
|
||||
+ struct v4l2_ctrl *af_start;
|
||||
+ struct v4l2_ctrl *af_stop;
|
||||
+ struct v4l2_ctrl *af_status;
|
||||
+ };
|
||||
struct v4l2_ctrl *brightness;
|
||||
struct v4l2_ctrl *light_freq;
|
||||
struct v4l2_ctrl *saturation;
|
||||
@@ -261,6 +295,8 @@ struct ov5640_dev {
|
||||
|
||||
bool pending_mode_change;
|
||||
bool streaming;
|
||||
+
|
||||
+ bool af_initialized;
|
||||
};
|
||||
|
||||
static inline struct ov5640_dev *to_ov5640_dev(struct v4l2_subdev *sd)
|
||||
@@ -1967,6 +2003,118 @@ static void ov5640_reset(struct ov5640_dev *sensor)
|
||||
usleep_range(20000, 25000);
|
||||
}
|
||||
|
||||
+static int ov5640_copy_fw_to_device(struct ov5640_dev *sensor,
|
||||
+ const struct firmware *fw)
|
||||
+{
|
||||
+ struct i2c_client *client = sensor->i2c_client;
|
||||
+ const u8 *data = (const u8 *)fw->data;
|
||||
+ u8 fw_status;
|
||||
+ int i;
|
||||
+ int ret;
|
||||
+
|
||||
+ // Putting MCU in reset state
|
||||
+ ret = ov5640_write_reg(sensor, OV5640_REG_SYS_RESET00, 0x20);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ // Write firmware
|
||||
+ for (i = 0; i < fw->size / sizeof(u8); i++)
|
||||
+ ov5640_write_reg(sensor,
|
||||
+ OV5640_REG_FIRMWARE_BASE + i,
|
||||
+ data[i]);
|
||||
+
|
||||
+ // Reset MCU state
|
||||
+ ov5640_write_reg(sensor, OV5640_REG_FW_CMD_MAIN, 0x00);
|
||||
+ ov5640_write_reg(sensor, OV5640_REG_FW_CMD_ACK, 0x00);
|
||||
+ ov5640_write_reg(sensor, OV5640_REG_FW_CMD_PARA0, 0x00);
|
||||
+ ov5640_write_reg(sensor, OV5640_REG_FW_CMD_PARA1, 0x00);
|
||||
+ ov5640_write_reg(sensor, OV5640_REG_FW_CMD_PARA2, 0x00);
|
||||
+ ov5640_write_reg(sensor, OV5640_REG_FW_CMD_PARA3, 0x00);
|
||||
+ ov5640_write_reg(sensor, OV5640_REG_FW_CMD_PARA4, 0x00);
|
||||
+ ov5640_write_reg(sensor, OV5640_REG_FW_STATUS, 0x7f);
|
||||
+
|
||||
+ // Start AF MCU
|
||||
+ ret = ov5640_write_reg(sensor, OV5640_REG_SYS_RESET00, 0x00);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ dev_info(&client->dev, "firmware upload success\n");
|
||||
+
|
||||
+ // Wait for firmware to be ready
|
||||
+ for (i = 0; i < 5; i++) {
|
||||
+ ret = ov5640_read_reg(sensor, OV5640_REG_FW_STATUS, &fw_status);
|
||||
+ if (fw_status == OV5640_FW_STATUS_S_IDLE) {
|
||||
+ dev_info(&client->dev, "fw started after %d ms\n", i * 50);
|
||||
+ return ret;
|
||||
+ }
|
||||
+ msleep(50);
|
||||
+ }
|
||||
+ dev_err(&client->dev, "uploaded firmware didn't start, got to 0x%x, retrying...\n", fw_status);
|
||||
+
|
||||
+ // Putting MCU in reset state
|
||||
+ ret = ov5640_write_reg(sensor, OV5640_REG_SYS_RESET00, 0x20);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+ // Start AF MCU
|
||||
+ ret = ov5640_write_reg(sensor, OV5640_REG_SYS_RESET00, 0x00);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+ // Wait for firmware to be ready
|
||||
+ for (i = 0; i < 5; i++) {
|
||||
+ ret = ov5640_read_reg(sensor, OV5640_REG_FW_STATUS, &fw_status);
|
||||
+ if (fw_status == OV5640_FW_STATUS_S_IDLE) {
|
||||
+ dev_info(&client->dev, "fw started after %d ms\n", i * 50);
|
||||
+ return ret;
|
||||
+ }
|
||||
+ msleep(50);
|
||||
+ }
|
||||
+ dev_err(&client->dev, "uploaded firmware didn't start, got to 0x%x\n", fw_status);
|
||||
+ return -ETIMEDOUT;
|
||||
+}
|
||||
+
|
||||
+static int ov5640_af_init(struct ov5640_dev *sensor)
|
||||
+{
|
||||
+ struct i2c_client *client = sensor->i2c_client;
|
||||
+ const char* fwname = "ov5640_af.bin";
|
||||
+ const struct firmware *fw;
|
||||
+ int ret;
|
||||
+
|
||||
+ if (sensor->af_initialized) {
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ if (firmware_request_nowarn(&fw, fwname, &client->dev) == 0) {
|
||||
+ ret = ov5640_copy_fw_to_device(sensor, fw);
|
||||
+ if (ret == 0)
|
||||
+ sensor->af_initialized = 1;
|
||||
+ } else {
|
||||
+ dev_warn(&client->dev, "%s: no autofocus firmware available (%s)\n",
|
||||
+ __func__, fwname);
|
||||
+ ret = -1;
|
||||
+ }
|
||||
+ release_firmware(fw);
|
||||
+
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ // Enable AF systems
|
||||
+ ret = ov5640_mod_reg(sensor, OV5640_REG_SYS_CLOCK_ENABLE00,
|
||||
+ (BIT(6) | BIT(5)), (BIT(6) | BIT(5)));
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+ ret = ov5640_mod_reg(sensor, OV5640_REG_SYS_CLOCK_ENABLE01,
|
||||
+ BIT(6), BIT(6));
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ // Set lens focus driver on
|
||||
+ ov5640_write_reg(sensor, OV5640_REG_VCM_CONTROL4, 0x3f);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
static int ov5640_set_power_on(struct ov5640_dev *sensor)
|
||||
{
|
||||
struct i2c_client *client = sensor->i2c_client;
|
||||
@@ -1988,6 +2117,8 @@ static int ov5640_set_power_on(struct ov5640_dev *sensor)
|
||||
goto xclk_off;
|
||||
}
|
||||
|
||||
+ sensor->af_initialized = 0;
|
||||
+
|
||||
ov5640_reset(sensor);
|
||||
ov5640_power(sensor, true);
|
||||
|
||||
@@ -2416,6 +2547,35 @@ static int ov5640_set_framefmt(struct ov5640_dev *sensor,
|
||||
is_jpeg ? (BIT(5) | BIT(3)) : 0);
|
||||
}
|
||||
|
||||
+static int ov5640_fw_command(struct ov5640_dev *sensor, int command)
|
||||
+{
|
||||
+ u8 fw_ack;
|
||||
+ int i;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = ov5640_write_reg(sensor, OV5640_REG_FW_CMD_ACK, 0x01);
|
||||
+ if(ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ ret = ov5640_write_reg(sensor, OV5640_REG_FW_CMD_MAIN, command);
|
||||
+ if(ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ for (i = 0; i < 100; i++) {
|
||||
+ ret = ov5640_read_reg(sensor, OV5640_REG_FW_CMD_ACK, &fw_ack);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ if (fw_ack == 0){
|
||||
+ return ret;
|
||||
+ }
|
||||
+
|
||||
+ msleep(50);
|
||||
+ }
|
||||
+ return -ETIMEDOUT;
|
||||
+}
|
||||
+
|
||||
+
|
||||
/*
|
||||
* Sensor Controls.
|
||||
*/
|
||||
@@ -2532,6 +2692,41 @@ static int ov5640_set_ctrl_exposure(struct ov5640_dev *sensor,
|
||||
return ret;
|
||||
}
|
||||
|
||||
+static int ov5640_set_ctrl_focus(struct ov5640_dev *sensor, int command)
|
||||
+{
|
||||
+ struct i2c_client *client = sensor->i2c_client;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = ov5640_af_init(sensor);
|
||||
+ if (ret) {
|
||||
+ dev_err(&client->dev, "%s: no autofocus firmware loaded\n",
|
||||
+ __func__);
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ if (command == OV5640_FW_CMD_RELEASE_FOCUS) {
|
||||
+ dev_dbg(&client->dev, "%s: Releasing autofocus\n",
|
||||
+ __func__);
|
||||
+ return ov5640_fw_command(sensor, OV5640_FW_CMD_RELEASE_FOCUS);
|
||||
+ }
|
||||
+
|
||||
+ // Restart zone config
|
||||
+ ret = ov5640_fw_command(sensor, OV5640_FW_CMD_ZONE_CONFIG);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ // Set default focus zones
|
||||
+ ret = ov5640_fw_command(sensor, OV5640_FW_CMD_DEFAULT_ZONES);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ dev_dbg(&client->dev, "%s: Triggering autofocus\n",
|
||||
+ __func__);
|
||||
+
|
||||
+ // Start focussing
|
||||
+ return ov5640_fw_command(sensor, command);
|
||||
+}
|
||||
+
|
||||
static int ov5640_set_ctrl_gain(struct ov5640_dev *sensor, bool auto_gain)
|
||||
{
|
||||
struct ov5640_ctrls *ctrls = &sensor->ctrls;
|
||||
@@ -2638,6 +2833,32 @@ static int ov5640_set_ctrl_vflip(struct ov5640_dev *sensor, int value)
|
||||
(BIT(2) | BIT(1)) : 0);
|
||||
}
|
||||
|
||||
+static int ov5640_get_af_status(struct ov5640_dev *sensor)
|
||||
+{
|
||||
+ u8 fw_status;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = ov5640_read_reg(sensor, OV5640_REG_FW_STATUS, &fw_status);
|
||||
+ if (ret)
|
||||
+ return ret;
|
||||
+
|
||||
+ switch (fw_status) {
|
||||
+ case OV5640_FW_STATUS_S_FIRMWARE:
|
||||
+ case OV5640_FW_STATUS_S_STARTUP:
|
||||
+ return V4L2_AUTO_FOCUS_STATUS_FAILED;
|
||||
+ break;
|
||||
+ case OV5640_FW_STATUS_S_IDLE:
|
||||
+ return V4L2_AUTO_FOCUS_STATUS_IDLE;
|
||||
+ break;
|
||||
+ case OV5640_FW_STATUS_S_FOCUSED:
|
||||
+ return V4L2_AUTO_FOCUS_STATUS_REACHED;
|
||||
+ break;
|
||||
+ default:
|
||||
+ return V4L2_AUTO_FOCUS_STATUS_BUSY;
|
||||
+ break;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static int ov5640_g_volatile_ctrl(struct v4l2_ctrl *ctrl)
|
||||
{
|
||||
struct v4l2_subdev *sd = ctrl_to_sd(ctrl);
|
||||
@@ -2659,6 +2880,12 @@ static int ov5640_g_volatile_ctrl(struct v4l2_ctrl *ctrl)
|
||||
return val;
|
||||
sensor->ctrls.exposure->val = val;
|
||||
break;
|
||||
+ case V4L2_CID_FOCUS_AUTO:
|
||||
+ val = ov5640_get_af_status(sensor);
|
||||
+ if (val < 0)
|
||||
+ return val;
|
||||
+ sensor->ctrls.af_status->val = val;
|
||||
+ break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -2690,6 +2917,18 @@ static int ov5640_s_ctrl(struct v4l2_ctrl *ctrl)
|
||||
case V4L2_CID_AUTO_WHITE_BALANCE:
|
||||
ret = ov5640_set_ctrl_white_balance(sensor, ctrl->val);
|
||||
break;
|
||||
+ case V4L2_CID_FOCUS_AUTO:
|
||||
+ if (ctrl->val)
|
||||
+ ret = ov5640_set_ctrl_focus(sensor, OV5640_FW_CMD_CONTINUOUS_FOCUS);
|
||||
+ else
|
||||
+ ret = ov5640_set_ctrl_focus(sensor, OV5640_FW_CMD_RELEASE_FOCUS);
|
||||
+ break;
|
||||
+ case V4L2_CID_AUTO_FOCUS_START:
|
||||
+ ret = ov5640_set_ctrl_focus(sensor, OV5640_FW_CMD_TRIGGER_FOCUS);
|
||||
+ break;
|
||||
+ case V4L2_CID_AUTO_FOCUS_STOP:
|
||||
+ ret = ov5640_set_ctrl_focus(sensor, OV5640_FW_CMD_RELEASE_FOCUS);
|
||||
+ break;
|
||||
case V4L2_CID_HUE:
|
||||
ret = ov5640_set_ctrl_hue(sensor, ctrl->val);
|
||||
break;
|
||||
@@ -2762,6 +3001,20 @@ static int ov5640_init_controls(struct ov5640_dev *sensor)
|
||||
ctrls->gain = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_GAIN,
|
||||
0, 1023, 1, 0);
|
||||
|
||||
+ /* Autofocus */
|
||||
+ ctrls->focus_auto = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_FOCUS_AUTO,
|
||||
+ 0, 1, 1, 0);
|
||||
+ ctrls->af_start = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_AUTO_FOCUS_START,
|
||||
+ 0, 1, 1, 0);
|
||||
+ ctrls->af_stop = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_AUTO_FOCUS_STOP,
|
||||
+ 0, 1, 1, 0);
|
||||
+ ctrls->af_status = v4l2_ctrl_new_std(hdl, ops,
|
||||
+ V4L2_CID_AUTO_FOCUS_STATUS, 0,
|
||||
+ (V4L2_AUTO_FOCUS_STATUS_BUSY |
|
||||
+ V4L2_AUTO_FOCUS_STATUS_REACHED |
|
||||
+ V4L2_AUTO_FOCUS_STATUS_FAILED),
|
||||
+ 0, V4L2_AUTO_FOCUS_STATUS_IDLE);
|
||||
+
|
||||
ctrls->saturation = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_SATURATION,
|
||||
0, 255, 1, 64);
|
||||
ctrls->hue = v4l2_ctrl_new_std(hdl, ops, V4L2_CID_HUE,
|
||||
@@ -2795,6 +3048,7 @@ static int ov5640_init_controls(struct ov5640_dev *sensor)
|
||||
v4l2_ctrl_auto_cluster(3, &ctrls->auto_wb, 0, false);
|
||||
v4l2_ctrl_auto_cluster(2, &ctrls->auto_gain, 0, true);
|
||||
v4l2_ctrl_auto_cluster(2, &ctrls->auto_exp, 1, true);
|
||||
+ v4l2_ctrl_cluster(4, &ctrls->focus_auto);
|
||||
|
||||
sensor->sd.ctrl_handler = hdl;
|
||||
return 0;
|
||||
--
|
||||
2.25.4
|
||||
|
12
sys-kernel/pinephone-sources/files/panic-led-5.12.patch
Normal file
12
sys-kernel/pinephone-sources/files/panic-led-5.12.patch
Normal file
@ -0,0 +1,12 @@
|
||||
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
|
||||
index 1c555456b..05fab5d79 100644
|
||||
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
|
||||
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinephone.dtsi
|
||||
@@ -78,6 +78,7 @@ green {
|
||||
};
|
||||
|
||||
led-2 {
|
||||
+ linux,default-trigger = "panic";
|
||||
function = LED_FUNCTION_INDICATOR;
|
||||
color = <LED_COLOR_ID_RED>;
|
||||
gpios = <&pio 3 19 GPIO_ACTIVE_HIGH>; /* PD19 */
|
@ -0,0 +1,21 @@
|
||||
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
|
||||
index a72c2ec8c..b3a7bef13 100644
|
||||
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
|
||||
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
|
||||
@@ -227,7 +227,15 @@ &i2c0_pins {
|
||||
&i2c1 {
|
||||
status = "okay";
|
||||
|
||||
- /* TODO: add Bochs BMA223 accelerometer here */
|
||||
+ bma223@18 {
|
||||
+ compatible = "bosch,bma223", "bosch,bma222e";
|
||||
+ reg = <0x18>;
|
||||
+ interrupt-parent = <&pio>;
|
||||
+ interrupts = <7 5 IRQ_TYPE_LEVEL_HIGH>; /* PH5 */
|
||||
+ mount-matrix = "0", "-1", "0",
|
||||
+ "-1", "0", "0",
|
||||
+ "0", "0", "-1";
|
||||
+ };
|
||||
};
|
||||
|
||||
&lradc {
|
44
sys-kernel/pinephone-sources/files/pinetab-bluetooth.patch
Normal file
44
sys-kernel/pinephone-sources/files/pinetab-bluetooth.patch
Normal file
@ -0,0 +1,44 @@
|
||||
From 330d05da1b6e8118c9c4655f0b234cf32a2f1ce4 Mon Sep 17 00:00:00 2001
|
||||
From: Icenowy Zheng <icenowy@aosc.io>
|
||||
Date: Sun, 14 Apr 2019 23:46:47 +0800
|
||||
Subject: [PATCH] arm64: allwinner: a64: pinetab: enable RTL8723CS bluetooth
|
||||
|
||||
PineTab has a RTL8723CS Wi-Fi/BT combo chip on board, the bluetooth part
|
||||
of it communicates with A64 via UART, and the power of it is controlled
|
||||
with some GPIO at PL bank.
|
||||
|
||||
Enable the bluetooth in the device tree.
|
||||
|
||||
Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
|
||||
---
|
||||
.../boot/dts/allwinner/sun50i-a64-pinetab.dts | 14 ++++++++++++++
|
||||
1 file changed, 14 insertions(+)
|
||||
|
||||
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
|
||||
index 84d6e8cb2b88..e8b823875740 100644
|
||||
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
|
||||
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts
|
||||
@@ -447,6 +447,20 @@ &uart0 {
|
||||
status = "okay";
|
||||
};
|
||||
|
||||
+&uart1 {
|
||||
+ pinctrl-names = "default";
|
||||
+ pinctrl-0 = <&uart1_pins>, <&uart1_rts_cts_pins>;
|
||||
+ status = "okay";
|
||||
+
|
||||
+ bluetooth {
|
||||
+ compatible = "realtek,rtl8723cs-bt";
|
||||
+ reset-gpios = <&r_pio 0 4 GPIO_ACTIVE_LOW>; /* PL4 */
|
||||
+ device-wake-gpios = <&r_pio 0 5 GPIO_ACTIVE_LOW>; /* PL5 */
|
||||
+ host-wake-gpios = <&r_pio 0 6 GPIO_ACTIVE_HIGH>; /* PL6 */
|
||||
+ firmware-postfix = "pinebook";
|
||||
+ };
|
||||
+};
|
||||
+
|
||||
&usb_otg {
|
||||
dr_mode = "otg";
|
||||
status = "okay";
|
||||
--
|
||||
GitLab
|
||||
|
82
sys-kernel/pinephone-sources/pinephone-sources-5.13.5.ebuild
Normal file
82
sys-kernel/pinephone-sources/pinephone-sources-5.13.5.ebuild
Normal file
@ -0,0 +1,82 @@
|
||||
# Copyright 1999-2021 Gentoo Authors
|
||||
# Distributed under the terms of the GNU General Public License v2
|
||||
|
||||
EAPI="6"
|
||||
K_NOUSENAME="yes"
|
||||
K_NOSETEXTRAVERSION="yes"
|
||||
K_SECURITY_UNSUPPORTED="1"
|
||||
ETYPE="sources"
|
||||
inherit kernel-2
|
||||
detect_version
|
||||
|
||||
KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~hppa ~ia64 ~mips ~ppc ~ppc64 ~s390 ~sparc ~x86"
|
||||
# Copyright 1999-2021 Gentoo Authors
|
||||
# Distributed under the terms of the GNU General Public License v2
|
||||
|
||||
|
||||
DEPEND="${RDEPEND}
|
||||
>=sys-devel/patch-2.7.5"
|
||||
|
||||
DESCRIPTION="Full sources for the Linux kernel, with megi's patch for pinephone"
|
||||
|
||||
MEGI_PATCH_URI="https://xff.cz/kernels/${PV:0:4}/patches/all.patch"
|
||||
SRC_URI="${KERNEL_URI} ${MEGI_PATCH_URI} -> all-${PV}.patch"
|
||||
|
||||
PATCHES=(
|
||||
${DISTDIR}/all-${PV}.patch
|
||||
${FILESDIR}/enable-hdmi-output-pinetab.patch
|
||||
${FILESDIR}/enable-jack-detection-pinetab.patch
|
||||
${FILESDIR}/pinetab-bluetooth.patch
|
||||
${FILESDIR}/pinetab-accelerometer.patch
|
||||
${FILESDIR}/dts-pinephone-drop-modem-power-node.patch
|
||||
#${FILESDIR}/dts-headphone-jack-detection.patch
|
||||
${FILESDIR}/media-ov5640-Implement-autofocus.patch
|
||||
${FILESDIR}/0011-dts-pinetab-hardcode-mmc-numbers.patch
|
||||
#${FILESDIR}/0012-pinephone-fix-pogopin-i2c.patch
|
||||
${FILESDIR}/0107-quirk-kernel-org-bug-210681-firmware_rome_error.patch
|
||||
${FILESDIR}/0177-leds-gpio-make-max_brightness-configurable.patch
|
||||
#${FILESDIR}/0178-sun8i-codec-fix-headphone-jack-pin-name.patch
|
||||
#${FILESDIR}/0179-arm64-dts-allwinner-pinephone-improve-device-tree-5.12.patch
|
||||
${FILESDIR}/panic-led-5.12.patch
|
||||
#${FILESDIR}/improve-jack-button-handling-and-mic.patch
|
||||
|
||||
${FILESDIR}/PATCH-v3-01-14-include-linux-memcontrol.h-do-not-warn-in-page_memcg_rcu-if-CONFIG_MEMCG.patch
|
||||
${FILESDIR}/PATCH-v3-02-14-include-linux-nodemask.h-define-next_memory_node-if-CONFIG_NUMA.patch
|
||||
${FILESDIR}/PATCH-v3-03-14-include-linux-cgroup.h-export-cgroup_mutex.patch
|
||||
${FILESDIR}/PATCH-v3-04-14-mm-x86-support-the-access-bit-on-non-leaf-PMD-entries.patch
|
||||
${FILESDIR}/PATCH-v3-05-14-mm-vmscan.c-refactor-shrink_node.patch
|
||||
${FILESDIR}/PATCH-v3-06-14-mm-workingset.c-refactor-pack_shadow-and-unpack_shadow.patch
|
||||
${FILESDIR}/PATCH-v3-07-14-mm-multigenerational-lru-groundwork.patch
|
||||
${FILESDIR}/PATCH-v3-08-14-mm-multigenerational-lru-activation.patch
|
||||
${FILESDIR}/PATCH-v3-09-14-mm-multigenerational-lru-mm_struct-list.patch
|
||||
${FILESDIR}/PATCH-v3-10-14-mm-multigenerational-lru-aging.patch
|
||||
${FILESDIR}/PATCH-v3-11-14-mm-multigenerational-lru-eviction.patch
|
||||
${FILESDIR}/PATCH-v3-12-14-mm-multigenerational-lru-user-interface.patch
|
||||
${FILESDIR}/PATCH-v3-13-14-mm-multigenerational-lru-Kconfig.patch
|
||||
${FILESDIR}/PATCH-v3-14-14-mm-multigenerational-lru-documentation.patch
|
||||
)
|
||||
|
||||
src_prepare() {
|
||||
default
|
||||
eapply_user
|
||||
}
|
||||
|
||||
pkg_postinst() {
|
||||
kernel-2_pkg_postinst
|
||||
einfo "For more info on this patchset, and how to report problems, see:"
|
||||
einfo "${HOMEPAGE}"
|
||||
einfo "To build the kernel use the following command:"
|
||||
einfo "make Image Image.gz modules"
|
||||
einfo "make DTC_FLAGS="-@" dtbs"
|
||||
einfo "make install; make modules_install; make dtbs_install"
|
||||
einfo "If you use kernel config coming with this ebuild, don't forget to also copy dracut-pp.conf to /etc/dracut.conf.d/"
|
||||
einfo "to make sure proper kernel modules are loaded into initramfs"
|
||||
einfo "if you want to cross compile pinephone kernel on amd64 host, follow the https://wiki.gentoo.org/wiki/Cross_build_environment"
|
||||
einfo "to setup cross toolchain environment, then create a xmake wrapper like the following, and replace make with xmake in above commands"
|
||||
einfo "#!/bin/sh"
|
||||
einfo "exec make ARCH='arm64' CROSS_COMPILE='aarch64-unknown-linux-gnu-' INSTALL_MOD_PATH='${SYSROOT}' '$@'"
|
||||
}
|
||||
|
||||
pkg_postrm() {
|
||||
kernel-2_pkg_postrm
|
||||
}
|
Loading…
Reference in New Issue
Block a user