diff --git a/sys-kernel/pinephone-sources/files/PATCH-1-4-HID-magicmouse-add-Apple-Magic-Mouse-2-support.patch b/sys-kernel/pinephone-sources/files/PATCH-1-4-HID-magicmouse-add-Apple-Magic-Mouse-2-support.patch new file mode 100644 index 0000000..1611a15 --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-1-4-HID-magicmouse-add-Apple-Magic-Mouse-2-support.patch @@ -0,0 +1,247 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-15.8 required=3.0 tests=BAYES_00,DKIM_SIGNED, + DKIM_VALID,DKIM_VALID_AU,FREEMAIL_FORGED_FROMDOMAIN,FREEMAIL_FROM, + HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, + MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham + autolearn_force=no version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id 4888EC433C1 + for ; Sat, 27 Mar 2021 13:07:07 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id 0E6E861981 + for ; Sat, 27 Mar 2021 13:07:07 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S230295AbhC0NGh (ORCPT ); + Sat, 27 Mar 2021 09:06:37 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59740 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S229582AbhC0NGT (ORCPT + ); + Sat, 27 Mar 2021 09:06:19 -0400 +Received: from mail-pg1-x529.google.com (mail-pg1-x529.google.com [IPv6:2607:f8b0:4864:20::529]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 82262C0613B1 + for ; Sat, 27 Mar 2021 06:06:19 -0700 (PDT) +Received: by mail-pg1-x529.google.com with SMTP id v10so6405578pgs.12 + for ; Sat, 27 Mar 2021 06:06:19 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=gmail.com; s=20161025; + h=from:to:cc:subject:date:message-id:in-reply-to:references + :mime-version:content-transfer-encoding; + bh=/43es5lmfTvSMg9V9lh/7OQVghMj1iNxFqwqD88gyCk=; + b=JA8+yZao+x/DmyoiRUpwr0wP9XgaNgDVez40dXm+yEd6Wlgs1dQvO3DkU8n7trJWcL + TCj7NqBp0z4pf3pSHrTxX7rWZX4yRyZJAXo7fqTPqfN2R0PkRIp5gnvcDv+7/BRM4nqx + 3pI6ubgKZ+rxYph8XNAuO94/oOjxgItIhOqYGbLPHwa2eoI60mUbrF/ukBsw8OwQ+Vli + 0siGyaoTCPP/h+9uuHJqQJ1yw6CCkCAxMwZXD79abtLytL6WkhuvoFJ6exRYGHawcHMs + bel32ifzIlv+7ULbcTI2uVNhxvdrD51tRSNrAZ77n+Tk8RivXMeSqSzPVngWZCs0uk6s + JryA== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to + :references:mime-version:content-transfer-encoding; + bh=/43es5lmfTvSMg9V9lh/7OQVghMj1iNxFqwqD88gyCk=; + b=fAhjI90TZfQpcQBqM4rN69d8uN92OH3j+lhm/dYYlmqdchK6ZZsPD3wt6VW8/ObU+0 + BpTic3inOmn0aVasSmAkbNxaVAUJ339klb/WnO9RfaemBLXDCBMgGjVr+ofhpIbfKxiZ + 0aBswW4Dc2uY39zmxm7wtJ2sRHHwj/Ltdt7B+NYes7Kzohvfg98YLvm8I5mloimR02U9 + HRlPKK2YbMcZ5i2Y8Q3faX8356caUUU7l91utK4EXdrVFCbNftXBEmRej6gXSZudCBga + 7w6Rgymaox0hfMZzYLWtJJp2fo3BcKA4+TD6bJ1yrxIdPmK59QMGoyMUIKqTIZIjN2c/ + gvpg== +X-Gm-Message-State: AOAM531lA6V8bOmQPsuLmZx3iv59gcixbI4HEH5eqWzOJ/N3DRaX/hb9 + NavPhvckezEkR22O7uWWvZAUxOplQlRwSsX5 +X-Google-Smtp-Source: ABdhPJyaSIYZWu4pp8j7TnxkxYd0BP77HzgDaIZFIDeoL910Tkv+L4VuoQLEw0GNu+5Zxi80enV/YQ== +X-Received: by 2002:a65:498b:: with SMTP id r11mr16491362pgs.364.1616850378733; + Sat, 27 Mar 2021 06:06:18 -0700 (PDT) +Received: from johnchen902-arch-ryzen.. 
(2001-b011-3815-3a1f-9afa-9bff-fe6e-3ce2.dynamic-ip6.hinet.net. [2001:b011:3815:3a1f:9afa:9bff:fe6e:3ce2]) + by smtp.gmail.com with ESMTPSA id ot17sm6413787pjb.50.2021.03.27.06.06.17 + (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); + Sat, 27 Mar 2021 06:06:18 -0700 (PDT) +From: John Chen +To: linux-kernel@vger.kernel.org +Cc: Rohit Pidaparthi , + RicardoEPRodrigues , + Jiri Kosina , + Benjamin Tissoires , + John Chen +Subject: [PATCH 1/4] HID: magicmouse: add Apple Magic Mouse 2 support +Date: Sat, 27 Mar 2021 21:05:05 +0800 +Message-Id: <20210327130508.24849-2-johnchen902@gmail.com> +X-Mailer: git-send-email 2.31.0 +In-Reply-To: <20210327130508.24849-1-johnchen902@gmail.com> +References: <20210327130508.24849-1-johnchen902@gmail.com> +MIME-Version: 1.0 +Content-Transfer-Encoding: 8bit +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +Bluetooth device + Vendor 004c (Apple) + Device 0269 (Magic Mouse 2) + +Add support for Apple Magic Mouse 2, putting the device in multi-touch +mode. + +Co-authored-by: Rohit Pidaparthi +Co-authored-by: RicardoEPRodrigues +Signed-off-by: John Chen +--- + drivers/hid/hid-ids.h | 1 + + drivers/hid/hid-magicmouse.c | 53 ++++++++++++++++++++++++++++++++---- + 2 files changed, 49 insertions(+), 5 deletions(-) + +diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h +index e42aaae3138f..fa0edf03570a 100644 +--- a/drivers/hid/hid-ids.h ++++ b/drivers/hid/hid-ids.h +@@ -93,6 +93,7 @@ + #define BT_VENDOR_ID_APPLE 0x004c + #define USB_DEVICE_ID_APPLE_MIGHTYMOUSE 0x0304 + #define USB_DEVICE_ID_APPLE_MAGICMOUSE 0x030d ++#define USB_DEVICE_ID_APPLE_MAGICMOUSE2 0x0269 + #define USB_DEVICE_ID_APPLE_MAGICTRACKPAD 0x030e + #define USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 0x0265 + #define USB_DEVICE_ID_APPLE_FOUNTAIN_ANSI 0x020e +diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c +index abd86903875f..7aad6ca56780 100644 +--- a/drivers/hid/hid-magicmouse.c ++++ b/drivers/hid/hid-magicmouse.c +@@ -54,6 +54,7 @@ MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state fie + #define TRACKPAD2_USB_REPORT_ID 0x02 + #define TRACKPAD2_BT_REPORT_ID 0x31 + #define MOUSE_REPORT_ID 0x29 ++#define MOUSE2_REPORT_ID 0x12 + #define DOUBLE_REPORT_ID 0xf7 + /* These definitions are not precise, but they're close enough. 
(Bits + * 0x03 seem to indicate the aspect ratio of the touch, bits 0x70 seem +@@ -195,7 +196,8 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda + int id, x, y, size, orientation, touch_major, touch_minor, state, down; + int pressure = 0; + +- if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) { ++ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE || ++ input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) { + id = (tdata[6] << 2 | tdata[5] >> 6) & 0xf; + x = (tdata[1] << 28 | tdata[0] << 20) >> 20; + y = -((tdata[2] << 24 | tdata[1] << 16) >> 20); +@@ -296,7 +298,8 @@ static void magicmouse_emit_touch(struct magicmouse_sc *msc, int raw_id, u8 *tda + input_report_abs(input, ABS_MT_PRESSURE, pressure); + + if (report_undeciphered) { +- if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) ++ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE || ++ input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) + input_event(input, EV_MSC, MSC_RAW, tdata[7]); + else if (input->id.product != + USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) +@@ -380,6 +383,34 @@ static int magicmouse_raw_event(struct hid_device *hdev, + * ts = data[3] >> 6 | data[4] << 2 | data[5] << 10; + */ + break; ++ case MOUSE2_REPORT_ID: ++ /* Size is either 8 or (14 + 8 * N) */ ++ if (size != 8 && (size < 14 || (size - 14) % 8 != 0)) ++ return 0; ++ npoints = (size - 14) / 8; ++ if (npoints > 15) { ++ hid_warn(hdev, "invalid size value (%d) for MOUSE2_REPORT_ID\n", ++ size); ++ return 0; ++ } ++ msc->ntouches = 0; ++ for (ii = 0; ii < npoints; ii++) ++ magicmouse_emit_touch(msc, ii, data + ii * 8 + 14); ++ ++ /* When emulating three-button mode, it is important ++ * to have the current touch information before ++ * generating a click event. ++ */ ++ x = (int)((data[3] << 24) | (data[2] << 16)) >> 16; ++ y = (int)((data[5] << 24) | (data[4] << 16)) >> 16; ++ clicks = data[1]; ++ ++ /* The following bits provide a device specific timestamp. They ++ * are unused here. ++ * ++ * ts = data[11] >> 6 | data[12] << 2 | data[13] << 10; ++ */ ++ break; + case DOUBLE_REPORT_ID: + /* Sometimes the trackpad sends two touch reports in one + * packet. +@@ -392,7 +423,8 @@ static int magicmouse_raw_event(struct hid_device *hdev, + return 0; + } + +- if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) { ++ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE || ++ input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) { + magicmouse_emit_buttons(msc, clicks & 3); + input_report_rel(input, REL_X, x); + input_report_rel(input, REL_Y, y); +@@ -415,7 +447,8 @@ static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hd + + __set_bit(EV_KEY, input->evbit); + +- if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) { ++ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE || ++ input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) { + __set_bit(BTN_LEFT, input->keybit); + __set_bit(BTN_RIGHT, input->keybit); + if (emulate_3button) +@@ -480,7 +513,8 @@ static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hd + * the origin at the same position, and just uses the additive + * inverse of the reported Y. 
+ */ +- if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) { ++ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE || ++ input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) { + input_set_abs_params(input, ABS_MT_ORIENTATION, -31, 32, 1, 0); + input_set_abs_params(input, ABS_MT_POSITION_X, + MOUSE_MIN_X, MOUSE_MAX_X, 4, 0); +@@ -586,6 +620,7 @@ static int magicmouse_probe(struct hid_device *hdev, + { + const u8 *feature; + const u8 feature_mt[] = { 0xD7, 0x01 }; ++ const u8 feature_mt_mouse2[] = { 0xF1, 0x02, 0x01 }; + const u8 feature_mt_trackpad2_usb[] = { 0x02, 0x01 }; + const u8 feature_mt_trackpad2_bt[] = { 0xF1, 0x02, 0x01 }; + u8 *buf; +@@ -631,6 +666,9 @@ static int magicmouse_probe(struct hid_device *hdev, + if (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE) + report = hid_register_report(hdev, HID_INPUT_REPORT, + MOUSE_REPORT_ID, 0); ++ else if (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) ++ report = hid_register_report(hdev, HID_INPUT_REPORT, ++ MOUSE2_REPORT_ID, 0); + else if (id->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) { + if (id->vendor == BT_VENDOR_ID_APPLE) + report = hid_register_report(hdev, HID_INPUT_REPORT, +@@ -660,6 +698,9 @@ static int magicmouse_probe(struct hid_device *hdev, + feature_size = sizeof(feature_mt_trackpad2_usb); + feature = feature_mt_trackpad2_usb; + } ++ } else if (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) { ++ feature_size = sizeof(feature_mt_mouse2); ++ feature = feature_mt_mouse2; + } else { + feature_size = sizeof(feature_mt); + feature = feature_mt; +@@ -696,6 +737,8 @@ static int magicmouse_probe(struct hid_device *hdev, + static const struct hid_device_id magic_mice[] = { + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_MAGICMOUSE), .driver_data = 0 }, ++ { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, ++ USB_DEVICE_ID_APPLE_MAGICMOUSE2), .driver_data = 0 }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_MAGICTRACKPAD), .driver_data = 0 }, + { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, +-- +2.31.0 + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-2-4-HID-magicmouse-fix-3-button-emulation-of-Mouse-2.patch b/sys-kernel/pinephone-sources/files/PATCH-2-4-HID-magicmouse-fix-3-button-emulation-of-Mouse-2.patch new file mode 100644 index 0000000..bc2276d --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-2-4-HID-magicmouse-fix-3-button-emulation-of-Mouse-2.patch @@ -0,0 +1,134 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-15.8 required=3.0 tests=BAYES_00,DKIM_SIGNED, + DKIM_VALID,DKIM_VALID_AU,FREEMAIL_FORGED_FROMDOMAIN,FREEMAIL_FROM, + HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, + MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham + autolearn_force=no version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id 06C18C433E1 + for ; Sat, 27 Mar 2021 13:07:08 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id D1CE16193D + for ; Sat, 27 Mar 2021 13:07:07 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S230328AbhC0NGi (ORCPT ); + Sat, 27 Mar 2021 09:06:38 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59770 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by 
vger.kernel.org + with ESMTP id S230266AbhC0NG1 (ORCPT + ); + Sat, 27 Mar 2021 09:06:27 -0400 +Received: from mail-pl1-x634.google.com (mail-pl1-x634.google.com [IPv6:2607:f8b0:4864:20::634]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5086BC0613B1 + for ; Sat, 27 Mar 2021 06:06:27 -0700 (PDT) +Received: by mail-pl1-x634.google.com with SMTP id h8so2235029plt.7 + for ; Sat, 27 Mar 2021 06:06:27 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=gmail.com; s=20161025; + h=from:to:cc:subject:date:message-id:in-reply-to:references + :mime-version:content-transfer-encoding; + bh=NeWUvZBV3NAy1b0eckELIbBZ7sti/n1sLYnD4r2cjaU=; + b=V7uM0AaI1Vy/mmqpuTVu5F6+98YPDzOa3QS6tRkWeJqhrflMONfCXtOxXVR+CeiPil + OOfaxOtAMeVEW9wE0EU3U/8aNghtzuUvVN+0Tj57+W+4g0ilQOODiDLDu4ZqAo1Q5eDZ + gA+He13KWVwNYaYTNUNParLXG5GYDbblaqABSUDurI1FTjn1US0ZZytlzdZy1GfL9eTj + 6AiiVM3A4YdUGUWE7qQQE8jI92o4qKYvaNjn1M+d5ypKCue3NJWeRTSPKLu0QD2qL02+ + QPga2RPtmLpztA8/lPGTRpgVNY3C5jdCBZyWgFtvZg5dNoDfe5bQnAmF2J2ka+A7JBSD + VHtw== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to + :references:mime-version:content-transfer-encoding; + bh=NeWUvZBV3NAy1b0eckELIbBZ7sti/n1sLYnD4r2cjaU=; + b=OQek2lJ5JINezfYdN/FzSPFL1N9Hrs+KstU7K4gEHavdffvSAOBebg2MG5VSzkf93H + o1iOiAOoXY7cx7j7Vx5CFZUuJOLilpC6gPTJpZlaP8YtEFfGkPaUPPh5FSTyM463Sir8 + n6DupTSrFUI1y44GOBZ2bM2pf9hRN1Yj1oiCT6upmfoHw0/PaKEZt5aOEI8se7HRJp94 + td6+SEZok3uxKEglKEqAG8cnj7Pt4tKVQlg+MI1AQDLQ/ytdYJlMPmrqVyNpnsv44wYa + dxBf0TaMvqn9SYDIDcGct3toAVm5DfVUqXm1nkYcYMOdvPrmLoH52NtCyi5cYC+2TR6i + jUpA== +X-Gm-Message-State: AOAM532sXgN0NNpKjilSMBewUXwwXz+MOfd7J5FRI6zAWA5st7gy5LmE + Sw/QHj4cm3zT07LU1kWYSO9puwFV+yK0Hquf +X-Google-Smtp-Source: ABdhPJyDnhcP7BeBHXX2rPqMXwkOQiZdussDPATmYqyQnp7HAsi0OqWSUVIloMNi3QBpMsmjXTtyew== +X-Received: by 2002:a17:903:2285:b029:e6:faf5:eaff with SMTP id b5-20020a1709032285b02900e6faf5eaffmr19574014plh.70.1616850386727; + Sat, 27 Mar 2021 06:06:26 -0700 (PDT) +Received: from johnchen902-arch-ryzen.. (2001-b011-3815-3a1f-9afa-9bff-fe6e-3ce2.dynamic-ip6.hinet.net. [2001:b011:3815:3a1f:9afa:9bff:fe6e:3ce2]) + by smtp.gmail.com with ESMTPSA id ot17sm6413787pjb.50.2021.03.27.06.06.25 + (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); + Sat, 27 Mar 2021 06:06:26 -0700 (PDT) +From: John Chen +To: linux-kernel@vger.kernel.org +Cc: Rohit Pidaparthi , + RicardoEPRodrigues , + Jiri Kosina , + Benjamin Tissoires , + John Chen +Subject: [PATCH 2/4] HID: magicmouse: fix 3 button emulation of Mouse 2 +Date: Sat, 27 Mar 2021 21:05:06 +0800 +Message-Id: <20210327130508.24849-3-johnchen902@gmail.com> +X-Mailer: git-send-email 2.31.0 +In-Reply-To: <20210327130508.24849-1-johnchen902@gmail.com> +References: <20210327130508.24849-1-johnchen902@gmail.com> +MIME-Version: 1.0 +Content-Transfer-Encoding: 8bit +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +It is observed that, with 3 button emulation, when middle button is +clicked, either the left button or right button is clicked as well. It +is caused by hidinput "correctly" acting on the event, oblivious to the +3 button emulation. + +As raw_event has taken care of everything, no further processing is +needed. However, the only way to stop at raw_event is to return an error +(negative) value. Therefore, the processing is stopped at event instead. 
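For readers unfamiliar with the hid-core callback contract this relies on: a negative return from ->raw_event() is treated as an error and aborts report processing, while a non-zero return from ->event() merely tells hid-core that the driver consumed the usage, so hidinput does not emit its own BTN_* events. A minimal sketch of that contract, not part of the patch itself (example_event and SOME_REPORT_ID are made-up names):

    static int example_event(struct hid_device *hdev, struct hid_field *field,
                             struct hid_usage *usage, __s32 value)
    {
            /* This report was fully handled in ->raw_event(); returning 1
             * keeps hidinput from generating duplicate button events.
             */
            if (field->report->id == SOME_REPORT_ID)
                    return 1;
            return 0;       /* everything else goes through hidinput as usual */
    }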
+ +Signed-off-by: John Chen +--- + drivers/hid/hid-magicmouse.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c +index 7aad6ca56780..c646b4cd3783 100644 +--- a/drivers/hid/hid-magicmouse.c ++++ b/drivers/hid/hid-magicmouse.c +@@ -440,6 +440,21 @@ static int magicmouse_raw_event(struct hid_device *hdev, + return 1; + } + ++static int magicmouse_event(struct hid_device *hdev, struct hid_field *field, ++ struct hid_usage *usage, __s32 value) ++{ ++ struct magicmouse_sc *msc = hid_get_drvdata(hdev); ++ if (msc->input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE2 && ++ field->report->id == MOUSE2_REPORT_ID) { ++ // magic_mouse_raw_event has done all the work. Skip hidinput. ++ // ++ // Specifically, hidinput may modify BTN_LEFT and BTN_RIGHT, ++ // breaking emulate_3button. ++ return 1; ++ } ++ return 0; ++} ++ + static int magicmouse_setup_input(struct input_dev *input, struct hid_device *hdev) + { + int error; +@@ -754,6 +769,7 @@ static struct hid_driver magicmouse_driver = { + .id_table = magic_mice, + .probe = magicmouse_probe, + .raw_event = magicmouse_raw_event, ++ .event = magicmouse_event, + .input_mapping = magicmouse_input_mapping, + .input_configured = magicmouse_input_configured, + }; +-- +2.31.0 + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-3-4-HID-magicmouse-fix-reconnection-of-Magic-Mouse-2.patch b/sys-kernel/pinephone-sources/files/PATCH-3-4-HID-magicmouse-fix-reconnection-of-Magic-Mouse-2.patch new file mode 100644 index 0000000..04dbaff --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-3-4-HID-magicmouse-fix-reconnection-of-Magic-Mouse-2.patch @@ -0,0 +1,265 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-15.8 required=3.0 tests=BAYES_00,DKIM_SIGNED, + DKIM_VALID,DKIM_VALID_AU,FREEMAIL_FORGED_FROMDOMAIN,FREEMAIL_FROM, + HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, + MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS,USER_AGENT_GIT autolearn=ham + autolearn_force=no version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id 9A212C433DB + for ; Sat, 27 Mar 2021 13:10:34 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id 60FCC61981 + for ; Sat, 27 Mar 2021 13:10:34 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S230394AbhC0NHJ (ORCPT ); + Sat, 27 Mar 2021 09:07:09 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:59810 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S230307AbhC0NGi (ORCPT + ); + Sat, 27 Mar 2021 09:06:38 -0400 +Received: from mail-pf1-x432.google.com (mail-pf1-x432.google.com [IPv6:2607:f8b0:4864:20::432]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 1EDFCC0613B1 + for ; Sat, 27 Mar 2021 06:06:38 -0700 (PDT) +Received: by mail-pf1-x432.google.com with SMTP id q5so6741894pfh.10 + for ; Sat, 27 Mar 2021 06:06:38 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=gmail.com; s=20161025; + h=from:to:cc:subject:date:message-id:in-reply-to:references + :mime-version:content-transfer-encoding; + bh=fWEWnDB7IS15Aoqul4RZDergwEtbUe4NAH8lKjv7p/s=; + b=CGLrSHoDnG8b5CL6asLWP1Ym/QFl+wtwIF8PhKlW7RJ5IhavVtdO6Fd7/cY/3GQTDa + 
wvX9Q1wfBsakVlG9/sM9CuozOsra6Ec9c1B+0beWTAKj/tBjwvsVHtMoCiqOPL/Vbig6 + 4zkWMb6dwWSzAgmCqPEaYlyJYqBrDLzzXxqGhchwTfcNgNZQGq0xhh7tZsukEPz4XLIC + LNCy6+hPSVdRG1ADbyPpOGFn3fSeFs5KAwl3y1Cn0TvTPxgpckTLcFz5TsTF/w7VLGW1 + bn9Gakn+MaATqxahU0lDwyzI1sMK2er7/ddjV9VugYN4PzgL9DHGu/iGzXGFftDoLdaJ + tBIQ== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to + :references:mime-version:content-transfer-encoding; + bh=fWEWnDB7IS15Aoqul4RZDergwEtbUe4NAH8lKjv7p/s=; + b=PQiPlj7RSTzmBU6u/2xzL9qv8jrelC7cJFFiOHjwKfz43PMzm0nEj6PxY5ZFMSjmbs + JEfC8iDjJh39FJdthBrvaZX4yuTv4QmOdmRMWrN77sQYbZOaKOhbNrCx2/LdHzAFjLBY + qTHW0+siiP/ATBf1M0cSP200UZAjBwU8MRapxAlaIUmlrfr5+oM8ZrL2tMhzDYcn5b51 + TwXEVVI5Ep0YZxyGYQ04yaMBZxb1hSKev6UhrFpk96Ukg4IY3qBQBRpjWHIWqZY21aUl + EeDLmlWZaqDbp6UQQrAd2p1kIVyrxKD2Cf4aPnk2JcvzR9qGfMwV8cpR9rqwrXBEiyLj + KZFg== +X-Gm-Message-State: AOAM532lFsZyg8BiLek2pS5Ftc0rOopeD1Q9b7d5Lc7gC8pPIjHcnizK + 2/grg+4GExN9zVerojORiZgGkTwU1/c2DswO +X-Google-Smtp-Source: ABdhPJwECFbuV2SwesS0pF6L0s23ghF61g6whXAjcLZpxYe6b6OsgENBMa3gmTj9FFMF+68uJYhPPw== +X-Received: by 2002:a63:1d26:: with SMTP id d38mr17032822pgd.385.1616850397389; + Sat, 27 Mar 2021 06:06:37 -0700 (PDT) +Received: from johnchen902-arch-ryzen.. (2001-b011-3815-3a1f-9afa-9bff-fe6e-3ce2.dynamic-ip6.hinet.net. [2001:b011:3815:3a1f:9afa:9bff:fe6e:3ce2]) + by smtp.gmail.com with ESMTPSA id ot17sm6413787pjb.50.2021.03.27.06.06.36 + (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); + Sat, 27 Mar 2021 06:06:37 -0700 (PDT) +From: John Chen +To: linux-kernel@vger.kernel.org +Cc: Rohit Pidaparthi , + RicardoEPRodrigues , + Jiri Kosina , + Benjamin Tissoires , + John Chen +Subject: [PATCH 3/4] HID: magicmouse: fix reconnection of Magic Mouse 2 +Date: Sat, 27 Mar 2021 21:05:07 +0800 +Message-Id: <20210327130508.24849-4-johnchen902@gmail.com> +X-Mailer: git-send-email 2.31.0 +In-Reply-To: <20210327130508.24849-1-johnchen902@gmail.com> +References: <20210327130508.24849-1-johnchen902@gmail.com> +MIME-Version: 1.0 +Content-Transfer-Encoding: 8bit +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +It is observed that the Magic Mouse 2 would not enter multi-touch mode +unless the mouse is connected before loading the module. It seems to be +a quirk specific to Magic Mouse 2 + +Retrying after 500ms fixes the problem for me. The delay can't be +reduced much further --- 300ms didn't work for me. Retrying immediately +after receiving an event didn't work either. 
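The retry is deferred to a work item rather than sleeping in the probe path. In outline, the pattern used by the diff below is (names taken from the patch; 500 ms is the empirically determined delay):

    INIT_DEFERRABLE_WORK(&msc->work, magicmouse_enable_mt_work);
    ...
    ret = magicmouse_enable_multitouch(hdev);       /* first attempt */
    if (ret == -EIO && id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2)
            schedule_delayed_work(&msc->work, msecs_to_jiffies(500));
    ...
    /* on unbind, keep the work from firing after the device is gone */
    cancel_delayed_work_sync(&msc->work);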
+ +Signed-off-by: John Chen +--- + drivers/hid/hid-magicmouse.c | 93 ++++++++++++++++++++++++------------ + 1 file changed, 63 insertions(+), 30 deletions(-) + +diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c +index c646b4cd3783..69aefef9fe07 100644 +--- a/drivers/hid/hid-magicmouse.c ++++ b/drivers/hid/hid-magicmouse.c +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #include "hid-ids.h" + +@@ -128,6 +129,9 @@ struct magicmouse_sc { + u8 size; + } touches[16]; + int tracking_ids[16]; ++ ++ struct hid_device *hdev; ++ struct delayed_work work; + }; + + static int magicmouse_firm_touch(struct magicmouse_sc *msc) +@@ -629,9 +633,7 @@ static int magicmouse_input_configured(struct hid_device *hdev, + return 0; + } + +- +-static int magicmouse_probe(struct hid_device *hdev, +- const struct hid_device_id *id) ++static int magicmouse_enable_multitouch(struct hid_device *hdev) + { + const u8 *feature; + const u8 feature_mt[] = { 0xD7, 0x01 }; +@@ -639,10 +641,52 @@ static int magicmouse_probe(struct hid_device *hdev, + const u8 feature_mt_trackpad2_usb[] = { 0x02, 0x01 }; + const u8 feature_mt_trackpad2_bt[] = { 0xF1, 0x02, 0x01 }; + u8 *buf; ++ int ret; ++ int feature_size; ++ ++ if (hdev->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) { ++ if (hdev->vendor == BT_VENDOR_ID_APPLE) { ++ feature_size = sizeof(feature_mt_trackpad2_bt); ++ feature = feature_mt_trackpad2_bt; ++ } else { /* USB_VENDOR_ID_APPLE */ ++ feature_size = sizeof(feature_mt_trackpad2_usb); ++ feature = feature_mt_trackpad2_usb; ++ } ++ } else if (hdev->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) { ++ feature_size = sizeof(feature_mt_mouse2); ++ feature = feature_mt_mouse2; ++ } else { ++ feature_size = sizeof(feature_mt); ++ feature = feature_mt; ++ } ++ ++ buf = kmemdup(feature, feature_size, GFP_KERNEL); ++ if (!buf) ++ return -ENOMEM; ++ ++ ret = hid_hw_raw_request(hdev, buf[0], buf, feature_size, ++ HID_FEATURE_REPORT, HID_REQ_SET_REPORT); ++ kfree(buf); ++ return ret; ++} ++ ++static void magicmouse_enable_mt_work(struct work_struct *work) ++{ ++ struct magicmouse_sc *msc = ++ container_of(work, struct magicmouse_sc, work.work); ++ int ret; ++ ++ ret = magicmouse_enable_multitouch(msc->hdev); ++ if (ret < 0) ++ hid_err(msc->hdev, "unable to request touch data (%d)\n", ret); ++} ++ ++static int magicmouse_probe(struct hid_device *hdev, ++ const struct hid_device_id *id) ++{ + struct magicmouse_sc *msc; + struct hid_report *report; + int ret; +- int feature_size; + + if (id->vendor == USB_VENDOR_ID_APPLE && + id->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 && +@@ -656,6 +700,8 @@ static int magicmouse_probe(struct hid_device *hdev, + } + + msc->scroll_accel = SCROLL_ACCEL_DEFAULT; ++ msc->hdev = hdev; ++ INIT_DEFERRABLE_WORK(&msc->work, magicmouse_enable_mt_work); + + msc->quirks = id->driver_data; + hid_set_drvdata(hdev, msc); +@@ -705,28 +751,6 @@ static int magicmouse_probe(struct hid_device *hdev, + } + report->size = 6; + +- if (id->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2) { +- if (id->vendor == BT_VENDOR_ID_APPLE) { +- feature_size = sizeof(feature_mt_trackpad2_bt); +- feature = feature_mt_trackpad2_bt; +- } else { /* USB_VENDOR_ID_APPLE */ +- feature_size = sizeof(feature_mt_trackpad2_usb); +- feature = feature_mt_trackpad2_usb; +- } +- } else if (id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) { +- feature_size = sizeof(feature_mt_mouse2); +- feature = feature_mt_mouse2; +- } else { +- feature_size = sizeof(feature_mt); +- feature = feature_mt; +- } +- +- buf = 
kmemdup(feature, feature_size, GFP_KERNEL); +- if (!buf) { +- ret = -ENOMEM; +- goto err_stop_hw; +- } +- + /* + * Some devices repond with 'invalid report id' when feature + * report switching it into multitouch mode is sent to it. +@@ -735,13 +759,14 @@ static int magicmouse_probe(struct hid_device *hdev, + * but there seems to be no other way of switching the mode. + * Thus the super-ugly hacky success check below. + */ +- ret = hid_hw_raw_request(hdev, buf[0], buf, feature_size, +- HID_FEATURE_REPORT, HID_REQ_SET_REPORT); +- kfree(buf); +- if (ret != -EIO && ret != feature_size) { ++ ret = magicmouse_enable_multitouch(hdev); ++ if (ret != -EIO && ret < 0) { + hid_err(hdev, "unable to request touch data (%d)\n", ret); + goto err_stop_hw; + } ++ if (ret == -EIO && id->product == USB_DEVICE_ID_APPLE_MAGICMOUSE2) { ++ schedule_delayed_work(&msc->work, msecs_to_jiffies(500)); ++ } + + return 0; + err_stop_hw: +@@ -749,6 +774,13 @@ static int magicmouse_probe(struct hid_device *hdev, + return ret; + } + ++static void magicmouse_remove(struct hid_device *hdev) ++{ ++ struct magicmouse_sc *msc = hid_get_drvdata(hdev); ++ cancel_delayed_work_sync(&msc->work); ++ hid_hw_stop(hdev); ++} ++ + static const struct hid_device_id magic_mice[] = { + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, + USB_DEVICE_ID_APPLE_MAGICMOUSE), .driver_data = 0 }, +@@ -768,6 +800,7 @@ static struct hid_driver magicmouse_driver = { + .name = "magicmouse", + .id_table = magic_mice, + .probe = magicmouse_probe, ++ .remove = magicmouse_remove, + .raw_event = magicmouse_raw_event, + .event = magicmouse_event, + .input_mapping = magicmouse_input_mapping, +-- +2.31.0 + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-01-16-include-linux-memcontrol.h-do-not-warn-in-page_memcg_rcu-if-CONFIG_MEMCG.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-01-16-include-linux-memcontrol.h-do-not-warn-in-page_memcg_rcu-if-CONFIG_MEMCG.patch new file mode 100644 index 0000000..1a88a1f --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-01-16-include-linux-memcontrol.h-do-not-warn-in-page_memcg_rcu-if-CONFIG_MEMCG.patch @@ -0,0 +1,146 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no + version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id B98A5C43462 + for ; Tue, 13 Apr 2021 06:56:52 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id 9970A613D1 + for ; Tue, 13 Apr 2021 06:56:52 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S245186AbhDMG5J (ORCPT ); + Tue, 13 Apr 2021 02:57:09 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44138 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S242333AbhDMG5C (ORCPT + ); + Tue, 13 Apr 2021 02:57:02 -0400 +Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 28542C061574 + for ; Mon, 12 Apr 2021 23:56:43 -0700 (PDT) 
+Received: by mail-yb1-xb4a.google.com with SMTP id i2so15393704ybl.21 + for ; Mon, 12 Apr 2021 23:56:43 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=uoN+cWnulcs+MZz6Yfoth7IiX8iwaSm44WY0GAxt+Q4=; + b=Ky/g/4nTpvE6H1kNq4Im8vCSVqJJWgdY64updRqr3NGODL/gY7XSLNlMuXa/Yqagpg + 8h8aUIGoWcm6zgtJI5Fw5fMN+PJDxOQb+W3x0OLBhrQ+nOe/aDQ/DaNsTpFLgKXpBR7/ + Nvvw4ruE5Db9uCII9HC5YVMWkv6n0oPwKqmHcIgXqyJRfj6NX9MMyHBXVjqP883hb1k1 + Uts/76AmsciIF0vpEK2WDi/7DTKQWJN38NKXgOIJgZwI3uctZHJ221m0qvGUkZ8xVQ8M + LJm2bY+K9olC9c50QyUPY+bxF/x11l+o56tHmajIr/WsoQoJ64e/eJ6Tpi1C0nsUQsqW + HHBQ== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=uoN+cWnulcs+MZz6Yfoth7IiX8iwaSm44WY0GAxt+Q4=; + b=q4VjA6z3lTu7Y75EQkCaOUnGPrZr+a8VxIneVHg9KIy8GcVnTbV6azYx3iJlfN/mqY + nM4GFUu6opNihX2CTE1sYviNzX90nlsf6Ip3WykacM0NVKoiD/02EGRPQvc0l3EE/8K0 + 43Y8NKqjqKspr7Tjz074a8EJrkBUqhaBpFzDGZwvcg5JCb19/+tTrjWSio3YSp1gtbA+ + 8OB8fTMMZlhaH5pTQWlQnQM3YN8CNJBooHERVgByq78Q7xObvheM9tjTza0hz5coErNv + aLMQMSIT87k3f7EWq0H6qOBAaxbbR8uChrhfVLanXWxhaw/G+ZI5csPO154ctl5A0+5/ + Yc5g== +X-Gm-Message-State: AOAM5311I++jOq9dpMAS7ctzsZDbqRUOtVWfMxjhdktZjjKeusU8mSAv + AjoVQqVxKqAzXcw+CT2fcJSxxzNjPAU= +X-Google-Smtp-Source: ABdhPJyfoyUlusz71TmeoRvttPw/GuUM1FYO9KnbxFJsUN5OFDqRz4J7wq87XkLveCWglWGJeEC6Et9cWvE= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:a25:bb41:: with SMTP id b1mr41562657ybk.249.1618297002301; + Mon, 12 Apr 2021 23:56:42 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:18 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-2-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 01/16] include/linux/memcontrol.h: do not warn in + page_memcg_rcu() if !CONFIG_MEMCG +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + page-reclaim@google.com, Yu Zhao +Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +page_memcg_rcu() warns on !rcu_read_lock_held() regardless of +CONFIG_MEMCG. The following code is legit, but it triggers the warning +when !CONFIG_MEMCG, since lock_page_memcg() and unlock_page_memcg() +are empty for this config. + + memcg = lock_page_memcg(page1) + (rcu_read_lock() if CONFIG_MEMCG=y) + + do something to page1 + + if (page_memcg_rcu(page2) == memcg) + do something to page2 too as it cannot be migrated away from the + memcg either. + + unlock_page_memcg(page1) + (rcu_read_unlock() if CONFIG_MEMCG=y) + +Locking/unlocking rcu consistently for both configs is rigorous but it +also forces unnecessary locking upon users who have no interest in +CONFIG_MEMCG. 
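Concretely, the warning fires because the !CONFIG_MEMCG stubs take no RCU read lock at all; roughly (stub bodies paraphrased from memcontrol.h, not verbatim):

    /* CONFIG_MEMCG=n */
    static inline struct mem_cgroup *lock_page_memcg(struct page *page)
    {
            return NULL;                    /* no rcu_read_lock() here */
    }

    static inline void unlock_page_memcg(struct page *page)
    {
    }

    static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
    {
            WARN_ON_ONCE(!rcu_read_lock_held());    /* trips on the legit code above */
            return NULL;
    }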
+ +This patch removes the assertion for !CONFIG_MEMCG, because +page_memcg_rcu() has a few callers and there are no concerns regarding +their correctness at the moment. + +Signed-off-by: Yu Zhao +--- + include/linux/memcontrol.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h +index 0c04d39a7967..f13dc02cf277 100644 +--- a/include/linux/memcontrol.h ++++ b/include/linux/memcontrol.h +@@ -1077,7 +1077,6 @@ static inline struct mem_cgroup *page_memcg(struct page *page) + + static inline struct mem_cgroup *page_memcg_rcu(struct page *page) + { +- WARN_ON_ONCE(!rcu_read_lock_held()); + return NULL; + } + +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-02-16-include-linux-nodemask.h-define-next_memory_node-if-CONFIG_NUMA.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-02-16-include-linux-nodemask.h-define-next_memory_node-if-CONFIG_NUMA.patch new file mode 100644 index 0000000..413a913 --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-02-16-include-linux-nodemask.h-define-next_memory_node-if-CONFIG_NUMA.patch @@ -0,0 +1,124 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=ham autolearn_force=no + version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id DD966C43460 + for ; Tue, 13 Apr 2021 06:56:56 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id AD0DD613B1 + for ; Tue, 13 Apr 2021 06:56:56 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S245188AbhDMG5O (ORCPT ); + Tue, 13 Apr 2021 02:57:14 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44148 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S245147AbhDMG5D (ORCPT + ); + Tue, 13 Apr 2021 02:57:03 -0400 +Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id AF0D4C061574 + for ; Mon, 12 Apr 2021 23:56:44 -0700 (PDT) +Received: by mail-yb1-xb4a.google.com with SMTP id e185so6246113ybf.4 + for ; Mon, 12 Apr 2021 23:56:44 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=EM7U/N62rbxjpd/wy3lwoMJ7CSKXstnqAzc6WMXVO+c=; + b=t/TvdOo7hn9eFyLRcO6IKN2knJLFlMvJD85LqS3p70ezJY9KmJyQnoNmrkIR2uthXy + WmFHutjhP3sNRUFV88YVqyqRzdb/QCULw0znZtShHzf8oRGvUznrafDt1yFbCPXkkI+0 + Y1bOuKRWZn44z9QIgS0RLo1mHpFU76jVw8i6GqzSatKn5V3qIjC6li7inmOfVCGRz5Zl + +SxAwEh7kMa92WQx0NoeerKExD4+Xxk3+iMBmL0VuvWnWnvSTan6oFLfspI3Vr1AfObf + fAVPm3SigqMxgdFIo7OoLz/1wI9FPVPrUSETRfh9HMZZzvtlTIxOqZEUvZjaaMCiZtbS + 2tUA== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=EM7U/N62rbxjpd/wy3lwoMJ7CSKXstnqAzc6WMXVO+c=; + 
b=rGLib4fG3u5JfGsMESfD549XkyQTbkxo+ZDK2peyx+gBJaLu40gpIMHYqYOJAyzqzG + ix/FmZokOmB2+3Naq4VoOPQoJeMjsTJL0YBtF/6MDHz1/XjT5miqUjxHiUs4UtTo2Du6 + F/+TEZ6RtK0ePZqj+F41HO2cFdLMN0FfxwTT86IF0q5FEXGo7ZGqUj/nGxuH9w5dgmHf + 9Nskde954GH8rRzCtUmRNHuA8h7Ac3cmaz+uI7FTFiX01W+tcnke/SrzFAqCCl6ML8Ah + 6Js8R+1sL+sXe8TtZjGQ2aa7aOQGYsPwyF+SJW5qYMLvDpcoUNdkKpfb2nSVpEKolrJA + C3cg== +X-Gm-Message-State: AOAM533k6NruViQt9bY73WARuw0APJWRdFLtJTsHl/VJrzJggskh0kcA + On0mU/on2LGVIbt6g8dxcT+hA0GZgOI= +X-Google-Smtp-Source: ABdhPJx9dY0CYhzp53dRcd9T1SUoIr4KnxC7LGKi7djvDgAR/DF3q/feIx7ybIki3WMXmS4BOiKGzGOIvao= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:a5b:b4a:: with SMTP id b10mr2519734ybr.182.1618297003935; + Mon, 12 Apr 2021 23:56:43 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:19 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-3-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 02/16] include/linux/nodemask.h: define next_memory_node() + if !CONFIG_NUMA +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + page-reclaim@google.com, Yu Zhao +Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +Currently next_memory_node only exists when CONFIG_NUMA=y. This patch +adds the macro for !CONFIG_NUMA. 
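With this in place, node-iteration loops written for NUMA kernels also compile and behave sensibly on !CONFIG_NUMA, where they visit node 0 exactly once. A hedged sketch (do_something_per_node() is a made-up placeholder):

    int nid;

    /* CONFIG_NUMA=n: first_memory_node == 0 and next_memory_node(nid)
     * evaluates to MAX_NUMNODES, so the body runs once, for node 0.
     */
    for (nid = first_memory_node; nid < MAX_NUMNODES; nid = next_memory_node(nid))
            do_something_per_node(nid);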
+ +Signed-off-by: Yu Zhao +--- + include/linux/nodemask.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h +index ac398e143c9a..89fe4e3592f9 100644 +--- a/include/linux/nodemask.h ++++ b/include/linux/nodemask.h +@@ -486,6 +486,7 @@ static inline int num_node_state(enum node_states state) + #define first_online_node 0 + #define first_memory_node 0 + #define next_online_node(nid) (MAX_NUMNODES) ++#define next_memory_node(nid) (MAX_NUMNODES) + #define nr_node_ids 1U + #define nr_online_nodes 1U + +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-03-16-include-linux-huge_mm.h-define-is_huge_zero_pmd-if-CONFIG_TRANSPARENT_HUGEPAGE.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-03-16-include-linux-huge_mm.h-define-is_huge_zero_pmd-if-CONFIG_TRANSPARENT_HUGEPAGE.patch new file mode 100644 index 0000000..9077013 --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-03-16-include-linux-huge_mm.h-define-is_huge_zero_pmd-if-CONFIG_TRANSPARENT_HUGEPAGE.patch @@ -0,0 +1,130 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no + version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id 6E4E7C433ED + for ; Tue, 13 Apr 2021 06:56:58 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id 2A301613EB + for ; Tue, 13 Apr 2021 06:56:58 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S1345084AbhDMG5Q (ORCPT + ); + Tue, 13 Apr 2021 02:57:16 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44152 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S237032AbhDMG5F (ORCPT + ); + Tue, 13 Apr 2021 02:57:05 -0400 +Received: from mail-qv1-xf4a.google.com (mail-qv1-xf4a.google.com [IPv6:2607:f8b0:4864:20::f4a]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 26D6FC061574 + for ; Mon, 12 Apr 2021 23:56:46 -0700 (PDT) +Received: by mail-qv1-xf4a.google.com with SMTP id gu11so7133331qvb.0 + for ; Mon, 12 Apr 2021 23:56:46 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=ty30EBMFobCGhabQdsuq+v2Kg8uUmEONp40/WyUA1q8=; + b=i8n6+BP4XniO7GqYB3njPBeS1g0cajvT/0XeibRC9E79Y2kxVkXGp/HuAtF4IVW6+L + /n2Z+ZNUjzYoRG1K8TO2KT7wPH4dB0dBfh+QxjE4pa3hFSlYATFkHsATy+5tXCYxPNI5 + icwBWKo7lmwEnXOUHSMAZbfasHoawvCVog/UnTwIW6ATbaU4DRzi4r/NM6Dk8D5iMFw0 + uINBgxANuIFFKRfVUOyfzXT7qWKDHKlb5wvR3T/4y2+SRO3Xq0OMidUV+vii8Ijbi9C8 + OKDCcdJr7BmAzQtIPAXlE+vxaL8G9raL19q09IcdqKKULLNIy57jVK2xtDVpTIbZE6jh + DVMg== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=ty30EBMFobCGhabQdsuq+v2Kg8uUmEONp40/WyUA1q8=; + b=YVzfIBXrv665u9gqpA0aaR8rYQ3ksKwQ6y1pnY3UhRF3H0B9Ey8UftLQ5sEjQSYXf5 + 
4YJG1pSXti7Zr0NjAcVojZxJ3vul55+LG8QsAqvrkxu9kZe9BCPGcZ7CtjYmvAXZMaJS + LTzQMVutjT5FccfRztpgbLs4XZyflvf+EfncOMZ0jVl38t1cj4+1gqSFR9l9ghy+Xj2h + TuyP9qzN8JVm4XYhKfTX+rAB+yQ+CKmVvhh3Oj8O2I0hVOGKHfv1GT2BxP8lsdodzCri + TV4h5qxgSpmrJT5zS82i0VC+Kgi1iQ5lNkeUwKrowIXgTTdj2LkXGChb1hia2Sb2fq/c + /0RA== +X-Gm-Message-State: AOAM532KBvjkAJqjUGm4z3T6vDFjQzVEl4MdDPqiOTi/Sx/00HV2Sk4T + CDYdSIReMsyd3sZTjfEkJQizn1CUbQo= +X-Google-Smtp-Source: ABdhPJz9bP7GjZCXkR9CChLjfI00GuzH9av/gCfg2jgEdkGIxWUcBRwxRgL0Vxc4uB1fdD7yCdL0ylir3GM= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:a05:6214:161:: with SMTP id + y1mr13969669qvs.31.1618297005251; Mon, 12 Apr 2021 23:56:45 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:20 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-4-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 03/16] include/linux/huge_mm.h: define is_huge_zero_pmd() + if !CONFIG_TRANSPARENT_HUGEPAGE +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + page-reclaim@google.com, Yu Zhao +Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +Currently is_huge_zero_pmd() only exists when +CONFIG_TRANSPARENT_HUGEPAGE=y. This patch adds the function for +!CONFIG_TRANSPARENT_HUGEPAGE. 
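As with the previous stub, the point is that common code can test the condition without #ifdef guards; with THP disabled the new stub is a constant false and the branch is optimized away. An illustrative caller, not taken from the patch:

    if (pmd_present(*pmd) && !is_huge_zero_pmd(*pmd)) {
            /* not the shared huge zero page; handle it normally ... */
    }
    /* No #ifdef CONFIG_TRANSPARENT_HUGEPAGE is needed around the check. */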
+ +Signed-off-by: Yu Zhao +--- + include/linux/huge_mm.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h +index ba973efcd369..0ba7b3f9029c 100644 +--- a/include/linux/huge_mm.h ++++ b/include/linux/huge_mm.h +@@ -443,6 +443,11 @@ static inline bool is_huge_zero_page(struct page *page) + return false; + } + ++static inline bool is_huge_zero_pmd(pmd_t pmd) ++{ ++ return false; ++} ++ + static inline bool is_huge_zero_pud(pud_t pud) + { + return false; +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-04-16-include-linux-cgroup.h-export-cgroup_mutex.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-04-16-include-linux-cgroup.h-export-cgroup_mutex.patch new file mode 100644 index 0000000..ff26b63 --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-04-16-include-linux-cgroup.h-export-cgroup_mutex.patch @@ -0,0 +1,151 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=ham autolearn_force=no + version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id B1779C433B4 + for ; Tue, 13 Apr 2021 06:56:59 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id 93C83613CB + for ; Tue, 13 Apr 2021 06:56:59 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S1345093AbhDMG5R (ORCPT + ); + Tue, 13 Apr 2021 02:57:17 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44160 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S237122AbhDMG5G (ORCPT + ); + Tue, 13 Apr 2021 02:57:06 -0400 +Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 70228C061756 + for ; Mon, 12 Apr 2021 23:56:47 -0700 (PDT) +Received: by mail-yb1-xb4a.google.com with SMTP id d1so15228352ybj.15 + for ; Mon, 12 Apr 2021 23:56:47 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=NPo7MPPcRQhwQwi0VkGJEhiUUoPZKpCjODwiJd36ReE=; + b=baGnCiioZTP9ADs7IVEB/mQcb3cvKmCKgg9drauUZQ+Tp4ZFhqV8SVk54iVXXC/g4a + cpq3VBdcxXnUKSenbwAnH9Jp0vcf5HUqcvm0/PItCUte5xo66HxROV5Obn4PGte89xi9 + p+R4eomS1+PIS2MLxgShOMpnFvyxeBgpYJvBAHU3FKJ3dtUuQ8TMqtRRYgDLRETQtThQ + kFEKuP+qBTfl6NS1fHTb9BFTIgP5Z/N1DOBc07huBgFItja27dgr56dPRNvm09QqhgN8 + KNYrM6tJs6Md4vWQFOufoHl576biAVAYjl1tmh0+nRa81An0lfEfinpclElVWZVJap6f + 3K6Q== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=NPo7MPPcRQhwQwi0VkGJEhiUUoPZKpCjODwiJd36ReE=; + b=VQUPKq30uKeUAF6Ejq35xfekJF7nOdr7VngI/76uX8lOU1pIKoO4mC5aTAYeOIOrr8 + d9hpCUWEcuxEWFU49K2HTzz6r9TRtei0Z3TR3n5CdNJqIigsBiTmuLGfOPgRfmTdf4p1 + Gy4MP3Ln+GHBFflwKZ+f5OPcq+R/slU8HpAWd4KR6PshMeb/Uf/RnHWhIQ3qI8S3QLXv + 
K66JL1wL5gT1XsIvdtHxoLQ/CLC3QqmB2rSMp/tB7Orqc6DK48r53Kt037j1ALstA0O7 + qY6CPZRsbCum+NhqDvT8/KN1dsIkOSEmKUt0TfQc8hUEIm0I2juU0HYZsBV7D9xioz8r + p45w== +X-Gm-Message-State: AOAM533p7SYDUFBf9Ifm7vaTwGtjEO4CrlaCuZ4KoZ7jp3M6fMJFAFBH + 4BBDhvIWmrjLJRxSeBVIWDYQXg1lPro= +X-Google-Smtp-Source: ABdhPJyRALAhdJY/7MdeRvaPV8dMvbenEwa1GhqHOoi94XTiY8IwvBzrDPMpa5ltVLi8kkX49f0gbWJD/40= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:a25:8b86:: with SMTP id j6mr39368340ybl.470.1618297006589; + Mon, 12 Apr 2021 23:56:46 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:21 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-5-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 04/16] include/linux/cgroup.h: export cgroup_mutex +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + page-reclaim@google.com, Yu Zhao +Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +cgroup_mutex is needed to synchronize with memcg creations. + +Signed-off-by: Yu Zhao +--- + include/linux/cgroup.h | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h +index 4f2f79de083e..bd5744360cfa 100644 +--- a/include/linux/cgroup.h ++++ b/include/linux/cgroup.h +@@ -432,6 +432,18 @@ static inline void cgroup_put(struct cgroup *cgrp) + css_put(&cgrp->self); + } + ++extern struct mutex cgroup_mutex; ++ ++static inline void cgroup_lock(void) ++{ ++ mutex_lock(&cgroup_mutex); ++} ++ ++static inline void cgroup_unlock(void) ++{ ++ mutex_unlock(&cgroup_mutex); ++} ++ + /** + * task_css_set_check - obtain a task's css_set with extra access conditions + * @task: the task to obtain css_set for +@@ -446,7 +458,6 @@ static inline void cgroup_put(struct cgroup *cgrp) + * as locks used during the cgroup_subsys::attach() methods. 
+ */ + #ifdef CONFIG_PROVE_RCU +-extern struct mutex cgroup_mutex; + extern spinlock_t css_set_lock; + #define task_css_set_check(task, __c) \ + rcu_dereference_check((task)->cgroups, \ +@@ -704,6 +715,8 @@ struct cgroup; + static inline u64 cgroup_id(const struct cgroup *cgrp) { return 1; } + static inline void css_get(struct cgroup_subsys_state *css) {} + static inline void css_put(struct cgroup_subsys_state *css) {} ++static inline void cgroup_lock(void) {} ++static inline void cgroup_unlock(void) {} + static inline int cgroup_attach_task_all(struct task_struct *from, + struct task_struct *t) { return 0; } + static inline int cgroupstats_build(struct cgroupstats *stats, +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-05-16-mm-swap.c-export-activate_page.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-05-16-mm-swap.c-export-activate_page.patch new file mode 100644 index 0000000..a3e5d49 --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-05-16-mm-swap.c-export-activate_page.patch @@ -0,0 +1,190 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no + version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id 3D894C433ED + for ; Tue, 13 Apr 2021 06:57:01 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id 16A5761278 + for ; Tue, 13 Apr 2021 06:57:01 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S1345104AbhDMG5S (ORCPT + ); + Tue, 13 Apr 2021 02:57:18 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44168 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S237169AbhDMG5I (ORCPT + ); + Tue, 13 Apr 2021 02:57:08 -0400 +Received: from mail-qk1-x749.google.com (mail-qk1-x749.google.com [IPv6:2607:f8b0:4864:20::749]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 01AF4C06175F + for ; Mon, 12 Apr 2021 23:56:49 -0700 (PDT) +Received: by mail-qk1-x749.google.com with SMTP id j24so9889811qkg.7 + for ; Mon, 12 Apr 2021 23:56:48 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=kZ40TZQJmz2zt6lYwCpeAnxVbOWM8KwFdCtsfH6CbQ4=; + b=Lo7XMOOHbyzBoRlK8b2GE15qCT4QqS9ijyXSl1ryGVj5Alkuv2mcfhY4vR1gU/ak5i + HPCaNU4SNyd/togq6z9pJeIcKdhVNoakHlBzalPajFLmRC9Qbai2K4MiOiC3w/4zVP3/ + NtLrS3pnu6kRnE/1OF1NCyaMABOTJ1Ahmg/dZPqItxMI54CzXgYo6GdLYksK4AzjBKx6 + 3OPkxOXxP71Nm7Tjl273X7BKZEBEv2cYYpFtO65/dAM6wU+OCRnD0EkkgtX7e7+gTBso + oX16tOXHwiiZ6sLaMJLirvmeW9Lp7bXGjP63ZC1IEHuQFyVaxg7TzhpG+PXULs33Mwht + 64KQ== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=kZ40TZQJmz2zt6lYwCpeAnxVbOWM8KwFdCtsfH6CbQ4=; + b=m5HbExYCzmc21c5OLCzzHa8Xe8EdXvMRiTtiR09Dq8ChzNpcxJHIjjhpQyFMcUJWLj + 
+EmmgKiIE+uS4OHdEXmzNSv8MNhhEq7kUHf2SgjNDKlYLuCdTyrGG1MSWfK/msnX8s0I + ed03u8uPvY4i5nrXUPDSK0dSOilJdsKsbJ2GZF+UbwvHZb/bl7np8JUMFzrB2dYfV3GD + rJFKMpvlKiHjGv/usQSGWtLVDxlNl2ZH02SQETt2ZwtrhNj3g1Je8bALwt2ZVdzkZCGJ + ieq/RzKjaSqH69A9hehJuecmBRowdH3vtX4JtNR1N62OtoE92KN5JhRy7UIVzomglFHL + 9n1A== +X-Gm-Message-State: AOAM533DVaJizLoTWtX7Zoe1e9yCLp7H3odxXAoCcHrMJ9IzNh+lDvEB + F0NqK2LlktrIoIPLMrk68BAVCsE0tyc= +X-Google-Smtp-Source: ABdhPJx0OFD8QshALbNm7ufdWhFpw5ctF+y/1hKbFM42Olw0k5XnLx6uQVu5On95xo6CAByxMQgtMhVbOBY= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:a0c:fa12:: with SMTP id q18mr9972206qvn.2.1618297008125; + Mon, 12 Apr 2021 23:56:48 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:22 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-6-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 05/16] mm/swap.c: export activate_page() +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + page-reclaim@google.com, Yu Zhao +Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +activate_page() is needed to activate pages that are already on lru or +queued in lru_pvecs.lru_add. The exported function is a merger between +the existing activate_page() and __lru_cache_activate_page(). 
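From a caller's perspective the distinction between "already on the LRU" and "still in a per-CPU pagevec" disappears; the exported helper picks the right path internally. Very roughly, the mark_page_accessed() call site after this patch reduces to (simplified; see the diff below for the real code):

    } else if (!PageActive(page)) {
            activate_page(page);    /* works for both LRU and pagevec pages */
            ClearPageReferenced(page);
            workingset_activation(page);
    }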
+ +Signed-off-by: Yu Zhao +--- + include/linux/swap.h | 1 + + mm/swap.c | 28 +++++++++++++++------------- + 2 files changed, 16 insertions(+), 13 deletions(-) + +diff --git a/include/linux/swap.h b/include/linux/swap.h +index 4cc6ec3bf0ab..de2bbbf181ba 100644 +--- a/include/linux/swap.h ++++ b/include/linux/swap.h +@@ -344,6 +344,7 @@ extern void lru_add_drain_cpu(int cpu); + extern void lru_add_drain_cpu_zone(struct zone *zone); + extern void lru_add_drain_all(void); + extern void rotate_reclaimable_page(struct page *page); ++extern void activate_page(struct page *page); + extern void deactivate_file_page(struct page *page); + extern void deactivate_page(struct page *page); + extern void mark_page_lazyfree(struct page *page); +diff --git a/mm/swap.c b/mm/swap.c +index 31b844d4ed94..f20ed56ebbbf 100644 +--- a/mm/swap.c ++++ b/mm/swap.c +@@ -334,7 +334,7 @@ static bool need_activate_page_drain(int cpu) + return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0; + } + +-static void activate_page(struct page *page) ++static void activate_page_on_lru(struct page *page) + { + page = compound_head(page); + if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { +@@ -354,7 +354,7 @@ static inline void activate_page_drain(int cpu) + { + } + +-static void activate_page(struct page *page) ++static void activate_page_on_lru(struct page *page) + { + struct lruvec *lruvec; + +@@ -368,11 +368,22 @@ static void activate_page(struct page *page) + } + #endif + +-static void __lru_cache_activate_page(struct page *page) ++/* ++ * If the page is on the LRU, queue it for activation via ++ * lru_pvecs.activate_page. Otherwise, assume the page is on a ++ * pagevec, mark it active and it'll be moved to the active ++ * LRU on the next drain. ++ */ ++void activate_page(struct page *page) + { + struct pagevec *pvec; + int i; + ++ if (PageLRU(page)) { ++ activate_page_on_lru(page); ++ return; ++ } ++ + local_lock(&lru_pvecs.lock); + pvec = this_cpu_ptr(&lru_pvecs.lru_add); + +@@ -421,16 +432,7 @@ void mark_page_accessed(struct page *page) + * evictable page accessed has no effect. + */ + } else if (!PageActive(page)) { +- /* +- * If the page is on the LRU, queue it for activation via +- * lru_pvecs.activate_page. Otherwise, assume the page is on a +- * pagevec, mark it active and it'll be moved to the active +- * LRU on the next drain. 
+- */ +- if (PageLRU(page)) +- activate_page(page); +- else +- __lru_cache_activate_page(page); ++ activate_page(page); + ClearPageReferenced(page); + workingset_activation(page); + } +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-06-16-mm-x86-support-the-access-bit-on-non-leaf-PMD-entries.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-06-16-mm-x86-support-the-access-bit-on-non-leaf-PMD-entries.patch new file mode 100644 index 0000000..7e27f56 --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-06-16-mm-x86-support-the-access-bit-on-non-leaf-PMD-entries.patch @@ -0,0 +1,214 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no + version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id AE093C433B4 + for ; Tue, 13 Apr 2021 06:57:02 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id 867F3613B6 + for ; Tue, 13 Apr 2021 06:57:02 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S237032AbhDMG5T (ORCPT ); + Tue, 13 Apr 2021 02:57:19 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44174 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S245189AbhDMG5J (ORCPT + ); + Tue, 13 Apr 2021 02:57:09 -0400 +Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 804B0C061756 + for ; Mon, 12 Apr 2021 23:56:50 -0700 (PDT) +Received: by mail-yb1-xb49.google.com with SMTP id t9so4737272ybd.11 + for ; Mon, 12 Apr 2021 23:56:50 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=ClG8Hyf5TTtr5EO4ugQ7lEF5g9jg1Khbtn9fiHHHUO4=; + b=Bz/NCIqrnVElEbVXzKEuDo/rZuQj9KS3qgxTKdWtHhz5pm8i/K2zNVWoVZLOT3rUSR + LsBDpHnPsr/ZpnLlRjgBWaTe1LWedpUZEH5ms55YmlHa6b6jgezdJL3RT6PspSs7PC0D + X2Cp8BNNHZoXRtz4WK/5SGU3p+K+AzCV3OWzqDVroA6mh4+0ezV8mgPVSzwRPD5kb0gr + h1rkXixNjOMz9WdBgGoShJ+IdH8LzpJqTgis+qWDrFblJngv4Of0j7VP1YZiUBDZBIO8 + UPhfTPDB4QZtT8MN0GMlMXbeAlUWYEo/7WcySgFwiSO0kt7YfrA1ke9uBnFFX4PziJEZ + ISaA== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=ClG8Hyf5TTtr5EO4ugQ7lEF5g9jg1Khbtn9fiHHHUO4=; + b=Ln0JHJYmVa2eSlKqpGtl/4uP0U/tFRs/pk5G6Sl8Iec4RrR5oqZdSeZC19j8TSeMUO + DmIZ5X8vhdMmgBAkWF7E4NxzMbBEJfzjseP4tvMHiWSQ+ZWeCLuYCrW6DEaObyCK+T7t + zIVNPEeJOIg1zDbSyPA0EVnJqpe6Gkec8ahBEG03YbyTmfuG6vb0McULQljJ5OhniFfX + UripKlgaIV1a55hf1KsyL81MPaz5nGMe/cCHrm8EHqvFhxWzKWFO1Qk4Tc1VI45wYTHS + YVo0QOvbSbampG2ears9RXvYdJ9QVT1M8JfO5/+bVnbN3VbRLxG7g4jVuwkA4zPKOHYI + dISw== +X-Gm-Message-State: AOAM531fA312edJF5bN6zMI4xlJ2NDI7L0pqlv/7HXEcSl6sGX7pfMuO + 8LvKSxlzMxN/BLov7kCFr0vqNk/bYbk= +X-Google-Smtp-Source: 
ABdhPJwc8JriuoHPQ23GGBqKR69oc5Gp+cE2EiR0xXWJLv2glle7kn2s+OHctKLTVqR0qrsNshOCMzVz8BQ= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:a25:8b0f:: with SMTP id i15mr42151231ybl.277.1618297009506; + Mon, 12 Apr 2021 23:56:49 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:23 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-7-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 06/16] mm, x86: support the access bit on non-leaf PMD entries +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + page-reclaim@google.com, Yu Zhao +Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +Some architectures support the accessed bit on non-leaf PMD entries +(parents) in addition to leaf PTE entries (children) where pages are +mapped, e.g., x86_64 sets the accessed bit on a parent when using it +as part of linear-address translation [1]. Page table walkers who are +interested in the accessed bit on children can take advantage of this: +they do not need to search the children when the accessed bit is not +set on a parent, given that they have previously cleared the accessed +bit on this parent. + +[1]: Intel 64 and IA-32 Architectures Software Developer's Manual + Volume 3 (October 2019), section 4.8 + +Signed-off-by: Yu Zhao +--- + arch/Kconfig | 9 +++++++++ + arch/x86/Kconfig | 1 + + arch/x86/include/asm/pgtable.h | 2 +- + arch/x86/mm/pgtable.c | 5 ++++- + include/linux/pgtable.h | 4 ++-- + 5 files changed, 17 insertions(+), 4 deletions(-) + +diff --git a/arch/Kconfig b/arch/Kconfig +index ecfd3520b676..cbd7f66734ee 100644 +--- a/arch/Kconfig ++++ b/arch/Kconfig +@@ -782,6 +782,15 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE + config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + bool + ++config HAVE_ARCH_PARENT_PMD_YOUNG ++ bool ++ depends on PGTABLE_LEVELS > 2 ++ help ++ Architectures that select this are able to set the accessed bit on ++ non-leaf PMD entries in addition to leaf PTE entries where pages are ++ mapped. For them, page table walkers that clear the accessed bit may ++ stop at non-leaf PMD entries when they do not see the accessed bit. 
++ + config HAVE_ARCH_HUGE_VMAP + bool + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index 2792879d398e..b5972eb82337 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -163,6 +163,7 @@ config X86 + select HAVE_ARCH_TRACEHOOK + select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 ++ select HAVE_ARCH_PARENT_PMD_YOUNG if X86_64 + select HAVE_ARCH_USERFAULTFD_WP if X86_64 && USERFAULTFD + select HAVE_ARCH_VMAP_STACK if X86_64 + select HAVE_ARCH_WITHIN_STACK_FRAMES +diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h +index a02c67291cfc..a6b5cfe1fc5a 100644 +--- a/arch/x86/include/asm/pgtable.h ++++ b/arch/x86/include/asm/pgtable.h +@@ -846,7 +846,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) + + static inline int pmd_bad(pmd_t pmd) + { +- return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; ++ return ((pmd_flags(pmd) | _PAGE_ACCESSED) & ~_PAGE_USER) != _KERNPG_TABLE; + } + + static inline unsigned long pages_to_mb(unsigned long npg) +diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c +index f6a9e2e36642..1c27e6f43f80 100644 +--- a/arch/x86/mm/pgtable.c ++++ b/arch/x86/mm/pgtable.c +@@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, + return ret; + } + +-#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG) + int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) + { +@@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, + + return ret; + } ++#endif ++ ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE + int pudp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pud_t *pudp) + { +diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h +index 5e772392a379..08dd9b8c055a 100644 +--- a/include/linux/pgtable.h ++++ b/include/linux/pgtable.h +@@ -193,7 +193,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + #endif + + #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +-#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG) + static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, + pmd_t *pmdp) +@@ -214,7 +214,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, + BUILD_BUG(); + return 0; + } +-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ ++#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG */ + #endif + + #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-07-16-mm-vmscan.c-refactor-shrink_node.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-07-16-mm-vmscan.c-refactor-shrink_node.patch new file mode 100644 index 0000000..f97e74b --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-07-16-mm-vmscan.c-refactor-shrink_node.patch @@ -0,0 +1,324 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no + version=3.4.0 +Received: from 
mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id 51FFEC43460 + for ; Tue, 13 Apr 2021 06:57:09 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id 379F261278 + for ; Tue, 13 Apr 2021 06:57:09 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S1345127AbhDMG5Z (ORCPT + ); + Tue, 13 Apr 2021 02:57:25 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44184 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S242333AbhDMG5L (ORCPT + ); + Tue, 13 Apr 2021 02:57:11 -0400 +Received: from mail-qk1-x74a.google.com (mail-qk1-x74a.google.com [IPv6:2607:f8b0:4864:20::74a]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C5CAAC06138C + for ; Mon, 12 Apr 2021 23:56:51 -0700 (PDT) +Received: by mail-qk1-x74a.google.com with SMTP id g62so10544674qkf.18 + for ; Mon, 12 Apr 2021 23:56:51 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=1g9DmXJ6S7uvtuGOH48osWaF0/2fGGaQ6ChmAYKTm4o=; + b=VX7vOS1iaX+Hrwo31qklSok4an751KXHjlIezhTcoCSLXRV871k6PBsw+EibR4qWwF + i7kN3+4V671SYh9T69KvNxd786HKo+6WHv6Cd77TeqTfMbKijle6EBM4m+gl3DmNgnt0 + ZA8WH1LPEZfGwn3JGivnRSoUPFkulI9NBk9pGJpe7wwngua0FZfbXjlpD6td2UZKxBbD + sm8Xc+HrppZn5mA4exh2/iFeR515mlnGTrbTx70pum7Y/iYPYQ2/HgcjccRGsGWUBLbF + bSOTnALSUrqOctmdDO2fO0EzfSnndPfVgKwv5QWLNUcXAi3ZlYRs7lyuvShH4lnaJxFe + LTUA== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=1g9DmXJ6S7uvtuGOH48osWaF0/2fGGaQ6ChmAYKTm4o=; + b=oh0TJS5Iv72EGLBpsE6HR3bE5cZX3J2uuz3z3TwZZpsfqvBQ4F+ZjeXnT9ZM8znSwl + DwO2yHU9V2acH3+Fw0txUASuMMXsp1h+eHsdlfoqbA5zx2G/8OJbldp/rudOwBO+wc4D + Wu7IiJYBc9jidKDE7Rputac3XOWXhSIhHMN1UGb8rIrlefaHD89A6pEKF6H/v6TSV99v + 1MEtFUSmceep3K2EmUGX64fyXznC0KPZIkHHX/LcuC8xgYK2Go0LXGglt5x6U6QQ+Yk8 + QGNr4pv1ynAg5b5FcA5bQe34gJ4JarQfXZx82+zF84UGh0Hj4hR4I60qEnSwVJBlCNqE + o7DA== +X-Gm-Message-State: AOAM532mqZo9PBRpK7zpxWavyuHSPxCR5uYKAcywst7dl0qA/ZdHQHKq + TyCJ6Kl6g2of6qtWwfJ7m9Y3UH3EDGM= +X-Google-Smtp-Source: ABdhPJwH+ey8nBGqYBlYs+cX0y6B8vZ/ifwsZXXs+V8u1FJGnhfXc1ufux+fOtI1iR9OnRAE6E9FqbhZIZQ= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:a0c:db82:: with SMTP id m2mr21253979qvk.37.1618297010980; + Mon, 12 Apr 2021 23:56:50 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:24 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-8-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 07/16] mm/vmscan.c: refactor shrink_node() +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + page-reclaim@google.com, Yu Zhao 
+Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +Heuristics that determine scan balance between anon and file LRUs are +rather independent. Move them into a separate function to improve +readability. + +Signed-off-by: Yu Zhao +--- + mm/vmscan.c | 186 +++++++++++++++++++++++++++------------------------- + 1 file changed, 98 insertions(+), 88 deletions(-) + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 562e87cbd7a1..1a24d2e0a4cb 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -2224,6 +2224,103 @@ enum scan_balance { + SCAN_FILE, + }; + ++static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc) ++{ ++ unsigned long file; ++ struct lruvec *target_lruvec; ++ ++ target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); ++ ++ /* ++ * Determine the scan balance between anon and file LRUs. ++ */ ++ spin_lock_irq(&target_lruvec->lru_lock); ++ sc->anon_cost = target_lruvec->anon_cost; ++ sc->file_cost = target_lruvec->file_cost; ++ spin_unlock_irq(&target_lruvec->lru_lock); ++ ++ /* ++ * Target desirable inactive:active list ratios for the anon ++ * and file LRU lists. ++ */ ++ if (!sc->force_deactivate) { ++ unsigned long refaults; ++ ++ refaults = lruvec_page_state(target_lruvec, ++ WORKINGSET_ACTIVATE_ANON); ++ if (refaults != target_lruvec->refaults[0] || ++ inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) ++ sc->may_deactivate |= DEACTIVATE_ANON; ++ else ++ sc->may_deactivate &= ~DEACTIVATE_ANON; ++ ++ /* ++ * When refaults are being observed, it means a new ++ * workingset is being established. Deactivate to get ++ * rid of any stale active pages quickly. ++ */ ++ refaults = lruvec_page_state(target_lruvec, ++ WORKINGSET_ACTIVATE_FILE); ++ if (refaults != target_lruvec->refaults[1] || ++ inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) ++ sc->may_deactivate |= DEACTIVATE_FILE; ++ else ++ sc->may_deactivate &= ~DEACTIVATE_FILE; ++ } else ++ sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; ++ ++ /* ++ * If we have plenty of inactive file pages that aren't ++ * thrashing, try to reclaim those first before touching ++ * anonymous pages. ++ */ ++ file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE); ++ if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) ++ sc->cache_trim_mode = 1; ++ else ++ sc->cache_trim_mode = 0; ++ ++ /* ++ * Prevent the reclaimer from falling into the cache trap: as ++ * cache pages start out inactive, every cache fault will tip ++ * the scan balance towards the file LRU. And as the file LRU ++ * shrinks, so does the window for rotation from references. ++ * This means we have a runaway feedback loop where a tiny ++ * thrashing file LRU becomes infinitely more attractive than ++ * anon pages. Try to detect this based on file LRU size. ++ */ ++ if (!cgroup_reclaim(sc)) { ++ unsigned long total_high_wmark = 0; ++ unsigned long free, anon; ++ int z; ++ ++ free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); ++ file = node_page_state(pgdat, NR_ACTIVE_FILE) + ++ node_page_state(pgdat, NR_INACTIVE_FILE); ++ ++ for (z = 0; z < MAX_NR_ZONES; z++) { ++ struct zone *zone = &pgdat->node_zones[z]; ++ ++ if (!managed_zone(zone)) ++ continue; ++ ++ total_high_wmark += high_wmark_pages(zone); ++ } ++ ++ /* ++ * Consider anon: if that's low too, this isn't a ++ * runaway file reclaim problem, but rather just ++ * extreme pressure. Reclaim as per usual then. 
++ */ ++ anon = node_page_state(pgdat, NR_INACTIVE_ANON); ++ ++ sc->file_is_tiny = ++ file + free <= total_high_wmark && ++ !(sc->may_deactivate & DEACTIVATE_ANON) && ++ anon >> sc->priority; ++ } ++} ++ + /* + * Determine how aggressively the anon and file LRU lists should be + * scanned. The relative value of each set of LRU lists is determined +@@ -2669,7 +2766,6 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) + unsigned long nr_reclaimed, nr_scanned; + struct lruvec *target_lruvec; + bool reclaimable = false; +- unsigned long file; + + target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); + +@@ -2679,93 +2775,7 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) + nr_reclaimed = sc->nr_reclaimed; + nr_scanned = sc->nr_scanned; + +- /* +- * Determine the scan balance between anon and file LRUs. +- */ +- spin_lock_irq(&target_lruvec->lru_lock); +- sc->anon_cost = target_lruvec->anon_cost; +- sc->file_cost = target_lruvec->file_cost; +- spin_unlock_irq(&target_lruvec->lru_lock); +- +- /* +- * Target desirable inactive:active list ratios for the anon +- * and file LRU lists. +- */ +- if (!sc->force_deactivate) { +- unsigned long refaults; +- +- refaults = lruvec_page_state(target_lruvec, +- WORKINGSET_ACTIVATE_ANON); +- if (refaults != target_lruvec->refaults[0] || +- inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) +- sc->may_deactivate |= DEACTIVATE_ANON; +- else +- sc->may_deactivate &= ~DEACTIVATE_ANON; +- +- /* +- * When refaults are being observed, it means a new +- * workingset is being established. Deactivate to get +- * rid of any stale active pages quickly. +- */ +- refaults = lruvec_page_state(target_lruvec, +- WORKINGSET_ACTIVATE_FILE); +- if (refaults != target_lruvec->refaults[1] || +- inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) +- sc->may_deactivate |= DEACTIVATE_FILE; +- else +- sc->may_deactivate &= ~DEACTIVATE_FILE; +- } else +- sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; +- +- /* +- * If we have plenty of inactive file pages that aren't +- * thrashing, try to reclaim those first before touching +- * anonymous pages. +- */ +- file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE); +- if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) +- sc->cache_trim_mode = 1; +- else +- sc->cache_trim_mode = 0; +- +- /* +- * Prevent the reclaimer from falling into the cache trap: as +- * cache pages start out inactive, every cache fault will tip +- * the scan balance towards the file LRU. And as the file LRU +- * shrinks, so does the window for rotation from references. +- * This means we have a runaway feedback loop where a tiny +- * thrashing file LRU becomes infinitely more attractive than +- * anon pages. Try to detect this based on file LRU size. +- */ +- if (!cgroup_reclaim(sc)) { +- unsigned long total_high_wmark = 0; +- unsigned long free, anon; +- int z; +- +- free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); +- file = node_page_state(pgdat, NR_ACTIVE_FILE) + +- node_page_state(pgdat, NR_INACTIVE_FILE); +- +- for (z = 0; z < MAX_NR_ZONES; z++) { +- struct zone *zone = &pgdat->node_zones[z]; +- if (!managed_zone(zone)) +- continue; +- +- total_high_wmark += high_wmark_pages(zone); +- } +- +- /* +- * Consider anon: if that's low too, this isn't a +- * runaway file reclaim problem, but rather just +- * extreme pressure. Reclaim as per usual then. 
+- */ +- anon = node_page_state(pgdat, NR_INACTIVE_ANON); +- +- sc->file_is_tiny = +- file + free <= total_high_wmark && +- !(sc->may_deactivate & DEACTIVATE_ANON) && +- anon >> sc->priority; +- } ++ prepare_scan_count(pgdat, sc); + + shrink_node_memcgs(pgdat, sc); + +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-08-16-mm-multigenerational-lru-groundwork.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-08-16-mm-multigenerational-lru-groundwork.patch new file mode 100644 index 0000000..40de278 --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-08-16-mm-multigenerational-lru-groundwork.patch @@ -0,0 +1,1030 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.3 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no + version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id 9271AC43460 + for ; Tue, 13 Apr 2021 06:57:14 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id 6F7F761278 + for ; Tue, 13 Apr 2021 06:57:14 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S1345144AbhDMG5c (ORCPT + ); + Tue, 13 Apr 2021 02:57:32 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44200 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S1345056AbhDMG5N (ORCPT + ); + Tue, 13 Apr 2021 02:57:13 -0400 +Received: from mail-qv1-xf4a.google.com (mail-qv1-xf4a.google.com [IPv6:2607:f8b0:4864:20::f4a]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 5886EC06138E + for ; Mon, 12 Apr 2021 23:56:53 -0700 (PDT) +Received: by mail-qv1-xf4a.google.com with SMTP id n3so9515409qvr.8 + for ; Mon, 12 Apr 2021 23:56:53 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=qmuOmHYK6LSruSz/2mjoXeqDbULVi5nuXE31vo/uT/E=; + b=RteOX5J2xXYXxoT8hvVl//3BoaC5DuqnY+dzlu51rUnhT6e7BR7g4+n3LMaNa1whYR + zrfIWJ59sygdcM1Es0SKxIsBy8MgNPx+cp4MnH1RNI6PX26RlqbdJAJcLRiwNuIfonRY + +RcpmQMOPLKcbIAfRrr945XA4B4DMgEoqJAQzCJo0Tk8+ePW27kJVqmyYiSPINJ5qa13 + I2dDUx+mBmGLWi0dSd6mP2VhHO5u3yOQuyX6/EoNqeuLjOCql6IB+9+ajcCk/+KgWz/r + yKa6WcOvWbH2MOwBjtm1zdIYOLcqnAUIeTFF2gZcgUvTo9qLOQBx7vmqzxTIGHmBWbmB + dY1g== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=qmuOmHYK6LSruSz/2mjoXeqDbULVi5nuXE31vo/uT/E=; + b=LzN6dvUnDev3I6h6eRkkUIKNCqacio5A3hCeq0AENrBnJApelTtHtohD2b8mGB2+b8 + FgBFakwrbMTM8qJlm0OrT0/2K28OqLy8P9ax/BVg82q/ZVfM3XN49jaYTJu9wYJF0985 + aGe69rZg6WsBRltw/jLctcFk0QdsWLAIVQQoeJt7x4gPooZYnWXkl2re6x6YgBFQoLIx + JyWOoltw76YumdgM31AAcxXuJ+dvSwfo1E9eFM+Hlik6PBUTCfax3Utq6UAa+yH14EjL + xgOADyLDe4KCnIwHiztDNGNjKFHnZ7UqWWoud9HCL90VpGMk065emurkIgQFbPVEg0V1 + 9qyw== +X-Gm-Message-State: AOAM530tcRPNecJ31M7qMJl8prcIek/MK6l8U4VxGe36yJG393H27cYz + Ad7HcBB5bHhtcjp6bk5ZzVtdHgMGmzQ= +X-Google-Smtp-Source: 
ABdhPJyECwT99WvCpbLympvxtLSS3W+wdxylSknNr7gnrDzblSRr/IYdf45JyR2ogWoeHAZoGdrvDE0H6iM= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:a05:6214:88d:: with SMTP id + cz13mr16541122qvb.13.1618297012468; Mon, 12 Apr 2021 23:56:52 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:25 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-9-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 08/16] mm: multigenerational lru: groundwork +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + page-reclaim@google.com, Yu Zhao +Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +For each lruvec, evictable pages are divided into multiple +generations. The youngest generation number is stored in max_seq for +both anon and file types as they are aged on an equal footing. The +oldest generation numbers are stored in min_seq[2] separately for anon +and file types as clean file pages can be evicted regardless of +may_swap or may_writepage. Generation numbers are truncated into +order_base_2(MAX_NR_GENS+1) bits in order to fit into page->flags. The +sliding window technique is used to prevent truncated generation +numbers from overlapping. Each truncated generation number is an index +to lruvec->evictable.lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]. +Evictable pages are added to the per-zone lists indexed by max_seq or +min_seq[2] (modulo MAX_NR_GENS), depending on whether they are being +faulted in. + +The workflow comprises two conceptually independent functions: the +aging and the eviction. The aging produces young generations. Given an +lruvec, the aging scans page tables for referenced pages of this +lruvec. Upon finding one, the aging updates its generation number to +max_seq. After each round of scan, the aging increments max_seq. The +aging is due when both of min_seq[2] reaches max_seq-1, assuming both +anon and file types are reclaimable. + +The eviction consumes old generations. Given an lruvec, the eviction +scans the pages on the per-zone lists indexed by either of min_seq[2]. +It tries to select a type based on the values of min_seq[2] and +swappiness. During a scan, the eviction sorts pages according to their +generation numbers, if the aging has found them referenced. When it +finds all the per-zone lists of a selected type are empty, the +eviction increments min_seq[2] indexed by this selected type. 
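The arithmetic described above can be sketched as follows (illustrative only: the names and the value of MAX_NR_GENS are simplified stand-ins for CONFIG_NR_LRU_GENS and the lru_gen_from_seq() helpers this patch adds to include/linux/mm_inline.h further down):

	#define MAX_NR_GENS 4UL	/* stands in for CONFIG_NR_LRU_GENS */

	/* truncated generation number: an index into the per-type, per-zone lists */
	static inline unsigned long gen_from_seq(unsigned long seq)
	{
		return seq % MAX_NR_GENS;
	}

	/* page->flags stores gen + 1 so that 0 can mean "not on the lists" */
	static inline unsigned long gen_to_flags(unsigned long gen, unsigned long lru_gen_pgoff)
	{
		return (gen + 1UL) << lru_gen_pgoff;
	}

	/* the aging is due once both min_seq values have caught up to max_seq - 1 */
	static inline int aging_is_due(unsigned long max_seq, const unsigned long min_seq[2])
	{
		unsigned long oldest = min_seq[0] < min_seq[1] ? min_seq[0] : min_seq[1];

		return oldest + 1 >= max_seq;
	}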
+ +Signed-off-by: Yu Zhao +--- + fs/fuse/dev.c | 3 +- + include/linux/mm.h | 2 + + include/linux/mm_inline.h | 193 +++++++++++++++++++ + include/linux/mmzone.h | 110 +++++++++++ + include/linux/page-flags-layout.h | 20 +- + include/linux/page-flags.h | 4 +- + kernel/bounds.c | 6 + + mm/huge_memory.c | 3 +- + mm/mm_init.c | 16 +- + mm/mmzone.c | 2 + + mm/swapfile.c | 4 + + mm/vmscan.c | 305 ++++++++++++++++++++++++++++++ + 12 files changed, 656 insertions(+), 12 deletions(-) + +diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c +index c0fee830a34e..27c83f557794 100644 +--- a/fs/fuse/dev.c ++++ b/fs/fuse/dev.c +@@ -784,7 +784,8 @@ static int fuse_check_page(struct page *page) + 1 << PG_lru | + 1 << PG_active | + 1 << PG_reclaim | +- 1 << PG_waiters))) { ++ 1 << PG_waiters | ++ LRU_GEN_MASK | LRU_USAGE_MASK))) { + dump_page(page, "fuse: trying to steal weird page"); + return 1; + } +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 8ba434287387..2c8a2db78ce9 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1070,6 +1070,8 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); + #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) + #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) + #define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) ++#define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH) ++#define LRU_USAGE_PGOFF (LRU_GEN_PGOFF - LRU_USAGE_WIDTH) + + /* + * Define the bit shifts to access each section. For non-existent +diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h +index 355ea1ee32bd..2bf910eb3dd7 100644 +--- a/include/linux/mm_inline.h ++++ b/include/linux/mm_inline.h +@@ -79,11 +79,198 @@ static __always_inline enum lru_list page_lru(struct page *page) + return lru; + } + ++#ifdef CONFIG_LRU_GEN ++ ++#ifdef CONFIG_LRU_GEN_ENABLED ++DECLARE_STATIC_KEY_TRUE(lru_gen_static_key); ++#define lru_gen_enabled() static_branch_likely(&lru_gen_static_key) ++#else ++DECLARE_STATIC_KEY_FALSE(lru_gen_static_key); ++#define lru_gen_enabled() static_branch_unlikely(&lru_gen_static_key) ++#endif ++ ++/* We track at most MAX_NR_GENS generations using the sliding window technique. */ ++static inline int lru_gen_from_seq(unsigned long seq) ++{ ++ return seq % MAX_NR_GENS; ++} ++ ++/* Return a proper index regardless whether we keep a full history of stats. */ ++static inline int sid_from_seq_or_gen(int seq_or_gen) ++{ ++ return seq_or_gen % NR_STAT_GENS; ++} ++ ++/* The youngest and the second youngest generations are considered active. */ ++static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen) ++{ ++ unsigned long max_seq = READ_ONCE(lruvec->evictable.max_seq); ++ ++ VM_BUG_ON(!max_seq); ++ VM_BUG_ON(gen >= MAX_NR_GENS); ++ ++ return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1); ++} ++ ++/* Update the sizes of the multigenerational lru. 
*/ ++static inline void lru_gen_update_size(struct page *page, struct lruvec *lruvec, ++ int old_gen, int new_gen) ++{ ++ int file = page_is_file_lru(page); ++ int zone = page_zonenum(page); ++ int delta = thp_nr_pages(page); ++ enum lru_list lru = LRU_FILE * file; ++ struct lrugen *lrugen = &lruvec->evictable; ++ ++ lockdep_assert_held(&lruvec->lru_lock); ++ VM_BUG_ON(old_gen != -1 && old_gen >= MAX_NR_GENS); ++ VM_BUG_ON(new_gen != -1 && new_gen >= MAX_NR_GENS); ++ VM_BUG_ON(old_gen == -1 && new_gen == -1); ++ ++ if (old_gen >= 0) ++ WRITE_ONCE(lrugen->sizes[old_gen][file][zone], ++ lrugen->sizes[old_gen][file][zone] - delta); ++ if (new_gen >= 0) ++ WRITE_ONCE(lrugen->sizes[new_gen][file][zone], ++ lrugen->sizes[new_gen][file][zone] + delta); ++ ++ if (old_gen < 0) { ++ if (lru_gen_is_active(lruvec, new_gen)) ++ lru += LRU_ACTIVE; ++ update_lru_size(lruvec, lru, zone, delta); ++ return; ++ } ++ ++ if (new_gen < 0) { ++ if (lru_gen_is_active(lruvec, old_gen)) ++ lru += LRU_ACTIVE; ++ update_lru_size(lruvec, lru, zone, -delta); ++ return; ++ } ++ ++ if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) { ++ update_lru_size(lruvec, lru, zone, -delta); ++ update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta); ++ } ++ ++ VM_BUG_ON(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen)); ++} ++ ++/* Add a page to a list of the multigenerational lru. Return true on success. */ ++static inline bool lru_gen_addition(struct page *page, struct lruvec *lruvec, bool front) ++{ ++ int gen; ++ unsigned long old_flags, new_flags; ++ int file = page_is_file_lru(page); ++ int zone = page_zonenum(page); ++ struct lrugen *lrugen = &lruvec->evictable; ++ ++ if (PageUnevictable(page) || !lrugen->enabled[file]) ++ return false; ++ /* ++ * If a page is being faulted in, add it to the youngest generation. ++ * try_walk_mm_list() may look at the size of the youngest generation to ++ * determine if the aging is due. ++ * ++ * If a page can't be evicted immediately, i.e., a shmem page not in ++ * swap cache, a dirty page waiting on writeback, or a page rejected by ++ * evict_lru_gen_pages() due to races, dirty buffer heads, etc., add it ++ * to the second oldest generation. ++ * ++ * If a page could be evicted immediately, i.e., deactivated, rotated by ++ * writeback, or allocated for buffered io, add it to the oldest ++ * generation. ++ */ ++ if (PageActive(page)) ++ gen = lru_gen_from_seq(lrugen->max_seq); ++ else if ((!file && !PageSwapCache(page)) || ++ (PageReclaim(page) && (PageDirty(page) || PageWriteback(page))) || ++ (!PageReferenced(page) && PageWorkingset(page))) ++ gen = lru_gen_from_seq(lrugen->min_seq[file] + 1); ++ else ++ gen = lru_gen_from_seq(lrugen->min_seq[file]); ++ ++ do { ++ old_flags = READ_ONCE(page->flags); ++ VM_BUG_ON_PAGE(old_flags & LRU_GEN_MASK, page); ++ ++ new_flags = (old_flags & ~(LRU_GEN_MASK | BIT(PG_active))) | ++ ((gen + 1UL) << LRU_GEN_PGOFF); ++ /* see the comment in evict_lru_gen_pages() */ ++ if (!(old_flags & BIT(PG_referenced))) ++ new_flags &= ~(LRU_USAGE_MASK | LRU_TIER_FLAGS); ++ } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags); ++ ++ lru_gen_update_size(page, lruvec, -1, gen); ++ if (front) ++ list_add(&page->lru, &lrugen->lists[gen][file][zone]); ++ else ++ list_add_tail(&page->lru, &lrugen->lists[gen][file][zone]); ++ ++ return true; ++} ++ ++/* Delete a page from a list of the multigenerational lru. Return true on success. 
*/ ++static inline bool lru_gen_deletion(struct page *page, struct lruvec *lruvec) ++{ ++ int gen; ++ unsigned long old_flags, new_flags; ++ ++ do { ++ old_flags = READ_ONCE(page->flags); ++ if (!(old_flags & LRU_GEN_MASK)) ++ return false; ++ ++ VM_BUG_ON_PAGE(PageActive(page), page); ++ VM_BUG_ON_PAGE(PageUnevictable(page), page); ++ ++ gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; ++ ++ new_flags = old_flags & ~LRU_GEN_MASK; ++ /* mark page active accordingly */ ++ if (lru_gen_is_active(lruvec, gen)) ++ new_flags |= BIT(PG_active); ++ } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags); ++ ++ lru_gen_update_size(page, lruvec, gen, -1); ++ list_del(&page->lru); ++ ++ return true; ++} ++ ++/* Return -1 when a page is not on a list of the multigenerational lru. */ ++static inline int page_lru_gen(struct page *page) ++{ ++ return ((READ_ONCE(page->flags) & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; ++} ++ ++#else /* CONFIG_LRU_GEN */ ++ ++static inline bool lru_gen_enabled(void) ++{ ++ return false; ++} ++ ++static inline bool lru_gen_addition(struct page *page, struct lruvec *lruvec, bool front) ++{ ++ return false; ++} ++ ++static inline bool lru_gen_deletion(struct page *page, struct lruvec *lruvec) ++{ ++ return false; ++} ++ ++#endif /* CONFIG_LRU_GEN */ ++ + static __always_inline void add_page_to_lru_list(struct page *page, + struct lruvec *lruvec) + { + enum lru_list lru = page_lru(page); + ++ if (lru_gen_addition(page, lruvec, true)) ++ return; ++ + update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); + list_add(&page->lru, &lruvec->lists[lru]); + } +@@ -93,6 +280,9 @@ static __always_inline void add_page_to_lru_list_tail(struct page *page, + { + enum lru_list lru = page_lru(page); + ++ if (lru_gen_addition(page, lruvec, false)) ++ return; ++ + update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); + list_add_tail(&page->lru, &lruvec->lists[lru]); + } +@@ -100,6 +290,9 @@ static __always_inline void add_page_to_lru_list_tail(struct page *page, + static __always_inline void del_page_from_lru_list(struct page *page, + struct lruvec *lruvec) + { ++ if (lru_gen_deletion(page, lruvec)) ++ return; ++ + list_del(&page->lru); + update_lru_size(lruvec, page_lru(page), page_zonenum(page), + -thp_nr_pages(page)); +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index 47946cec7584..a60c7498afd7 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -293,6 +293,112 @@ enum lruvec_flags { + */ + }; + ++struct lruvec; ++ ++#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) ++#define LRU_USAGE_MASK ((BIT(LRU_USAGE_WIDTH) - 1) << LRU_USAGE_PGOFF) ++ ++#ifdef CONFIG_LRU_GEN ++ ++/* ++ * For each lruvec, evictable pages are divided into multiple generations. The ++ * youngest and the oldest generation numbers, AKA max_seq and min_seq, are ++ * monotonically increasing. The sliding window technique is used to track at ++ * most MAX_NR_GENS and at least MIN_NR_GENS generations. An offset within the ++ * window, AKA gen, indexes an array of per-type and per-zone lists for the ++ * corresponding generation. All pages from this array of lists have gen+1 ++ * stored in page->flags. 0 is reserved to indicate that pages are not on the ++ * lists. ++ */ ++#define MAX_NR_GENS ((unsigned int)CONFIG_NR_LRU_GENS) ++ ++/* ++ * Each generation is then divided into multiple tiers. Tiers represent levels ++ * of usage from file descriptors, i.e., mark_page_accessed(). 
In contrast to ++ * moving across generations which requires the lru lock, moving across tiers ++ * only involves an atomic operation on page->flags and therefore has a ++ * negligible cost. ++ * ++ * The purposes of tiers are to: ++ * 1) estimate whether pages accessed multiple times via file descriptors are ++ * more active than pages accessed only via page tables by separating the two ++ * access types into upper tiers and the base tier and comparing refault rates ++ * across tiers. ++ * 2) improve buffered io performance by deferring activations of pages ++ * accessed multiple times until the eviction. That is activations happen in ++ * the reclaim path, not the access path. ++ * ++ * Pages accessed N times via file descriptors belong to tier order_base_2(N). ++ * The base tier uses the following page flag: ++ * !PageReferenced() -- readahead pages ++ * PageReferenced() -- single-access pages ++ * All upper tiers use the following page flags: ++ * PageReferenced() && PageWorkingset() -- multi-access pages ++ * in addition to the bits storing N-2 accesses. Therefore, we can support one ++ * upper tier without using additional bits in page->flags. ++ * ++ * Note that ++ * 1) PageWorkingset() is always set for upper tiers because we want to ++ * maintain the existing psi behavior. ++ * 2) !PageReferenced() && PageWorkingset() is not a valid tier. See the ++ * comment in evict_lru_gen_pages(). ++ * 3) pages accessed only via page tables belong to the base tier. ++ * ++ * Pages from the base tier are evicted regardless of the refault rate. Pages ++ * from upper tiers will be moved to the next generation, if their refault rates ++ * are higher than that of the base tier. ++ */ ++#define MAX_NR_TIERS ((unsigned int)CONFIG_TIERS_PER_GEN) ++#define LRU_TIER_FLAGS (BIT(PG_referenced) | BIT(PG_workingset)) ++#define LRU_USAGE_SHIFT (CONFIG_TIERS_PER_GEN - 1) ++ ++/* Whether to keep historical stats for each generation. */ ++#ifdef CONFIG_LRU_GEN_STATS ++#define NR_STAT_GENS ((unsigned int)CONFIG_NR_LRU_GENS) ++#else ++#define NR_STAT_GENS 1U ++#endif ++ ++struct lrugen { ++ /* the aging increments the max generation number */ ++ unsigned long max_seq; ++ /* the eviction increments the min generation numbers */ ++ unsigned long min_seq[ANON_AND_FILE]; ++ /* the birth time of each generation in jiffies */ ++ unsigned long timestamps[MAX_NR_GENS]; ++ /* the lists of the multigenerational lru */ ++ struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; ++ /* the sizes of the multigenerational lru in pages */ ++ unsigned long sizes[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; ++ /* to determine which type and its tiers to evict */ ++ atomic_long_t evicted[NR_STAT_GENS][ANON_AND_FILE][MAX_NR_TIERS]; ++ atomic_long_t refaulted[NR_STAT_GENS][ANON_AND_FILE][MAX_NR_TIERS]; ++ /* the base tier is inactive and won't be activated */ ++ unsigned long activated[NR_STAT_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1]; ++ /* arithmetic mean weighted by geometric series 1/2, 1/4, ... 
*/ ++ unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS]; ++ unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS]; ++ /* reclaim priority to compare across memcgs */ ++ atomic_t priority; ++ /* whether the multigenerational lru is enabled */ ++ bool enabled[ANON_AND_FILE]; ++}; ++ ++void lru_gen_init_lruvec(struct lruvec *lruvec); ++void lru_gen_set_state(bool enable, bool main, bool swap); ++ ++#else /* CONFIG_LRU_GEN */ ++ ++static inline void lru_gen_init_lruvec(struct lruvec *lruvec) ++{ ++} ++ ++static inline void lru_gen_set_state(bool enable, bool main, bool swap) ++{ ++} ++ ++#endif /* CONFIG_LRU_GEN */ ++ + struct lruvec { + struct list_head lists[NR_LRU_LISTS]; + /* per lruvec lru_lock for memcg */ +@@ -310,6 +416,10 @@ struct lruvec { + unsigned long refaults[ANON_AND_FILE]; + /* Various lruvec state flags (enum lruvec_flags) */ + unsigned long flags; ++#ifdef CONFIG_LRU_GEN ++ /* unevictable pages are on LRU_UNEVICTABLE */ ++ struct lrugen evictable; ++#endif + #ifdef CONFIG_MEMCG + struct pglist_data *pgdat; + #endif +diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h +index 7d4ec26d8a3e..df83aaec8498 100644 +--- a/include/linux/page-flags-layout.h ++++ b/include/linux/page-flags-layout.h +@@ -24,6 +24,17 @@ + #error ZONES_SHIFT -- too many zones configured adjust calculation + #endif + ++#ifdef CONFIG_LRU_GEN ++/* ++ * LRU_GEN_WIDTH is generated from order_base_2(CONFIG_NR_LRU_GENS + 1). And the ++ * comment on MAX_NR_TIERS explains why we offset by 2 here. ++ */ ++#define LRU_USAGE_WIDTH (CONFIG_TIERS_PER_GEN - 2) ++#else ++#define LRU_GEN_WIDTH 0 ++#define LRU_USAGE_WIDTH 0 ++#endif ++ + #ifdef CONFIG_SPARSEMEM + #include + +@@ -56,7 +67,8 @@ + + #define ZONES_WIDTH ZONES_SHIFT + +-#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS ++#if SECTIONS_WIDTH+ZONES_WIDTH+LRU_GEN_WIDTH+LRU_USAGE_WIDTH+NODES_SHIFT \ ++ <= BITS_PER_LONG - NR_PAGEFLAGS + #define NODES_WIDTH NODES_SHIFT + #else + #ifdef CONFIG_SPARSEMEM_VMEMMAP +@@ -83,14 +95,16 @@ + #define KASAN_TAG_WIDTH 0 + #endif + +-#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT+KASAN_TAG_WIDTH \ ++#if SECTIONS_WIDTH+ZONES_WIDTH+LRU_GEN_WIDTH+LRU_USAGE_WIDTH+ \ ++ NODES_WIDTH+KASAN_TAG_WIDTH+LAST_CPUPID_SHIFT \ + <= BITS_PER_LONG - NR_PAGEFLAGS + #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT + #else + #define LAST_CPUPID_WIDTH 0 + #endif + +-#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH+LAST_CPUPID_WIDTH+KASAN_TAG_WIDTH \ ++#if SECTIONS_WIDTH+ZONES_WIDTH+LRU_GEN_WIDTH+LRU_USAGE_WIDTH+ \ ++ NODES_WIDTH+KASAN_TAG_WIDTH+LAST_CPUPID_WIDTH \ + > BITS_PER_LONG - NR_PAGEFLAGS + #error "Not enough bits in page flags" + #endif +diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h +index 04a34c08e0a6..e58984fca32a 100644 +--- a/include/linux/page-flags.h ++++ b/include/linux/page-flags.h +@@ -817,7 +817,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page) + 1UL << PG_private | 1UL << PG_private_2 | \ + 1UL << PG_writeback | 1UL << PG_reserved | \ + 1UL << PG_slab | 1UL << PG_active | \ +- 1UL << PG_unevictable | __PG_MLOCKED) ++ 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK) + + /* + * Flags checked when a page is prepped for return by the page allocator. +@@ -828,7 +828,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page) + * alloc-free cycle to prevent from reusing the page. 
+ */ + #define PAGE_FLAGS_CHECK_AT_PREP \ +- (((1UL << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON) ++ ((((1UL << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_USAGE_MASK) + + #define PAGE_FLAGS_PRIVATE \ + (1UL << PG_private | 1UL << PG_private_2) +diff --git a/kernel/bounds.c b/kernel/bounds.c +index 9795d75b09b2..a8cbf2d0b11a 100644 +--- a/kernel/bounds.c ++++ b/kernel/bounds.c +@@ -22,6 +22,12 @@ int main(void) + DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); + #endif + DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); ++#ifdef CONFIG_LRU_GEN ++ /* bits needed to represent internal values stored in page->flags */ ++ DEFINE(LRU_GEN_WIDTH, order_base_2(CONFIG_NR_LRU_GENS + 1)); ++ /* bits needed to represent normalized values for external uses */ ++ DEFINE(LRU_GEN_SHIFT, order_base_2(CONFIG_NR_LRU_GENS)); ++#endif + /* End of constants */ + + return 0; +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index ae907a9c2050..26d3cc4a7a0b 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2418,7 +2418,8 @@ static void __split_huge_page_tail(struct page *head, int tail, + #ifdef CONFIG_64BIT + (1L << PG_arch_2) | + #endif +- (1L << PG_dirty))); ++ (1L << PG_dirty) | ++ LRU_GEN_MASK | LRU_USAGE_MASK)); + + /* ->mapping in first tail page is compound_mapcount */ + VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, +diff --git a/mm/mm_init.c b/mm/mm_init.c +index 8e02e865cc65..6303ed7aa511 100644 +--- a/mm/mm_init.c ++++ b/mm/mm_init.c +@@ -71,27 +71,33 @@ void __init mminit_verify_pageflags_layout(void) + width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH + - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH; + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths", +- "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Flags %d\n", ++ "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d lru gen %d tier %d Flags %d\n", + SECTIONS_WIDTH, + NODES_WIDTH, + ZONES_WIDTH, + LAST_CPUPID_WIDTH, + KASAN_TAG_WIDTH, ++ LRU_GEN_WIDTH, ++ LRU_USAGE_WIDTH, + NR_PAGEFLAGS); + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts", +- "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d\n", ++ "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d lru gen %d tier %d\n", + SECTIONS_SHIFT, + NODES_SHIFT, + ZONES_SHIFT, + LAST_CPUPID_SHIFT, +- KASAN_TAG_WIDTH); ++ KASAN_TAG_WIDTH, ++ LRU_GEN_WIDTH, ++ LRU_USAGE_WIDTH); + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_pgshifts", +- "Section %lu Node %lu Zone %lu Lastcpupid %lu Kasantag %lu\n", ++ "Section %lu Node %lu Zone %lu Lastcpupid %lu Kasantag %lu lru gen %lu tier %lu\n", + (unsigned long)SECTIONS_PGSHIFT, + (unsigned long)NODES_PGSHIFT, + (unsigned long)ZONES_PGSHIFT, + (unsigned long)LAST_CPUPID_PGSHIFT, +- (unsigned long)KASAN_TAG_PGSHIFT); ++ (unsigned long)KASAN_TAG_PGSHIFT, ++ (unsigned long)LRU_GEN_PGOFF, ++ (unsigned long)LRU_USAGE_PGOFF); + mminit_dprintk(MMINIT_TRACE, "pageflags_layout_nodezoneid", + "Node/Zone ID: %lu -> %lu\n", + (unsigned long)(ZONEID_PGOFF + ZONEID_SHIFT), +diff --git a/mm/mmzone.c b/mm/mmzone.c +index eb89d6e018e2..2ec0d7793424 100644 +--- a/mm/mmzone.c ++++ b/mm/mmzone.c +@@ -81,6 +81,8 @@ void lruvec_init(struct lruvec *lruvec) + + for_each_lru(lru) + INIT_LIST_HEAD(&lruvec->lists[lru]); ++ ++ lru_gen_init_lruvec(lruvec); + } + + #if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) +diff --git a/mm/swapfile.c b/mm/swapfile.c +index 084a5b9a18e5..c6041d10a73a 100644 +--- a/mm/swapfile.c ++++ b/mm/swapfile.c +@@ -2702,6 +2702,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) 
+ err = 0; + atomic_inc(&proc_poll_event); + wake_up_interruptible(&proc_poll_wait); ++ /* stop tracking anon if the multigenerational lru is enabled */ ++ lru_gen_set_state(false, false, true); + + out_dput: + filp_close(victim, NULL); +@@ -3348,6 +3350,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) + mutex_unlock(&swapon_mutex); + atomic_inc(&proc_poll_event); + wake_up_interruptible(&proc_poll_wait); ++ /* start tracking anon if the multigenerational lru is enabled */ ++ lru_gen_set_state(true, false, true); + + error = 0; + goto out; +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 1a24d2e0a4cb..8559bb94d452 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -49,6 +49,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -4314,3 +4315,307 @@ void check_move_unevictable_pages(struct pagevec *pvec) + } + } + EXPORT_SYMBOL_GPL(check_move_unevictable_pages); ++ ++#ifdef CONFIG_LRU_GEN ++ ++/* ++ * After pages are faulted in, the aging must scan them twice before the ++ * eviction can. The first scan clears the accessed bit set during initial ++ * faults. And the second scan makes sure they haven't been used since the ++ * first. ++ */ ++#define MIN_NR_GENS 2 ++ ++#define MAX_BATCH_SIZE 8192 ++ ++/****************************************************************************** ++ * shorthand helpers ++ ******************************************************************************/ ++ ++#define DEFINE_MAX_SEQ() \ ++ unsigned long max_seq = READ_ONCE(lruvec->evictable.max_seq) ++ ++#define DEFINE_MIN_SEQ() \ ++ unsigned long min_seq[ANON_AND_FILE] = { \ ++ READ_ONCE(lruvec->evictable.min_seq[0]), \ ++ READ_ONCE(lruvec->evictable.min_seq[1]), \ ++ } ++ ++#define for_each_type_zone(file, zone) \ ++ for ((file) = 0; (file) < ANON_AND_FILE; (file)++) \ ++ for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++) ++ ++#define for_each_gen_type_zone(gen, file, zone) \ ++ for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \ ++ for ((file) = 0; (file) < ANON_AND_FILE; (file)++) \ ++ for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++) ++ ++static int get_nr_gens(struct lruvec *lruvec, int file) ++{ ++ return lruvec->evictable.max_seq - lruvec->evictable.min_seq[file] + 1; ++} ++ ++static int min_nr_gens(unsigned long max_seq, unsigned long *min_seq, int swappiness) ++{ ++ return max_seq - max(min_seq[!swappiness], min_seq[1]) + 1; ++} ++ ++static int max_nr_gens(unsigned long max_seq, unsigned long *min_seq, int swappiness) ++{ ++ return max_seq - min(min_seq[!swappiness], min_seq[1]) + 1; ++} ++ ++static bool __maybe_unused seq_is_valid(struct lruvec *lruvec) ++{ ++ lockdep_assert_held(&lruvec->lru_lock); ++ ++ return get_nr_gens(lruvec, 0) >= MIN_NR_GENS && ++ get_nr_gens(lruvec, 0) <= MAX_NR_GENS && ++ get_nr_gens(lruvec, 1) >= MIN_NR_GENS && ++ get_nr_gens(lruvec, 1) <= MAX_NR_GENS; ++} ++ ++/****************************************************************************** ++ * state change ++ ******************************************************************************/ ++ ++#ifdef CONFIG_LRU_GEN_ENABLED ++DEFINE_STATIC_KEY_TRUE(lru_gen_static_key); ++#else ++DEFINE_STATIC_KEY_FALSE(lru_gen_static_key); ++#endif ++ ++static DEFINE_MUTEX(lru_gen_state_mutex); ++static int lru_gen_nr_swapfiles __read_mostly; ++ ++static bool __maybe_unused state_is_valid(struct lruvec *lruvec) ++{ ++ int gen, file, zone; ++ enum lru_list lru; ++ struct lrugen *lrugen = &lruvec->evictable; ++ ++ for_each_evictable_lru(lru) { ++ file = is_file_lru(lru); ++ ++ if (lrugen->enabled[file] 
&& !list_empty(&lruvec->lists[lru])) ++ return false; ++ } ++ ++ for_each_gen_type_zone(gen, file, zone) { ++ if (!lrugen->enabled[file] && !list_empty(&lrugen->lists[gen][file][zone])) ++ return false; ++ ++ VM_WARN_ONCE(!lrugen->enabled[file] && lrugen->sizes[gen][file][zone], ++ "lru_gen: possible unbalanced number of pages"); ++ } ++ ++ return true; ++} ++ ++static bool fill_lru_gen_lists(struct lruvec *lruvec) ++{ ++ enum lru_list lru; ++ int batch_size = 0; ++ ++ for_each_evictable_lru(lru) { ++ int file = is_file_lru(lru); ++ bool active = is_active_lru(lru); ++ struct list_head *head = &lruvec->lists[lru]; ++ ++ if (!lruvec->evictable.enabled[file]) ++ continue; ++ ++ while (!list_empty(head)) { ++ bool success; ++ struct page *page = lru_to_page(head); ++ ++ VM_BUG_ON_PAGE(PageTail(page), page); ++ VM_BUG_ON_PAGE(PageUnevictable(page), page); ++ VM_BUG_ON_PAGE(PageActive(page) != active, page); ++ VM_BUG_ON_PAGE(page_lru_gen(page) != -1, page); ++ VM_BUG_ON_PAGE(page_is_file_lru(page) != file, page); ++ ++ prefetchw_prev_lru_page(page, head, flags); ++ ++ del_page_from_lru_list(page, lruvec); ++ success = lru_gen_addition(page, lruvec, true); ++ VM_BUG_ON(!success); ++ ++ if (++batch_size == MAX_BATCH_SIZE) ++ return false; ++ } ++ } ++ ++ return true; ++} ++ ++static bool drain_lru_gen_lists(struct lruvec *lruvec) ++{ ++ int gen, file, zone; ++ int batch_size = 0; ++ ++ for_each_gen_type_zone(gen, file, zone) { ++ struct list_head *head = &lruvec->evictable.lists[gen][file][zone]; ++ ++ if (lruvec->evictable.enabled[file]) ++ continue; ++ ++ while (!list_empty(head)) { ++ bool success; ++ struct page *page = lru_to_page(head); ++ ++ VM_BUG_ON_PAGE(PageTail(page), page); ++ VM_BUG_ON_PAGE(PageUnevictable(page), page); ++ VM_BUG_ON_PAGE(PageActive(page), page); ++ VM_BUG_ON_PAGE(page_is_file_lru(page) != file, page); ++ VM_BUG_ON_PAGE(page_zonenum(page) != zone, page); ++ ++ prefetchw_prev_lru_page(page, head, flags); ++ ++ success = lru_gen_deletion(page, lruvec); ++ VM_BUG_ON(!success); ++ add_page_to_lru_list(page, lruvec); ++ ++ if (++batch_size == MAX_BATCH_SIZE) ++ return false; ++ } ++ } ++ ++ return true; ++} ++ ++/* ++ * For file page tracking, we enable/disable it according to the main switch. ++ * For anon page tracking, we only enabled it when the main switch is on and ++ * there is at least one swapfile; we disable it when there are no swapfiles ++ * regardless of the value of the main switch. Otherwise, we will eventually ++ * reach the max size of the sliding window and have to call inc_min_seq(), ++ * which brings an unnecessary overhead. ++ */ ++void lru_gen_set_state(bool enable, bool main, bool swap) ++{ ++ struct mem_cgroup *memcg; ++ ++ mem_hotplug_begin(); ++ mutex_lock(&lru_gen_state_mutex); ++ cgroup_lock(); ++ ++ main = main && enable != lru_gen_enabled(); ++ swap = swap && !(enable ? 
lru_gen_nr_swapfiles++ : --lru_gen_nr_swapfiles); ++ swap = swap && lru_gen_enabled(); ++ if (!main && !swap) ++ goto unlock; ++ ++ if (main) { ++ if (enable) ++ static_branch_enable(&lru_gen_static_key); ++ else ++ static_branch_disable(&lru_gen_static_key); ++ } ++ ++ memcg = mem_cgroup_iter(NULL, NULL, NULL); ++ do { ++ int nid; ++ ++ for_each_node_state(nid, N_MEMORY) { ++ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); ++ struct lrugen *lrugen = &lruvec->evictable; ++ ++ spin_lock_irq(&lruvec->lru_lock); ++ ++ VM_BUG_ON(!seq_is_valid(lruvec)); ++ VM_BUG_ON(!state_is_valid(lruvec)); ++ ++ WRITE_ONCE(lrugen->enabled[0], lru_gen_enabled() && lru_gen_nr_swapfiles); ++ WRITE_ONCE(lrugen->enabled[1], lru_gen_enabled()); ++ ++ while (!(enable ? fill_lru_gen_lists(lruvec) : ++ drain_lru_gen_lists(lruvec))) { ++ spin_unlock_irq(&lruvec->lru_lock); ++ cond_resched(); ++ spin_lock_irq(&lruvec->lru_lock); ++ } ++ ++ spin_unlock_irq(&lruvec->lru_lock); ++ } ++ ++ cond_resched(); ++ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); ++unlock: ++ cgroup_unlock(); ++ mutex_unlock(&lru_gen_state_mutex); ++ mem_hotplug_done(); ++} ++ ++static int __meminit __maybe_unused lru_gen_online_mem(struct notifier_block *self, ++ unsigned long action, void *arg) ++{ ++ struct mem_cgroup *memcg; ++ struct memory_notify *mnb = arg; ++ int nid = mnb->status_change_nid; ++ ++ if (action != MEM_GOING_ONLINE || nid == NUMA_NO_NODE) ++ return NOTIFY_DONE; ++ ++ mutex_lock(&lru_gen_state_mutex); ++ cgroup_lock(); ++ ++ memcg = mem_cgroup_iter(NULL, NULL, NULL); ++ do { ++ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); ++ struct lrugen *lrugen = &lruvec->evictable; ++ ++ VM_BUG_ON(!seq_is_valid(lruvec)); ++ VM_BUG_ON(!state_is_valid(lruvec)); ++ ++ WRITE_ONCE(lrugen->enabled[0], lru_gen_enabled() && lru_gen_nr_swapfiles); ++ WRITE_ONCE(lrugen->enabled[1], lru_gen_enabled()); ++ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); ++ ++ cgroup_unlock(); ++ mutex_unlock(&lru_gen_state_mutex); ++ ++ return NOTIFY_DONE; ++} ++ ++/****************************************************************************** ++ * initialization ++ ******************************************************************************/ ++ ++void lru_gen_init_lruvec(struct lruvec *lruvec) ++{ ++ int i; ++ int gen, file, zone; ++ struct lrugen *lrugen = &lruvec->evictable; ++ ++ atomic_set(&lrugen->priority, DEF_PRIORITY); ++ ++ lrugen->max_seq = MIN_NR_GENS + 1; ++ lrugen->enabled[0] = lru_gen_enabled() && lru_gen_nr_swapfiles; ++ lrugen->enabled[1] = lru_gen_enabled(); ++ ++ for (i = 0; i <= MIN_NR_GENS + 1; i++) ++ lrugen->timestamps[i] = jiffies; ++ ++ for_each_gen_type_zone(gen, file, zone) ++ INIT_LIST_HEAD(&lrugen->lists[gen][file][zone]); ++} ++ ++static int __init init_lru_gen(void) ++{ ++ BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS); ++ BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS); ++ ++ if (hotplug_memory_notifier(lru_gen_online_mem, 0)) ++ pr_err("lru_gen: failed to subscribe hotplug notifications\n"); ++ ++ return 0; ++}; ++/* ++ * We want to run as early as possible because some debug code, e.g., ++ * dma_resv_lockdep(), calls mm_alloc() and mmput(). We only depend on mm_kobj, ++ * which is initialized one stage earlier. 
++ */ ++arch_initcall(init_lru_gen); ++ ++#endif /* CONFIG_LRU_GEN */ +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-09-16-mm-multigenerational-lru-activation.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-09-16-mm-multigenerational-lru-activation.patch new file mode 100644 index 0000000..1783bba --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-09-16-mm-multigenerational-lru-activation.patch @@ -0,0 +1,940 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no + version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id EF4FEC43462 + for ; Tue, 13 Apr 2021 06:57:18 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id CFA6161278 + for ; Tue, 13 Apr 2021 06:57:18 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S1345133AbhDMG5g (ORCPT + ); + Tue, 13 Apr 2021 02:57:36 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44204 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S1345075AbhDMG5O (ORCPT + ); + Tue, 13 Apr 2021 02:57:14 -0400 +Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C1B27C061342 + for ; Mon, 12 Apr 2021 23:56:54 -0700 (PDT) +Received: by mail-yb1-xb49.google.com with SMTP id g7so15243258ybm.13 + for ; Mon, 12 Apr 2021 23:56:54 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=o5Jou7hUitprbLWSkwF9m0rzlQtpjYePVUNvL8744B4=; + b=j0OnRRuICsaUkKDFgMmxVB6XdLNdlw7bkERy4WEKt8hjBSvD+Kp0+iOIcFy8N7824S + fiIZT/4kse0kGwqLNz6aT5fmfZX9JxxYEdOVwlR/Ws0MZO827eTQkQKIlfbqh7xkc4GT + TA7uVRsWqbOXCZgWt9zOAQjOZb/rs2P9QMKUlOFvfucJY2YuTWnwAyhKKGoanMVjppPe + XiDsyf+xl36l8HZCKTFf1nC3jlDQYELifqMsU7LnJQvyp4qL2Ghw5qGYALRz1HLWn1HT + nDo94se9xqkySvHWr7K7F6f3bxkPeLasd/CUo3jf80RHfUmgLwPgfJh9UGJtXbKnz7fZ + QiIQ== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=o5Jou7hUitprbLWSkwF9m0rzlQtpjYePVUNvL8744B4=; + b=GyMzG4Y9CRlIQTVJmAqzu40iDf9Ip5RESHdeLQAYm+tiJUh2RGVBJa6vKg38UMcgXC + EphRx2fv2WzLbuzG3KYV63fQ6mVN44J7Q5DZllmGANTY0ulI4ONN6upN04OPR+6Py8nD + thVg9bECRFbbKis2TNfSLXbGoO0/p8IfhjTpTAY+/gcDlXuuEwdN42+F5w+mKC73Ybd4 + YzMfYRrVWHdmd49KirIiJ2yKVwsTTFfOgJlsRhMjIxnKiDO88ZiQPXOhSThi9Pq3d4xZ + AKWIylGhQNKmESlmvpmEzuo3lhpofz6NtP61MD5kogRHKN8cOrfEwHfr81CTzg1JSAjQ + d+PQ== +X-Gm-Message-State: AOAM530BBghVYsHEGPHYaVOEjeRU+Fi6DhCLAJz+E/4KNkH046B//NxP + jRpr98Lw0DozCkFBmdQ3Y2SqfxcTm/k= +X-Google-Smtp-Source: ABdhPJw4gIvDWjMb3eWqmdPfHBjM8mpzIQ6uMlcwopqsTVyafHAw8KFn3kdXyj3+PrOeIymH0kmLZduE+GQ= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:a5b:f51:: with SMTP id 
y17mr7630772ybr.398.1618297013927; + Mon, 12 Apr 2021 23:56:53 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:26 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-10-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 09/16] mm: multigenerational lru: activation +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + page-reclaim@google.com, Yu Zhao +Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +For pages accessed multiple times via file descriptors, instead of +activating them upon the second accesses, we activate them based on +the refault rates of their tiers. Pages accessed N times via file +descriptors belong to tier order_base_2(N). Pages from tier 0, i.e., +those read ahead, accessed once via file descriptors and accessed only +via page tables, are evicted regardless of the refault rate. Pages +from other tiers will be moved to the next generation, i.e., +activated, if the refault rates of their tiers are higher than that of +tier 0. Each generation contains at most MAX_NR_TIERS tiers, and they +require additional MAX_NR_TIERS-2 bits in page->flags. This feedback +model has a few advantages over the current feedforward model: + 1) It has a negligible overhead in the access path because + activations are done in the reclaim path. + 2) It takes mapped pages into account and avoids overprotecting + pages accessed multiple times via file descriptors. + 3) More tiers offer better protection to pages accessed more than + twice when buffered-I/O-intensive workloads are under memory + pressure. + +For pages mapped upon page faults, the accessed bit is set and they +must be properly aged. We add them to the per-zone lists index by +max_seq, i.e., the youngest generation. For pages not in page cache +or swap cache, this can be done easily in the page fault path: we +rename lru_cache_add_inactive_or_unevictable() to +lru_cache_add_page_vma() and add a new parameter, which is set to true +for pages mapped upon page faults. For pages in page cache or swap +cache, we cannot differentiate the page fault path from the read ahead +path at the time we call lru_cache_add() in add_to_page_cache_lru() +and __read_swap_cache_async(). So we add a new function +lru_gen_activation(), which is essentially activate_page(), to move +pages to the per-zone lists indexed by max_seq at a later time. +Hopefully we would find those pages in lru_pvecs.lru_add and simply +set PageActive() on them without having to actually move them. + +Finally, we need to be compatible with the existing notion of active +and inactive. We cannot use PageActive() because it is not set on +active pages unless they are isolated, in order to spare the aging the +trouble of clearing it when an active generation becomes inactive. 
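As an aside on the tier description above, the access-count-to-tier mapping is just a ceiling log2. The short userspace sketch below is not part of the patch; it reimplements order_base_2() locally purely to print the mapping for small N.

    /* Userspace model only: tier = order_base_2(N) for N accesses via fd. */
    #include <stdio.h>

    static int order_base_2(unsigned long n)    /* ceil(log2(n)); 0 for n <= 1 */
    {
            int order = 0;

            while ((1UL << order) < n)
                    order++;
            return order;
    }

    int main(void)
    {
            for (unsigned long n = 1; n <= 8; n++)
                    printf("accessed %lu time(s) via fd -> tier %d\n",
                           n, order_base_2(n));
            return 0;
    }

So one access stays in tier 0, a second access moves the page to tier 1, three or four accesses to tier 2, and five to eight accesses to tier 3, which is what the refault-rate comparison per tier then acts on.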
A +new function page_is_active() compares the generation number of a page +with max_seq and max_seq-1 (modulo MAX_NR_GENS), which are considered +active and protected from the eviction. Other generations, which may +or may not exist, are considered inactive. + +Signed-off-by: Yu Zhao +--- + fs/proc/task_mmu.c | 3 +- + include/linux/mm_inline.h | 101 +++++++++++++++++++++ + include/linux/swap.h | 4 +- + kernel/events/uprobes.c | 2 +- + mm/huge_memory.c | 2 +- + mm/khugepaged.c | 2 +- + mm/memory.c | 14 +-- + mm/migrate.c | 2 +- + mm/swap.c | 26 +++--- + mm/swapfile.c | 2 +- + mm/userfaultfd.c | 2 +- + mm/vmscan.c | 91 ++++++++++++++++++- + mm/workingset.c | 179 +++++++++++++++++++++++++++++++------- + 13 files changed, 371 insertions(+), 59 deletions(-) + +diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c +index e862cab69583..d292f20c4e3d 100644 +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -1718,7 +1719,7 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty, + if (PageSwapCache(page)) + md->swapcache += nr_pages; + +- if (PageActive(page) || PageUnevictable(page)) ++ if (PageUnevictable(page) || page_is_active(compound_head(page), NULL)) + md->active += nr_pages; + + if (PageWriteback(page)) +diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h +index 2bf910eb3dd7..5eb4b12972ec 100644 +--- a/include/linux/mm_inline.h ++++ b/include/linux/mm_inline.h +@@ -95,6 +95,12 @@ static inline int lru_gen_from_seq(unsigned long seq) + return seq % MAX_NR_GENS; + } + ++/* Convert the level of usage to a tier. See the comment on MAX_NR_TIERS. */ ++static inline int lru_tier_from_usage(int usage) ++{ ++ return order_base_2(usage + 1); ++} ++ + /* Return a proper index regardless whether we keep a full history of stats. */ + static inline int sid_from_seq_or_gen(int seq_or_gen) + { +@@ -238,12 +244,93 @@ static inline bool lru_gen_deletion(struct page *page, struct lruvec *lruvec) + return true; + } + ++/* Activate a page from page cache or swap cache after it's mapped. */ ++static inline void lru_gen_activation(struct page *page, struct vm_area_struct *vma) ++{ ++ if (!lru_gen_enabled()) ++ return; ++ ++ if (PageActive(page) || PageUnevictable(page) || vma_is_dax(vma) || ++ (vma->vm_flags & (VM_LOCKED | VM_SPECIAL))) ++ return; ++ /* ++ * TODO: pass vm_fault to add_to_page_cache_lru() and ++ * __read_swap_cache_async() so they can activate pages directly when in ++ * the page fault path. ++ */ ++ activate_page(page); ++} ++ + /* Return -1 when a page is not on a list of the multigenerational lru. */ + static inline int page_lru_gen(struct page *page) + { + return ((READ_ONCE(page->flags) & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; + } + ++/* This function works regardless whether the multigenerational lru is enabled. 
*/ ++static inline bool page_is_active(struct page *page, struct lruvec *lruvec) ++{ ++ struct mem_cgroup *memcg; ++ int gen = page_lru_gen(page); ++ bool active = false; ++ ++ VM_BUG_ON_PAGE(PageTail(page), page); ++ ++ if (gen < 0) ++ return PageActive(page); ++ ++ if (lruvec) { ++ VM_BUG_ON_PAGE(PageUnevictable(page), page); ++ VM_BUG_ON_PAGE(PageActive(page), page); ++ lockdep_assert_held(&lruvec->lru_lock); ++ ++ return lru_gen_is_active(lruvec, gen); ++ } ++ ++ rcu_read_lock(); ++ ++ memcg = page_memcg_rcu(page); ++ lruvec = mem_cgroup_lruvec(memcg, page_pgdat(page)); ++ active = lru_gen_is_active(lruvec, gen); ++ ++ rcu_read_unlock(); ++ ++ return active; ++} ++ ++/* Return the level of usage of a page. See the comment on MAX_NR_TIERS. */ ++static inline int page_tier_usage(struct page *page) ++{ ++ unsigned long flags = READ_ONCE(page->flags); ++ ++ return flags & BIT(PG_workingset) ? ++ ((flags & LRU_USAGE_MASK) >> LRU_USAGE_PGOFF) + 1 : 0; ++} ++ ++/* Increment the usage counter after a page is accessed via file descriptors. */ ++static inline bool page_inc_usage(struct page *page) ++{ ++ unsigned long old_flags, new_flags; ++ ++ if (!lru_gen_enabled()) ++ return PageActive(page); ++ ++ do { ++ old_flags = READ_ONCE(page->flags); ++ ++ if (!(old_flags & BIT(PG_workingset))) ++ new_flags = old_flags | BIT(PG_workingset); ++ else ++ new_flags = (old_flags & ~LRU_USAGE_MASK) | min(LRU_USAGE_MASK, ++ (old_flags & LRU_USAGE_MASK) + BIT(LRU_USAGE_PGOFF)); ++ ++ if (old_flags == new_flags) ++ break; ++ } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags); ++ ++ return true; ++} ++ + #else /* CONFIG_LRU_GEN */ + + static inline bool lru_gen_enabled(void) +@@ -261,6 +348,20 @@ static inline bool lru_gen_deletion(struct page *page, struct lruvec *lruvec) + return false; + } + ++static inline void lru_gen_activation(struct page *page, struct vm_area_struct *vma) ++{ ++} ++ ++static inline bool page_is_active(struct page *page, struct lruvec *lruvec) ++{ ++ return PageActive(page); ++} ++ ++static inline bool page_inc_usage(struct page *page) ++{ ++ return PageActive(page); ++} ++ + #endif /* CONFIG_LRU_GEN */ + + static __always_inline void add_page_to_lru_list(struct page *page, +diff --git a/include/linux/swap.h b/include/linux/swap.h +index de2bbbf181ba..0e7532c7db22 100644 +--- a/include/linux/swap.h ++++ b/include/linux/swap.h +@@ -350,8 +350,8 @@ extern void deactivate_page(struct page *page); + extern void mark_page_lazyfree(struct page *page); + extern void swap_setup(void); + +-extern void lru_cache_add_inactive_or_unevictable(struct page *page, +- struct vm_area_struct *vma); ++extern void lru_cache_add_page_vma(struct page *page, struct vm_area_struct *vma, ++ bool faulting); + + /* linux/mm/vmscan.c */ + extern unsigned long zone_reclaimable_pages(struct zone *zone); +diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c +index 6addc9780319..4e93e5602723 100644 +--- a/kernel/events/uprobes.c ++++ b/kernel/events/uprobes.c +@@ -184,7 +184,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, + if (new_page) { + get_page(new_page); + page_add_new_anon_rmap(new_page, vma, addr, false); +- lru_cache_add_inactive_or_unevictable(new_page, vma); ++ lru_cache_add_page_vma(new_page, vma, false); + } else + /* no new page, just dec_mm_counter for old_page */ + dec_mm_counter(mm, MM_ANONPAGES); +diff --git a/mm/huge_memory.c b/mm/huge_memory.c +index 26d3cc4a7a0b..2cf46270c84b 100644 +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -637,7 
+637,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf, + entry = mk_huge_pmd(page, vma->vm_page_prot); + entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); + page_add_new_anon_rmap(page, vma, haddr, true); +- lru_cache_add_inactive_or_unevictable(page, vma); ++ lru_cache_add_page_vma(page, vma, true); + pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable); + set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); + update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); +diff --git a/mm/khugepaged.c b/mm/khugepaged.c +index a7d6cb912b05..08a43910f232 100644 +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -1199,7 +1199,7 @@ static void collapse_huge_page(struct mm_struct *mm, + spin_lock(pmd_ptl); + BUG_ON(!pmd_none(*pmd)); + page_add_new_anon_rmap(new_page, vma, address, true); +- lru_cache_add_inactive_or_unevictable(new_page, vma); ++ lru_cache_add_page_vma(new_page, vma, true); + pgtable_trans_huge_deposit(mm, pmd, pgtable); + set_pmd_at(mm, address, pmd, _pmd); + update_mmu_cache_pmd(vma, address, pmd); +diff --git a/mm/memory.c b/mm/memory.c +index 550405fc3b5e..9a6cb6d31430 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -73,6 +73,7 @@ + #include + #include + #include ++#include + + #include + +@@ -839,7 +840,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma + copy_user_highpage(new_page, page, addr, src_vma); + __SetPageUptodate(new_page); + page_add_new_anon_rmap(new_page, dst_vma, addr, false); +- lru_cache_add_inactive_or_unevictable(new_page, dst_vma); ++ lru_cache_add_page_vma(new_page, dst_vma, false); + rss[mm_counter(new_page)]++; + + /* All done, just insert the new page copy in the child */ +@@ -2907,7 +2908,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) + */ + ptep_clear_flush_notify(vma, vmf->address, vmf->pte); + page_add_new_anon_rmap(new_page, vma, vmf->address, false); +- lru_cache_add_inactive_or_unevictable(new_page, vma); ++ lru_cache_add_page_vma(new_page, vma, true); + /* + * We call the notify macro here because, when using secondary + * mmu page tables (such as kvm shadow page tables), we want the +@@ -3438,9 +3439,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) + /* ksm created a completely new copy */ + if (unlikely(page != swapcache && swapcache)) { + page_add_new_anon_rmap(page, vma, vmf->address, false); +- lru_cache_add_inactive_or_unevictable(page, vma); ++ lru_cache_add_page_vma(page, vma, true); + } else { + do_page_add_anon_rmap(page, vma, vmf->address, exclusive); ++ lru_gen_activation(page, vma); + } + + swap_free(entry); +@@ -3584,7 +3586,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) + + inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); + page_add_new_anon_rmap(page, vma, vmf->address, false); +- lru_cache_add_inactive_or_unevictable(page, vma); ++ lru_cache_add_page_vma(page, vma, true); + setpte: + set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); + +@@ -3709,6 +3711,7 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) + + add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR); + page_add_file_rmap(page, true); ++ lru_gen_activation(page, vma); + /* + * deposit and withdraw with pmd lock held + */ +@@ -3752,10 +3755,11 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr) + if (write && !(vma->vm_flags & VM_SHARED)) { + inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); + page_add_new_anon_rmap(page, vma, addr, false); +- lru_cache_add_inactive_or_unevictable(page, vma); ++ lru_cache_add_page_vma(page, vma, 
true); + } else { + inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); + page_add_file_rmap(page, false); ++ lru_gen_activation(page, vma); + } + set_pte_at(vma->vm_mm, addr, vmf->pte, entry); + } +diff --git a/mm/migrate.c b/mm/migrate.c +index 62b81d5257aa..1064b03cac33 100644 +--- a/mm/migrate.c ++++ b/mm/migrate.c +@@ -3004,7 +3004,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate, + inc_mm_counter(mm, MM_ANONPAGES); + page_add_new_anon_rmap(page, vma, addr, false); + if (!is_zone_device_page(page)) +- lru_cache_add_inactive_or_unevictable(page, vma); ++ lru_cache_add_page_vma(page, vma, false); + get_page(page); + + if (flush) { +diff --git a/mm/swap.c b/mm/swap.c +index f20ed56ebbbf..d6458ee1e9f8 100644 +--- a/mm/swap.c ++++ b/mm/swap.c +@@ -306,7 +306,7 @@ void lru_note_cost_page(struct page *page) + + static void __activate_page(struct page *page, struct lruvec *lruvec) + { +- if (!PageActive(page) && !PageUnevictable(page)) { ++ if (!PageUnevictable(page) && !page_is_active(page, lruvec)) { + int nr_pages = thp_nr_pages(page); + + del_page_from_lru_list(page, lruvec); +@@ -337,7 +337,7 @@ static bool need_activate_page_drain(int cpu) + static void activate_page_on_lru(struct page *page) + { + page = compound_head(page); +- if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { ++ if (PageLRU(page) && !PageUnevictable(page) && !page_is_active(page, NULL)) { + struct pagevec *pvec; + + local_lock(&lru_pvecs.lock); +@@ -431,7 +431,7 @@ void mark_page_accessed(struct page *page) + * this list is never rotated or maintained, so marking an + * evictable page accessed has no effect. + */ +- } else if (!PageActive(page)) { ++ } else if (!page_inc_usage(page)) { + activate_page(page); + ClearPageReferenced(page); + workingset_activation(page); +@@ -467,15 +467,14 @@ void lru_cache_add(struct page *page) + EXPORT_SYMBOL(lru_cache_add); + + /** +- * lru_cache_add_inactive_or_unevictable ++ * lru_cache_add_page_vma + * @page: the page to be added to LRU + * @vma: vma in which page is mapped for determining reclaimability + * +- * Place @page on the inactive or unevictable LRU list, depending on its +- * evictability. ++ * Place @page on an LRU list, depending on its evictability. 
+ */ +-void lru_cache_add_inactive_or_unevictable(struct page *page, +- struct vm_area_struct *vma) ++void lru_cache_add_page_vma(struct page *page, struct vm_area_struct *vma, ++ bool faulting) + { + bool unevictable; + +@@ -492,6 +491,11 @@ void lru_cache_add_inactive_or_unevictable(struct page *page, + __mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages); + count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages); + } ++ ++ /* tell the multigenerational lru that the page is being faulted in */ ++ if (lru_gen_enabled() && !unevictable && faulting) ++ SetPageActive(page); ++ + lru_cache_add(page); + } + +@@ -518,7 +522,7 @@ void lru_cache_add_inactive_or_unevictable(struct page *page, + */ + static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec) + { +- bool active = PageActive(page); ++ bool active = page_is_active(page, lruvec); + int nr_pages = thp_nr_pages(page); + + if (PageUnevictable(page)) +@@ -558,7 +562,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec) + + static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec) + { +- if (PageActive(page) && !PageUnevictable(page)) { ++ if (!PageUnevictable(page) && page_is_active(page, lruvec)) { + int nr_pages = thp_nr_pages(page); + + del_page_from_lru_list(page, lruvec); +@@ -672,7 +676,7 @@ void deactivate_file_page(struct page *page) + */ + void deactivate_page(struct page *page) + { +- if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { ++ if (PageLRU(page) && !PageUnevictable(page) && page_is_active(page, NULL)) { + struct pagevec *pvec; + + local_lock(&lru_pvecs.lock); +diff --git a/mm/swapfile.c b/mm/swapfile.c +index c6041d10a73a..ab3b5ca404fd 100644 +--- a/mm/swapfile.c ++++ b/mm/swapfile.c +@@ -1936,7 +1936,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, + page_add_anon_rmap(page, vma, addr, false); + } else { /* ksm created a completely new copy */ + page_add_new_anon_rmap(page, vma, addr, false); +- lru_cache_add_inactive_or_unevictable(page, vma); ++ lru_cache_add_page_vma(page, vma, false); + } + swap_free(entry); + out: +diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c +index 9a3d451402d7..e1d4cd3103b8 100644 +--- a/mm/userfaultfd.c ++++ b/mm/userfaultfd.c +@@ -123,7 +123,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm, + + inc_mm_counter(dst_mm, MM_ANONPAGES); + page_add_new_anon_rmap(page, dst_vma, dst_addr, false); +- lru_cache_add_inactive_or_unevictable(page, dst_vma); ++ lru_cache_add_page_vma(page, dst_vma, true); + + set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte); + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 8559bb94d452..c74ebe2039f7 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -898,9 +898,11 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, + + if (PageSwapCache(page)) { + swp_entry_t swap = { .val = page_private(page) }; +- mem_cgroup_swapout(page, swap); ++ ++ /* get a shadow entry before page_memcg() is cleared */ + if (reclaimed && !mapping_exiting(mapping)) + shadow = workingset_eviction(page, target_memcg); ++ mem_cgroup_swapout(page, swap); + __delete_from_swap_cache(page, swap, shadow); + xa_unlock_irqrestore(&mapping->i_pages, flags); + put_swap_page(page, swap); +@@ -4375,6 +4377,93 @@ static bool __maybe_unused seq_is_valid(struct lruvec *lruvec) + get_nr_gens(lruvec, 1) <= MAX_NR_GENS; + } + ++/****************************************************************************** ++ * refault feedback loop ++ 
******************************************************************************/ ++ ++/* ++ * A feedback loop modeled after the PID controller. Currently supports the ++ * proportional (P) and the integral (I) terms; the derivative (D) term can be ++ * added if necessary. The setpoint (SP) is the desired position; the process ++ * variable (PV) is the measured position. The error is the difference between ++ * the SP and the PV. A positive error results in a positive control output ++ * correction, which, in our case, is to allow eviction. ++ * ++ * The P term is the current refault rate refaulted/(evicted+activated), which ++ * has a weight of 1. The I term is the arithmetic mean of the last N refault ++ * rates, weighted by geometric series 1/2, 1/4, ..., 1/(1<evictable; ++ int sid = sid_from_seq_or_gen(lrugen->min_seq[file]); ++ ++ pos->refaulted = lrugen->avg_refaulted[file][tier] + ++ atomic_long_read(&lrugen->refaulted[sid][file][tier]); ++ pos->total = lrugen->avg_total[file][tier] + ++ atomic_long_read(&lrugen->evicted[sid][file][tier]); ++ if (tier) ++ pos->total += lrugen->activated[sid][file][tier - 1]; ++ pos->gain = gain; ++} ++ ++static void reset_controller_pos(struct lruvec *lruvec, int gen, int file) ++{ ++ int tier; ++ int sid = sid_from_seq_or_gen(gen); ++ struct lrugen *lrugen = &lruvec->evictable; ++ bool carryover = gen == lru_gen_from_seq(lrugen->min_seq[file]); ++ ++ if (!carryover && NR_STAT_GENS == 1) ++ return; ++ ++ for (tier = 0; tier < MAX_NR_TIERS; tier++) { ++ if (carryover) { ++ unsigned long sum; ++ ++ sum = lrugen->avg_refaulted[file][tier] + ++ atomic_long_read(&lrugen->refaulted[sid][file][tier]); ++ WRITE_ONCE(lrugen->avg_refaulted[file][tier], sum >> 1); ++ ++ sum = lrugen->avg_total[file][tier] + ++ atomic_long_read(&lrugen->evicted[sid][file][tier]); ++ if (tier) ++ sum += lrugen->activated[sid][file][tier - 1]; ++ WRITE_ONCE(lrugen->avg_total[file][tier], sum >> 1); ++ ++ if (NR_STAT_GENS > 1) ++ continue; ++ } ++ ++ atomic_long_set(&lrugen->refaulted[sid][file][tier], 0); ++ atomic_long_set(&lrugen->evicted[sid][file][tier], 0); ++ if (tier) ++ WRITE_ONCE(lrugen->activated[sid][file][tier - 1], 0); ++ } ++} ++ ++static bool positive_ctrl_err(struct controller_pos *sp, struct controller_pos *pv) ++{ ++ /* ++ * Allow eviction if the PV has a limited number of refaulted pages or a ++ * lower refault rate than the SP. ++ */ ++ return pv->refaulted < SWAP_CLUSTER_MAX || ++ pv->refaulted * max(sp->total, 1UL) * sp->gain <= ++ sp->refaulted * max(pv->total, 1UL) * pv->gain; ++} ++ + /****************************************************************************** + * state change + ******************************************************************************/ +diff --git a/mm/workingset.c b/mm/workingset.c +index cd39902c1062..df363f9419fc 100644 +--- a/mm/workingset.c ++++ b/mm/workingset.c +@@ -168,9 +168,9 @@ + * refault distance will immediately activate the refaulting page. 
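The eviction decision in positive_ctrl_err() above reduces to a cross-multiplied rate comparison. The userspace model below is illustrative only; SWAP_CLUSTER_MAX and the sample gains/counters are assumed values, not taken from this patch.

    /* Userspace model of the positive_ctrl_err() comparison shown above. */
    #include <stdbool.h>
    #include <stdio.h>

    #define SWAP_CLUSTER_MAX 32UL              /* assumed value for illustration */

    struct controller_pos {
            unsigned long refaulted;
            unsigned long total;
            int gain;
    };

    static unsigned long max_ul(unsigned long a, unsigned long b)
    {
            return a > b ? a : b;
    }

    /* Same comparison as in the hunk above: allow eviction if the PV has few
     * refaults or a refault rate no higher than the SP's. */
    static bool positive_ctrl_err(const struct controller_pos *sp,
                                  const struct controller_pos *pv)
    {
            return pv->refaulted < SWAP_CLUSTER_MAX ||
                   pv->refaulted * max_ul(sp->total, 1) * sp->gain <=
                   sp->refaulted * max_ul(pv->total, 1) * pv->gain;
    }

    int main(void)
    {
            /* SP (tier 0): 100 refaults out of 1000 evictions, a 10% rate. */
            struct controller_pos sp   = { .refaulted = 100, .total = 1000, .gain = 1 };
            /* PV at a 25% refault rate: higher than the SP, so protect it. */
            struct controller_pos hot  = { .refaulted = 50,  .total = 200,  .gain = 1 };
            /* PV with only a handful of refaults: evicted regardless of rate. */
            struct controller_pos cold = { .refaulted = 10,  .total = 20,   .gain = 1 };

            printf("evict hot tier?  %s\n", positive_ctrl_err(&sp, &hot) ? "yes" : "no");
            printf("evict cold tier? %s\n", positive_ctrl_err(&sp, &cold) ? "yes" : "no");
            return 0;
    }

Running this prints "no" for the hot tier and "yes" for the cold one, mirroring how a tier with a higher refault rate than tier 0 gets its pages activated instead of evicted.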
+ */ + +-#define EVICTION_SHIFT ((BITS_PER_LONG - BITS_PER_XA_VALUE) + \ +- 1 + NODES_SHIFT + MEM_CGROUP_ID_SHIFT) +-#define EVICTION_MASK (~0UL >> EVICTION_SHIFT) ++#define EVICTION_SHIFT (BITS_PER_XA_VALUE - MEM_CGROUP_ID_SHIFT - NODES_SHIFT) ++#define EVICTION_MASK (BIT(EVICTION_SHIFT) - 1) ++#define WORKINGSET_WIDTH 1 + + /* + * Eviction timestamps need to be able to cover the full range of +@@ -182,38 +182,139 @@ + */ + static unsigned int bucket_order __read_mostly; + +-static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction, +- bool workingset) ++static void *pack_shadow(int memcg_id, struct pglist_data *pgdat, unsigned long val) + { +- eviction >>= bucket_order; +- eviction &= EVICTION_MASK; +- eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; +- eviction = (eviction << NODES_SHIFT) | pgdat->node_id; +- eviction = (eviction << 1) | workingset; ++ val = (val << MEM_CGROUP_ID_SHIFT) | memcg_id; ++ val = (val << NODES_SHIFT) | pgdat->node_id; + +- return xa_mk_value(eviction); ++ return xa_mk_value(val); + } + +-static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, +- unsigned long *evictionp, bool *workingsetp) ++static unsigned long unpack_shadow(void *shadow, int *memcg_id, struct pglist_data **pgdat) + { +- unsigned long entry = xa_to_value(shadow); +- int memcgid, nid; +- bool workingset; +- +- workingset = entry & 1; +- entry >>= 1; +- nid = entry & ((1UL << NODES_SHIFT) - 1); +- entry >>= NODES_SHIFT; +- memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1); +- entry >>= MEM_CGROUP_ID_SHIFT; +- +- *memcgidp = memcgid; +- *pgdat = NODE_DATA(nid); +- *evictionp = entry << bucket_order; +- *workingsetp = workingset; ++ unsigned long val = xa_to_value(shadow); ++ ++ *pgdat = NODE_DATA(val & (BIT(NODES_SHIFT) - 1)); ++ val >>= NODES_SHIFT; ++ *memcg_id = val & (BIT(MEM_CGROUP_ID_SHIFT) - 1); ++ ++ return val >> MEM_CGROUP_ID_SHIFT; ++} ++ ++#ifdef CONFIG_LRU_GEN ++ ++#if LRU_GEN_SHIFT + LRU_USAGE_SHIFT >= EVICTION_SHIFT ++#error "Please try smaller NODES_SHIFT, NR_LRU_GENS and TIERS_PER_GEN configurations" ++#endif ++ ++static void page_set_usage(struct page *page, int usage) ++{ ++ unsigned long old_flags, new_flags; ++ ++ VM_BUG_ON(usage > BIT(LRU_USAGE_WIDTH)); ++ ++ if (!usage) ++ return; ++ ++ do { ++ old_flags = READ_ONCE(page->flags); ++ new_flags = (old_flags & ~LRU_USAGE_MASK) | LRU_TIER_FLAGS | ++ ((usage - 1UL) << LRU_USAGE_PGOFF); ++ if (old_flags == new_flags) ++ break; ++ } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags); ++} ++ ++/* Return a token to be stored in the shadow entry of a page being evicted. */ ++static void *lru_gen_eviction(struct page *page) ++{ ++ int sid, tier; ++ unsigned long token; ++ unsigned long min_seq; ++ struct lruvec *lruvec; ++ struct lrugen *lrugen; ++ int file = page_is_file_lru(page); ++ int usage = page_tier_usage(page); ++ struct mem_cgroup *memcg = page_memcg(page); ++ struct pglist_data *pgdat = page_pgdat(page); ++ ++ if (!lru_gen_enabled()) ++ return NULL; ++ ++ lruvec = mem_cgroup_lruvec(memcg, pgdat); ++ lrugen = &lruvec->evictable; ++ min_seq = READ_ONCE(lrugen->min_seq[file]); ++ token = (min_seq << LRU_USAGE_SHIFT) | usage; ++ ++ sid = sid_from_seq_or_gen(min_seq); ++ tier = lru_tier_from_usage(usage); ++ atomic_long_add(thp_nr_pages(page), &lrugen->evicted[sid][file][tier]); ++ ++ return pack_shadow(mem_cgroup_id(memcg), pgdat, token); ++} ++ ++/* Account a refaulted page based on the token stored in its shadow entry. 
*/ ++static bool lru_gen_refault(struct page *page, void *shadow) ++{ ++ int sid, tier, usage; ++ int memcg_id; ++ unsigned long token; ++ unsigned long min_seq; ++ struct lruvec *lruvec; ++ struct lrugen *lrugen; ++ struct pglist_data *pgdat; ++ struct mem_cgroup *memcg; ++ int file = page_is_file_lru(page); ++ ++ if (!lru_gen_enabled()) ++ return false; ++ ++ token = unpack_shadow(shadow, &memcg_id, &pgdat); ++ if (page_pgdat(page) != pgdat) ++ return true; ++ ++ rcu_read_lock(); ++ memcg = page_memcg_rcu(page); ++ if (mem_cgroup_id(memcg) != memcg_id) ++ goto unlock; ++ ++ usage = token & (BIT(LRU_USAGE_SHIFT) - 1); ++ token >>= LRU_USAGE_SHIFT; ++ ++ lruvec = mem_cgroup_lruvec(memcg, pgdat); ++ lrugen = &lruvec->evictable; ++ min_seq = READ_ONCE(lrugen->min_seq[file]); ++ if (token != (min_seq & (EVICTION_MASK >> LRU_USAGE_SHIFT))) ++ goto unlock; ++ ++ page_set_usage(page, usage); ++ ++ sid = sid_from_seq_or_gen(min_seq); ++ tier = lru_tier_from_usage(usage); ++ atomic_long_add(thp_nr_pages(page), &lrugen->refaulted[sid][file][tier]); ++ inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file); ++ if (tier) ++ inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file); ++unlock: ++ rcu_read_unlock(); ++ ++ return true; ++} ++ ++#else /* CONFIG_LRU_GEN */ ++ ++static void *lru_gen_eviction(struct page *page) ++{ ++ return NULL; + } + ++static bool lru_gen_refault(struct page *page, void *shadow) ++{ ++ return false; ++} ++ ++#endif /* CONFIG_LRU_GEN */ ++ + /** + * workingset_age_nonresident - age non-resident entries as LRU ages + * @lruvec: the lruvec that was aged +@@ -256,18 +357,25 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) + unsigned long eviction; + struct lruvec *lruvec; + int memcgid; ++ void *shadow; + + /* Page is fully exclusive and pins page's memory cgroup pointer */ + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_PAGE(page_count(page), page); + VM_BUG_ON_PAGE(!PageLocked(page), page); + ++ shadow = lru_gen_eviction(page); ++ if (shadow) ++ return shadow; ++ + lruvec = mem_cgroup_lruvec(target_memcg, pgdat); + /* XXX: target_memcg can be NULL, go through lruvec */ + memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); + eviction = atomic_long_read(&lruvec->nonresident_age); ++ eviction >>= bucket_order; ++ eviction = (eviction << WORKINGSET_WIDTH) | PageWorkingset(page); + workingset_age_nonresident(lruvec, thp_nr_pages(page)); +- return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page)); ++ return pack_shadow(memcgid, pgdat, eviction); + } + + /** +@@ -294,7 +402,10 @@ void workingset_refault(struct page *page, void *shadow) + bool workingset; + int memcgid; + +- unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset); ++ if (lru_gen_refault(page, shadow)) ++ return; ++ ++ eviction = unpack_shadow(shadow, &memcgid, &pgdat); + + rcu_read_lock(); + /* +@@ -318,6 +429,8 @@ void workingset_refault(struct page *page, void *shadow) + goto out; + eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat); + refault = atomic_long_read(&eviction_lruvec->nonresident_age); ++ workingset = eviction & (BIT(WORKINGSET_WIDTH) - 1); ++ eviction = (eviction >> WORKINGSET_WIDTH) << bucket_order; + + /* + * Calculate the refault distance +@@ -335,7 +448,7 @@ void workingset_refault(struct page *page, void *shadow) + * longest time, so the occasional inappropriate activation + * leading to pressure on the active list is not a problem. 
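To make the reworked shadow layout concrete, here is a userspace round-trip of the pack_shadow()/unpack_shadow() bit packing shown above. The shift values and the token layout are assumed example numbers (NODES_SHIFT in particular is Kconfig-dependent), and the xa_mk_value()/xa_to_value() wrapping is left out.

    /* Userspace model of the shadow-entry pack/unpack round trip. */
    #include <assert.h>
    #include <stdio.h>

    #define MEM_CGROUP_ID_SHIFT 16    /* assumed, as in mainline */
    #define NODES_SHIFT 6             /* assumed example; Kconfig-dependent */
    #define LRU_USAGE_SHIFT 4         /* assumed example width of the usage field */

    static unsigned long pack_shadow(int memcg_id, int node_id, unsigned long val)
    {
            val = (val << MEM_CGROUP_ID_SHIFT) | memcg_id;
            val = (val << NODES_SHIFT) | node_id;
            return val;
    }

    static unsigned long unpack_shadow(unsigned long shadow, int *memcg_id, int *node_id)
    {
            *node_id = shadow & ((1UL << NODES_SHIFT) - 1);
            shadow >>= NODES_SHIFT;
            *memcg_id = shadow & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
            return shadow >> MEM_CGROUP_ID_SHIFT;
    }

    int main(void)
    {
            int memcg_id, node_id;
            /* token = (min_seq << LRU_USAGE_SHIFT) | usage, as in lru_gen_eviction() */
            unsigned long token = (42UL << LRU_USAGE_SHIFT) | 3;
            unsigned long shadow = pack_shadow(1234, 2, token);

            assert(unpack_shadow(shadow, &memcg_id, &node_id) == token);
            printf("memcg %d node %d token %#lx\n", memcg_id, node_id, token);
            return 0;
    }

On refault, lru_gen_refault() above performs exactly this unpacking, then rejects the token if the low bits no longer match min_seq, which is how stale shadow entries are filtered out.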
+ */ +- refault_distance = (refault - eviction) & EVICTION_MASK; ++ refault_distance = (refault - eviction) & (EVICTION_MASK >> WORKINGSET_WIDTH); + + /* + * The activation decision for this page is made at the level +@@ -594,7 +707,7 @@ static int __init workingset_init(void) + unsigned int max_order; + int ret; + +- BUILD_BUG_ON(BITS_PER_LONG < EVICTION_SHIFT); ++ BUILD_BUG_ON(EVICTION_SHIFT < WORKINGSET_WIDTH); + /* + * Calculate the eviction bucket size to cover the longest + * actionable refault distance, which is currently half of +@@ -602,7 +715,7 @@ static int __init workingset_init(void) + * some more pages at runtime, so keep working with up to + * double the initial memory by using totalram_pages as-is. + */ +- timestamp_bits = BITS_PER_LONG - EVICTION_SHIFT; ++ timestamp_bits = EVICTION_SHIFT - WORKINGSET_WIDTH; + max_order = fls_long(totalram_pages() - 1); + if (max_order > timestamp_bits) + bucket_order = max_order - timestamp_bits; +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-10-16-mm-multigenerational-lru-mm_struct-list.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-10-16-mm-multigenerational-lru-mm_struct-list.patch new file mode 100644 index 0000000..8b8974b --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-10-16-mm-multigenerational-lru-mm_struct-list.patch @@ -0,0 +1,814 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no + version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id 584E2C433B4 + for ; Tue, 13 Apr 2021 06:57:24 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id 364F560FDB + for ; Tue, 13 Apr 2021 06:57:24 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S1345163AbhDMG5l (ORCPT + ); + Tue, 13 Apr 2021 02:57:41 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44208 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S1345079AbhDMG5P (ORCPT + ); + Tue, 13 Apr 2021 02:57:15 -0400 +Received: from mail-qt1-x849.google.com (mail-qt1-x849.google.com [IPv6:2607:f8b0:4864:20::849]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 33EA5C061574 + for ; Mon, 12 Apr 2021 23:56:56 -0700 (PDT) +Received: by mail-qt1-x849.google.com with SMTP id o15so661346qtq.20 + for ; Mon, 12 Apr 2021 23:56:56 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=cu41ToFIF+otWxvIfaf0+qpOwdOuOIiyHS34SY2MKlA=; + b=DVE8DNOLCIkGnchiTSJf1aDqFVGLrvEGecUeUN0sDIHBw/EmgoB7xYiwrDwlmTJzfB + 7mJ9wgXcC3xTW/xg8bwqYmzHvC/L4X4KSoDnIWPKnc562ObAH2IGWhiD3korjYqggzne + pjoL+Xglz7D6A6bOmM8M5cZKQhXRisrB5aDyIVUvRJmQLTWP2WB2n4JPqTvP/wVMQ9Sn + hXTZFKELKJbKA+BHU0pwjNA7cFy1nW2rJ9X9d+VP21+ThijMrCLuken/5O6OvPkUefZl + sakH+0tV7Yy/fR7EVGJoWcpUjUiGxd6+0AUNvryVNuijwkPETOtPNH6UfyfgZ6xdkl9P + OYsw== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + 
d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=cu41ToFIF+otWxvIfaf0+qpOwdOuOIiyHS34SY2MKlA=; + b=Hv9g3zJZwz81XIFNxdDjfOsfsikJNtFff85YKjuCIJR3ru0Fl/o3i0TbhFzOjTUKBt + yhJLayQgM9XxSudGQ47m0Ya49B4k58xttPSNqFNA93EXYaxcUN7fG8T+ZYA0VxA96PeD + qZHRzegQrJ6SM3hYDYpBhvClDfl9zRD0Gpns+vVl2DjteDrRi+wekSzyz6MvMlGhtb/s + F1O38FNuucDx0CgK/so+BE9vzBcN8TzGAU9OaMBW6lDAhAcq+NxEl32LeO/a/P6Oz9A1 + x77ZeDzQXRkpTd7y0bgBYZWdg+h/cc09EJonEBfUTa9tDdaDfqMhPlllI6ZHFFJYrlkh + gSDw== +X-Gm-Message-State: AOAM530hiDEzMAP2in3GTJKn5AqypprG9ZgOZOECg5xoh9CUzK15XTUw + 0N5X5CtrUDDlCTAUV9QB3qMFCzKiHHg= +X-Google-Smtp-Source: ABdhPJzMmLOgNcb9fea/k5rqaH2vAtKGPRWVf2ZxGZXPr5TIM1jkpFwnMYJAYMnOr+dtOuXM8dcYCymh2hY= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:a05:6214:7ed:: with SMTP id + bp13mr7059024qvb.17.1618297015323; Mon, 12 Apr 2021 23:56:55 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:27 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-11-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 10/16] mm: multigenerational lru: mm_struct list +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + page-reclaim@google.com, Yu Zhao +Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +In order to scan page tables, we add an infrastructure to maintain +either a system-wide mm_struct list or per-memcg mm_struct lists. +Multiple threads can concurrently work on the same mm_struct list, and +each of them will be given a different mm_struct. + +This infrastructure also tracks whether an mm_struct is being used on +any CPUs or has been used since the last time a worker looked at it. +In other words, workers will not be given an mm_struct that belongs to +a process that has been sleeping. 
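The "each worker is given a different mm_struct" behaviour described above is essentially a shared cursor advanced under a lock. A minimal userspace model follows (pthreads, illustrative names only; none of the per-node sequence numbers, wait queues, or skip heuristics of the real get_next_mm() are modeled).

    /* Userspace model: several workers share one cursor over a list, and each
     * item is handed out exactly once per walk. */
    #include <pthread.h>
    #include <stdio.h>

    #define NR_ITEMS   8
    #define NR_WORKERS 3

    static int items[NR_ITEMS];
    static int cursor;                  /* index of the next item to hand out */
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    /* Loosely analogous to get_next_mm(): return the next unclaimed item. */
    static int *get_next_item(void)
    {
            int *item = NULL;

            pthread_mutex_lock(&lock);
            if (cursor < NR_ITEMS)
                    item = &items[cursor++];
            pthread_mutex_unlock(&lock);
            return item;
    }

    static void *worker(void *arg)
    {
            int *item;

            while ((item = get_next_item()))
                    printf("worker %ld walks item %ld\n",
                           (long)arg, (long)(item - items));
            return NULL;
    }

    int main(void)
    {
            pthread_t tid[NR_WORKERS];

            for (long i = 0; i < NR_WORKERS; i++)
                    pthread_create(&tid[i], NULL, worker, (void *)i);
            for (int i = 0; i < NR_WORKERS; i++)
                    pthread_join(tid[i], NULL);
            return 0;
    }

Compile with -lpthread. Every item is printed by exactly one worker, which is the property the per-memcg/per-node mm_struct list below provides for concurrent page table walkers.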
+ +Signed-off-by: Yu Zhao +--- + fs/exec.c | 2 + + include/linux/memcontrol.h | 6 + + include/linux/mm_types.h | 117 ++++++++++++++ + include/linux/mmzone.h | 2 - + kernel/exit.c | 1 + + kernel/fork.c | 10 ++ + kernel/kthread.c | 1 + + kernel/sched/core.c | 2 + + mm/memcontrol.c | 28 ++++ + mm/vmscan.c | 316 +++++++++++++++++++++++++++++++++++++ + 10 files changed, 483 insertions(+), 2 deletions(-) + +diff --git a/fs/exec.c b/fs/exec.c +index 18594f11c31f..c691d4d7720c 100644 +--- a/fs/exec.c ++++ b/fs/exec.c +@@ -1008,6 +1008,7 @@ static int exec_mmap(struct mm_struct *mm) + active_mm = tsk->active_mm; + tsk->active_mm = mm; + tsk->mm = mm; ++ lru_gen_add_mm(mm); + /* + * This prevents preemption while active_mm is being loaded and + * it and mm are being updated, which could cause problems for +@@ -1018,6 +1019,7 @@ static int exec_mmap(struct mm_struct *mm) + if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); + activate_mm(active_mm, mm); ++ lru_gen_switch_mm(active_mm, mm); + if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) + local_irq_enable(); + tsk->mm->vmacache_seqnum = 0; +diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h +index f13dc02cf277..cff95ed1ee2b 100644 +--- a/include/linux/memcontrol.h ++++ b/include/linux/memcontrol.h +@@ -212,6 +212,8 @@ struct obj_cgroup { + }; + }; + ++struct lru_gen_mm_list; ++ + /* + * The memory controller data structure. The memory controller controls both + * page cache and RSS per cgroup. We would eventually like to provide +@@ -335,6 +337,10 @@ struct mem_cgroup { + struct deferred_split deferred_split_queue; + #endif + ++#ifdef CONFIG_LRU_GEN ++ struct lru_gen_mm_list *mm_list; ++#endif ++ + struct mem_cgroup_per_node *nodeinfo[0]; + /* WARNING: nodeinfo must be the last member here */ + }; +diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h +index 6613b26a8894..f8a239fbb958 100644 +--- a/include/linux/mm_types.h ++++ b/include/linux/mm_types.h +@@ -15,6 +15,8 @@ + #include + #include + #include ++#include ++#include + + #include + +@@ -383,6 +385,8 @@ struct core_state { + struct completion startup; + }; + ++#define ANON_AND_FILE 2 ++ + struct kioctx_table; + struct mm_struct { + struct { +@@ -561,6 +565,22 @@ struct mm_struct { + + #ifdef CONFIG_IOMMU_SUPPORT + u32 pasid; ++#endif ++#ifdef CONFIG_LRU_GEN ++ struct { ++ /* the node of a global or per-memcg mm_struct list */ ++ struct list_head list; ++#ifdef CONFIG_MEMCG ++ /* points to memcg of the owner task above */ ++ struct mem_cgroup *memcg; ++#endif ++ /* whether this mm_struct has been used since the last walk */ ++ nodemask_t nodes[ANON_AND_FILE]; ++#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH ++ /* the number of CPUs using this mm_struct */ ++ atomic_t nr_cpus; ++#endif ++ } lrugen; + #endif + } __randomize_layout; + +@@ -588,6 +608,103 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) + return (struct cpumask *)&mm->cpu_bitmap; + } + ++#ifdef CONFIG_LRU_GEN ++ ++void lru_gen_init_mm(struct mm_struct *mm); ++void lru_gen_add_mm(struct mm_struct *mm); ++void lru_gen_del_mm(struct mm_struct *mm); ++#ifdef CONFIG_MEMCG ++int lru_gen_alloc_mm_list(struct mem_cgroup *memcg); ++void lru_gen_free_mm_list(struct mem_cgroup *memcg); ++void lru_gen_migrate_mm(struct mm_struct *mm); ++#endif ++ ++/* ++ * Track the usage so mm_struct's that haven't been used since the last walk can ++ * be skipped. This function adds a theoretical overhead to each context switch, ++ * which hasn't been measurable. 
++ */ ++static inline void lru_gen_switch_mm(struct mm_struct *old, struct mm_struct *new) ++{ ++ int file; ++ ++ /* exclude init_mm, efi_mm, etc. */ ++ if (!core_kernel_data((unsigned long)old)) { ++ VM_BUG_ON(old == &init_mm); ++ ++ for (file = 0; file < ANON_AND_FILE; file++) ++ nodes_setall(old->lrugen.nodes[file]); ++ ++#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH ++ atomic_dec(&old->lrugen.nr_cpus); ++ VM_BUG_ON_MM(atomic_read(&old->lrugen.nr_cpus) < 0, old); ++#endif ++ } else ++ VM_BUG_ON_MM(READ_ONCE(old->lrugen.list.prev) || ++ READ_ONCE(old->lrugen.list.next), old); ++ ++ if (!core_kernel_data((unsigned long)new)) { ++ VM_BUG_ON(new == &init_mm); ++ ++#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH ++ atomic_inc(&new->lrugen.nr_cpus); ++ VM_BUG_ON_MM(atomic_read(&new->lrugen.nr_cpus) < 0, new); ++#endif ++ } else ++ VM_BUG_ON_MM(READ_ONCE(new->lrugen.list.prev) || ++ READ_ONCE(new->lrugen.list.next), new); ++} ++ ++/* Return whether this mm_struct is being used on any CPUs. */ ++static inline bool lru_gen_mm_is_active(struct mm_struct *mm) ++{ ++#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH ++ return !cpumask_empty(mm_cpumask(mm)); ++#else ++ return atomic_read(&mm->lrugen.nr_cpus); ++#endif ++} ++ ++#else /* CONFIG_LRU_GEN */ ++ ++static inline void lru_gen_init_mm(struct mm_struct *mm) ++{ ++} ++ ++static inline void lru_gen_add_mm(struct mm_struct *mm) ++{ ++} ++ ++static inline void lru_gen_del_mm(struct mm_struct *mm) ++{ ++} ++ ++#ifdef CONFIG_MEMCG ++static inline int lru_gen_alloc_mm_list(struct mem_cgroup *memcg) ++{ ++ return 0; ++} ++ ++static inline void lru_gen_free_mm_list(struct mem_cgroup *memcg) ++{ ++} ++ ++static inline void lru_gen_migrate_mm(struct mm_struct *mm) ++{ ++} ++#endif ++ ++static inline void lru_gen_switch_mm(struct mm_struct *old, struct mm_struct *new) ++{ ++} ++ ++static inline bool lru_gen_mm_is_active(struct mm_struct *mm) ++{ ++ return false; ++} ++ ++#endif /* CONFIG_LRU_GEN */ ++ + struct mmu_gather; + extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); + extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index a60c7498afd7..dcfadf6a8c07 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -285,8 +285,6 @@ static inline bool is_active_lru(enum lru_list lru) + return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); + } + +-#define ANON_AND_FILE 2 +- + enum lruvec_flags { + LRUVEC_CONGESTED, /* lruvec has many dirty pages + * backed by a congested BDI +diff --git a/kernel/exit.c b/kernel/exit.c +index 04029e35e69a..e4292717ce37 100644 +--- a/kernel/exit.c ++++ b/kernel/exit.c +@@ -422,6 +422,7 @@ void mm_update_next_owner(struct mm_struct *mm) + goto retry; + } + WRITE_ONCE(mm->owner, c); ++ lru_gen_migrate_mm(mm); + task_unlock(c); + put_task_struct(c); + } +diff --git a/kernel/fork.c b/kernel/fork.c +index 426cd0c51f9e..dfa84200229f 100644 +--- a/kernel/fork.c ++++ b/kernel/fork.c +@@ -665,6 +665,7 @@ static void check_mm(struct mm_struct *mm) + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS + VM_BUG_ON_MM(mm->pmd_huge_pte, mm); + #endif ++ VM_BUG_ON_MM(lru_gen_mm_is_active(mm), mm); + } + + #define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL)) +@@ -1055,6 +1056,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, + goto fail_nocontext; + + mm->user_ns = get_user_ns(user_ns); ++ lru_gen_init_mm(mm); + return mm; + + fail_nocontext: +@@ 
-1097,6 +1099,7 @@ static inline void __mmput(struct mm_struct *mm) + } + if (mm->binfmt) + module_put(mm->binfmt->module); ++ lru_gen_del_mm(mm); + mmdrop(mm); + } + +@@ -2521,6 +2524,13 @@ pid_t kernel_clone(struct kernel_clone_args *args) + get_task_struct(p); + } + ++ if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) { ++ /* lock the task to synchronize with memcg migration */ ++ task_lock(p); ++ lru_gen_add_mm(p->mm); ++ task_unlock(p); ++ } ++ + wake_up_new_task(p); + + /* forking complete and child started to run, tell ptracer */ +diff --git a/kernel/kthread.c b/kernel/kthread.c +index 1578973c5740..8da7767bb06a 100644 +--- a/kernel/kthread.c ++++ b/kernel/kthread.c +@@ -1303,6 +1303,7 @@ void kthread_use_mm(struct mm_struct *mm) + tsk->mm = mm; + membarrier_update_current_mm(mm); + switch_mm_irqs_off(active_mm, mm, tsk); ++ lru_gen_switch_mm(active_mm, mm); + local_irq_enable(); + task_unlock(tsk); + #ifdef finish_arch_post_lock_switch +diff --git a/kernel/sched/core.c b/kernel/sched/core.c +index 98191218d891..bd626dbdb816 100644 +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -4306,6 +4306,7 @@ context_switch(struct rq *rq, struct task_struct *prev, + * finish_task_switch()'s mmdrop(). + */ + switch_mm_irqs_off(prev->active_mm, next->mm, next); ++ lru_gen_switch_mm(prev->active_mm, next->mm); + + if (!prev->mm) { // from kernel + /* will mmdrop() in finish_task_switch(). */ +@@ -7597,6 +7598,7 @@ void idle_task_exit(void) + + if (mm != &init_mm) { + switch_mm(mm, &init_mm, current); ++ lru_gen_switch_mm(mm, &init_mm); + finish_arch_post_lock_switch(); + } + +diff --git a/mm/memcontrol.c b/mm/memcontrol.c +index e064ac0d850a..496e91e813af 100644 +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -5206,6 +5206,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) + free_mem_cgroup_per_node_info(memcg, node); + free_percpu(memcg->vmstats_percpu); + free_percpu(memcg->vmstats_local); ++ lru_gen_free_mm_list(memcg); + kfree(memcg); + } + +@@ -5258,6 +5259,9 @@ static struct mem_cgroup *mem_cgroup_alloc(void) + if (alloc_mem_cgroup_per_node_info(memcg, node)) + goto fail; + ++ if (lru_gen_alloc_mm_list(memcg)) ++ goto fail; ++ + if (memcg_wb_domain_init(memcg, GFP_KERNEL)) + goto fail; + +@@ -6162,6 +6166,29 @@ static void mem_cgroup_move_task(void) + } + #endif + ++#ifdef CONFIG_LRU_GEN ++static void mem_cgroup_attach(struct cgroup_taskset *tset) ++{ ++ struct cgroup_subsys_state *css; ++ struct task_struct *task = NULL; ++ ++ cgroup_taskset_for_each_leader(task, css, tset) ++ ; ++ ++ if (!task) ++ return; ++ ++ task_lock(task); ++ if (task->mm && task->mm->owner == task) ++ lru_gen_migrate_mm(task->mm); ++ task_unlock(task); ++} ++#else ++static void mem_cgroup_attach(struct cgroup_taskset *tset) ++{ ++} ++#endif ++ + static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value) + { + if (value == PAGE_COUNTER_MAX) +@@ -6502,6 +6529,7 @@ struct cgroup_subsys memory_cgrp_subsys = { + .css_free = mem_cgroup_css_free, + .css_reset = mem_cgroup_css_reset, + .can_attach = mem_cgroup_can_attach, ++ .attach = mem_cgroup_attach, + .cancel_attach = mem_cgroup_cancel_attach, + .post_attach = mem_cgroup_move_task, + .dfl_cftypes = memory_files, +diff --git a/mm/vmscan.c b/mm/vmscan.c +index c74ebe2039f7..d67dfd1e3930 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -4464,6 +4464,313 @@ static bool positive_ctrl_err(struct controller_pos *sp, struct controller_pos * + sp->refaulted * max(pv->total, 1UL) * pv->gain; + } + 
++/****************************************************************************** ++ * mm_struct list ++ ******************************************************************************/ ++ ++enum { ++ MM_SCHED_ACTIVE, /* running processes */ ++ MM_SCHED_INACTIVE, /* sleeping processes */ ++ MM_LOCK_CONTENTION, /* lock contentions */ ++ MM_VMA_INTERVAL, /* VMAs within the range of current table */ ++ MM_LEAF_OTHER_NODE, /* entries not from node under reclaim */ ++ MM_LEAF_OTHER_MEMCG, /* entries not from memcg under reclaim */ ++ MM_LEAF_OLD, /* old entries */ ++ MM_LEAF_YOUNG, /* young entries */ ++ MM_LEAF_DIRTY, /* dirty entries */ ++ MM_LEAF_HOLE, /* non-present entries */ ++ MM_NONLEAF_OLD, /* old non-leaf pmd entries */ ++ MM_NONLEAF_YOUNG, /* young non-leaf pmd entries */ ++ NR_MM_STATS ++}; ++ ++/* mnemonic codes for the stats above */ ++#define MM_STAT_CODES "aicvnmoydhlu" ++ ++struct lru_gen_mm_list { ++ /* the head of a global or per-memcg mm_struct list */ ++ struct list_head head; ++ /* protects the list */ ++ spinlock_t lock; ++ struct { ++ /* set to max_seq after each round of walk */ ++ unsigned long cur_seq; ++ /* the next mm on the list to walk */ ++ struct list_head *iter; ++ /* to wait for the last worker to finish */ ++ struct wait_queue_head wait; ++ /* the number of concurrent workers */ ++ int nr_workers; ++ /* stats for debugging */ ++ unsigned long stats[NR_STAT_GENS][NR_MM_STATS]; ++ } nodes[0]; ++}; ++ ++static struct lru_gen_mm_list *global_mm_list; ++ ++static struct lru_gen_mm_list *alloc_mm_list(void) ++{ ++ int nid; ++ struct lru_gen_mm_list *mm_list; ++ ++ mm_list = kzalloc(struct_size(mm_list, nodes, nr_node_ids), GFP_KERNEL); ++ if (!mm_list) ++ return NULL; ++ ++ INIT_LIST_HEAD(&mm_list->head); ++ spin_lock_init(&mm_list->lock); ++ ++ for_each_node(nid) { ++ mm_list->nodes[nid].cur_seq = MIN_NR_GENS; ++ mm_list->nodes[nid].iter = &mm_list->head; ++ init_waitqueue_head(&mm_list->nodes[nid].wait); ++ } ++ ++ return mm_list; ++} ++ ++static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg) ++{ ++#ifdef CONFIG_MEMCG ++ if (!mem_cgroup_disabled()) ++ return memcg ? 
memcg->mm_list : root_mem_cgroup->mm_list; ++#endif ++ VM_BUG_ON(memcg); ++ ++ return global_mm_list; ++} ++ ++void lru_gen_init_mm(struct mm_struct *mm) ++{ ++ int file; ++ ++ INIT_LIST_HEAD(&mm->lrugen.list); ++#ifdef CONFIG_MEMCG ++ mm->lrugen.memcg = NULL; ++#endif ++#ifndef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH ++ atomic_set(&mm->lrugen.nr_cpus, 0); ++#endif ++ for (file = 0; file < ANON_AND_FILE; file++) ++ nodes_clear(mm->lrugen.nodes[file]); ++} ++ ++void lru_gen_add_mm(struct mm_struct *mm) ++{ ++ struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm); ++ struct lru_gen_mm_list *mm_list = get_mm_list(memcg); ++ ++ VM_BUG_ON_MM(!list_empty(&mm->lrugen.list), mm); ++#ifdef CONFIG_MEMCG ++ VM_BUG_ON_MM(mm->lrugen.memcg, mm); ++ WRITE_ONCE(mm->lrugen.memcg, memcg); ++#endif ++ spin_lock(&mm_list->lock); ++ list_add_tail(&mm->lrugen.list, &mm_list->head); ++ spin_unlock(&mm_list->lock); ++} ++ ++void lru_gen_del_mm(struct mm_struct *mm) ++{ ++ int nid; ++#ifdef CONFIG_MEMCG ++ struct lru_gen_mm_list *mm_list = get_mm_list(mm->lrugen.memcg); ++#else ++ struct lru_gen_mm_list *mm_list = get_mm_list(NULL); ++#endif ++ ++ spin_lock(&mm_list->lock); ++ ++ for_each_node(nid) { ++ if (mm_list->nodes[nid].iter != &mm->lrugen.list) ++ continue; ++ ++ mm_list->nodes[nid].iter = mm_list->nodes[nid].iter->next; ++ if (mm_list->nodes[nid].iter == &mm_list->head) ++ WRITE_ONCE(mm_list->nodes[nid].cur_seq, ++ mm_list->nodes[nid].cur_seq + 1); ++ } ++ ++ list_del_init(&mm->lrugen.list); ++ ++ spin_unlock(&mm_list->lock); ++ ++#ifdef CONFIG_MEMCG ++ mem_cgroup_put(mm->lrugen.memcg); ++ WRITE_ONCE(mm->lrugen.memcg, NULL); ++#endif ++} ++ ++#ifdef CONFIG_MEMCG ++int lru_gen_alloc_mm_list(struct mem_cgroup *memcg) ++{ ++ if (mem_cgroup_disabled()) ++ return 0; ++ ++ memcg->mm_list = alloc_mm_list(); ++ ++ return memcg->mm_list ? 
0 : -ENOMEM; ++} ++ ++void lru_gen_free_mm_list(struct mem_cgroup *memcg) ++{ ++ kfree(memcg->mm_list); ++ memcg->mm_list = NULL; ++} ++ ++void lru_gen_migrate_mm(struct mm_struct *mm) ++{ ++ struct mem_cgroup *memcg; ++ ++ lockdep_assert_held(&mm->owner->alloc_lock); ++ ++ if (mem_cgroup_disabled()) ++ return; ++ ++ rcu_read_lock(); ++ memcg = mem_cgroup_from_task(mm->owner); ++ rcu_read_unlock(); ++ if (memcg == mm->lrugen.memcg) ++ return; ++ ++ VM_BUG_ON_MM(!mm->lrugen.memcg, mm); ++ VM_BUG_ON_MM(list_empty(&mm->lrugen.list), mm); ++ ++ lru_gen_del_mm(mm); ++ lru_gen_add_mm(mm); ++} ++ ++static bool mm_has_migrated(struct mm_struct *mm, struct mem_cgroup *memcg) ++{ ++ return READ_ONCE(mm->lrugen.memcg) != memcg; ++} ++#else ++static bool mm_has_migrated(struct mm_struct *mm, struct mem_cgroup *memcg) ++{ ++ return false; ++} ++#endif ++ ++struct mm_walk_args { ++ struct mem_cgroup *memcg; ++ unsigned long max_seq; ++ unsigned long next_addr; ++ unsigned long start_pfn; ++ unsigned long end_pfn; ++ int node_id; ++ int batch_size; ++ int mm_stats[NR_MM_STATS]; ++ int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; ++ bool should_walk[ANON_AND_FILE]; ++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG) ++ unsigned long bitmap[BITS_TO_LONGS(PTRS_PER_PMD)]; ++#endif ++}; ++ ++static void reset_mm_stats(struct lru_gen_mm_list *mm_list, bool last, ++ struct mm_walk_args *args) ++{ ++ int i; ++ int nid = args->node_id; ++ int sid = sid_from_seq_or_gen(args->max_seq); ++ ++ lockdep_assert_held(&mm_list->lock); ++ ++ for (i = 0; i < NR_MM_STATS; i++) { ++ WRITE_ONCE(mm_list->nodes[nid].stats[sid][i], ++ mm_list->nodes[nid].stats[sid][i] + args->mm_stats[i]); ++ args->mm_stats[i] = 0; ++ } ++ ++ if (!last || NR_STAT_GENS == 1) ++ return; ++ ++ sid = sid_from_seq_or_gen(args->max_seq + 1); ++ for (i = 0; i < NR_MM_STATS; i++) ++ WRITE_ONCE(mm_list->nodes[nid].stats[sid][i], 0); ++} ++ ++static bool should_skip_mm(struct mm_struct *mm, int nid, int swappiness) ++{ ++ int file; ++ unsigned long size = 0; ++ ++ if (mm_is_oom_victim(mm)) ++ return true; ++ ++ for (file = !swappiness; file < ANON_AND_FILE; file++) { ++ if (lru_gen_mm_is_active(mm) || node_isset(nid, mm->lrugen.nodes[file])) ++ size += file ? get_mm_counter(mm, MM_FILEPAGES) : ++ get_mm_counter(mm, MM_ANONPAGES) + ++ get_mm_counter(mm, MM_SHMEMPAGES); ++ } ++ ++ /* leave the legwork to the rmap if mapped pages are too sparse */ ++ if (size < max(SWAP_CLUSTER_MAX, mm_pgtables_bytes(mm) / PAGE_SIZE)) ++ return true; ++ ++ return !mmget_not_zero(mm); ++} ++ ++/* To support multiple workers that concurrently walk mm_struct list. 
*/ ++static bool get_next_mm(struct mm_walk_args *args, int swappiness, struct mm_struct **iter) ++{ ++ bool last = true; ++ struct mm_struct *mm = NULL; ++ int nid = args->node_id; ++ struct lru_gen_mm_list *mm_list = get_mm_list(args->memcg); ++ ++ if (*iter) ++ mmput_async(*iter); ++ else if (args->max_seq <= READ_ONCE(mm_list->nodes[nid].cur_seq)) ++ return false; ++ ++ spin_lock(&mm_list->lock); ++ ++ VM_BUG_ON(args->max_seq > mm_list->nodes[nid].cur_seq + 1); ++ VM_BUG_ON(*iter && args->max_seq < mm_list->nodes[nid].cur_seq); ++ VM_BUG_ON(*iter && !mm_list->nodes[nid].nr_workers); ++ ++ if (args->max_seq <= mm_list->nodes[nid].cur_seq) { ++ last = *iter; ++ goto done; ++ } ++ ++ if (mm_list->nodes[nid].iter == &mm_list->head) { ++ VM_BUG_ON(*iter || mm_list->nodes[nid].nr_workers); ++ mm_list->nodes[nid].iter = mm_list->nodes[nid].iter->next; ++ } ++ ++ while (!mm && mm_list->nodes[nid].iter != &mm_list->head) { ++ mm = list_entry(mm_list->nodes[nid].iter, struct mm_struct, lrugen.list); ++ mm_list->nodes[nid].iter = mm_list->nodes[nid].iter->next; ++ if (should_skip_mm(mm, nid, swappiness)) ++ mm = NULL; ++ ++ args->mm_stats[mm ? MM_SCHED_ACTIVE : MM_SCHED_INACTIVE]++; ++ } ++ ++ if (mm_list->nodes[nid].iter == &mm_list->head) ++ WRITE_ONCE(mm_list->nodes[nid].cur_seq, ++ mm_list->nodes[nid].cur_seq + 1); ++done: ++ if (*iter && !mm) ++ mm_list->nodes[nid].nr_workers--; ++ if (!*iter && mm) ++ mm_list->nodes[nid].nr_workers++; ++ ++ last = last && !mm_list->nodes[nid].nr_workers && ++ mm_list->nodes[nid].iter == &mm_list->head; ++ ++ reset_mm_stats(mm_list, last, args); ++ ++ spin_unlock(&mm_list->lock); ++ ++ *iter = mm; ++ ++ return last; ++} ++ + /****************************************************************************** + * state change + ******************************************************************************/ +@@ -4694,6 +5001,15 @@ static int __init init_lru_gen(void) + { + BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS); + BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS); ++ BUILD_BUG_ON(sizeof(MM_STAT_CODES) != NR_MM_STATS + 1); ++ ++ if (mem_cgroup_disabled()) { ++ global_mm_list = alloc_mm_list(); ++ if (!global_mm_list) { ++ pr_err("lru_gen: failed to allocate global mm_struct list\n"); ++ return -ENOMEM; ++ } ++ } + + if (hotplug_memory_notifier(lru_gen_online_mem, 0)) + pr_err("lru_gen: failed to subscribe hotplug notifications\n"); +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-11-16-mm-multigenerational-lru-aging.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-11-16-mm-multigenerational-lru-aging.patch new file mode 100644 index 0000000..2e2afbb --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-11-16-mm-multigenerational-lru-aging.patch @@ -0,0 +1,853 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no + version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id CC788C43460 + for ; Tue, 13 Apr 2021 06:57:20 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) 
with ESMTP id ADF7B6128E + for ; Tue, 13 Apr 2021 06:57:20 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S1345140AbhDMG5i (ORCPT + ); + Tue, 13 Apr 2021 02:57:38 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44200 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S1345088AbhDMG5Q (ORCPT + ); + Tue, 13 Apr 2021 02:57:16 -0400 +Received: from mail-qt1-x849.google.com (mail-qt1-x849.google.com [IPv6:2607:f8b0:4864:20::849]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 8F91CC061756 + for ; Mon, 12 Apr 2021 23:56:57 -0700 (PDT) +Received: by mail-qt1-x849.google.com with SMTP id t18so666548qtw.15 + for ; Mon, 12 Apr 2021 23:56:57 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=6fhJHIbNqUBjvtOegfE2MyphyVhL6hJWTXmeiM/7CYU=; + b=nCCKEcrZRzhFu47i9x+KHFgV9bpn2QVPdLNp94/tvI2vdGJLS5yFnnrPQk/ZvV+805 + oU9Y2xHhJFPVW5TfOLl+0cfdlw6G7bEAFmF1h4Uf+m4IIGVwMY+rg0tngfuV3hILEC/m + n+gQGstNi8BWz/WCQfT/CZcdFvYSUN04sTRJQZuLJPkujaFh7e8KEoTWM8Els3JqHgbc + LgYf9G3svPIdXSaGd7VPKBNPPf6gEFy/2HFBYAgJkJKvcduCSex9l6NdzI0GMRm0OYUM + C4BaQwaJZ6SJQXdHUAecfaC52R8b2Z/IZLmM44hUGJ3NGHSotvQ6lyAB8x6J2J/K2F2i + PJ9A== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=6fhJHIbNqUBjvtOegfE2MyphyVhL6hJWTXmeiM/7CYU=; + b=RKke0otlx0Z8q7yzlS4XpyZ5aovH7VEdxD07op8jejoFs5sh8CiOsB0OWYJ7WtpxIx + 5eGpQFXb9BDl7z/w8mHGGABHKc6R44O+H6hfTDY7lBM6ycMXzUSbjQvnLzA1hgsk5Qzz + dFshVj2i3XpZoeXGBCx8f9E8lOrxcWydcMYmGU5PvLhJcJh5otr+dDPYiOpTdW+v1h1F + 7zmsGOz9U6qOA3KwGKCLm44MrC1JtdV9omiuSJHBD+QfkfnIBcdeKCwgyRE44/35eufm + 6b2R7XpOsNHciIksiDnzt5wgJJ1KnlB7E7hjCN/Q77qQcVL7cnSVQBCcYQOvUHoJ8lNg + fXFA== +X-Gm-Message-State: AOAM532Oo0F4MpWnfaEOY3TDummCsibMAZArGFkZs9eTu66X+a59qfdI + ziZoz/a2u1Q+YaODOe4XEW2tOqr3t3c= +X-Google-Smtp-Source: ABdhPJwG6wdrxi/hta1GN0K/zTCsJXK0CKzWYrx4efW6qkJhGiiXfKR8fAg0J/tzxkhd2xOMwJf4T1jXgvA= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:ad4:4894:: with SMTP id bv20mr10806368qvb.34.1618297016759; + Mon, 12 Apr 2021 23:56:56 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:28 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-12-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 11/16] mm: multigenerational lru: aging +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + page-reclaim@google.com, Yu Zhao +Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +The aging produces young generations. 
Given an lruvec, the aging walks +the mm_struct list associated with this lruvec to scan page tables for +referenced pages. Upon finding one, the aging updates the generation +number of this page to max_seq. After each round of scan, the aging +increments max_seq. The aging is due when both of min_seq[2] reaches +max_seq-1, assuming both anon and file types are reclaimable. + +The aging uses the following optimizations when scanning page tables: + 1) It will not scan page tables from processes that have been + sleeping since the last scan. + 2) It will not scan PTE tables under non-leaf PMD entries that do + not have the accessed bit set, when + CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG=y. + 3) It will not zigzag between the PGD table and the same PMD or PTE + table spanning multiple VMAs. In other words, it finishes all the + VMAs with the range of the same PMD or PTE table before it returns + to the PGD table. This optimizes workloads that have large numbers + of tiny VMAs, especially when CONFIG_PGTABLE_LEVELS=5. + +Signed-off-by: Yu Zhao +--- + mm/vmscan.c | 700 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 700 insertions(+) + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index d67dfd1e3930..31e1b4155677 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -50,6 +50,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -4771,6 +4772,702 @@ static bool get_next_mm(struct mm_walk_args *args, int swappiness, struct mm_str + return last; + } + ++/****************************************************************************** ++ * the aging ++ ******************************************************************************/ ++ ++static void update_batch_size(struct page *page, int old_gen, int new_gen, ++ struct mm_walk_args *args) ++{ ++ int file = page_is_file_lru(page); ++ int zone = page_zonenum(page); ++ int delta = thp_nr_pages(page); ++ ++ VM_BUG_ON(old_gen >= MAX_NR_GENS); ++ VM_BUG_ON(new_gen >= MAX_NR_GENS); ++ ++ args->batch_size++; ++ ++ args->nr_pages[old_gen][file][zone] -= delta; ++ args->nr_pages[new_gen][file][zone] += delta; ++} ++ ++static void reset_batch_size(struct lruvec *lruvec, struct mm_walk_args *args) ++{ ++ int gen, file, zone; ++ struct lrugen *lrugen = &lruvec->evictable; ++ ++ args->batch_size = 0; ++ ++ spin_lock_irq(&lruvec->lru_lock); ++ ++ for_each_gen_type_zone(gen, file, zone) { ++ enum lru_list lru = LRU_FILE * file; ++ int total = args->nr_pages[gen][file][zone]; ++ ++ if (!total) ++ continue; ++ ++ args->nr_pages[gen][file][zone] = 0; ++ WRITE_ONCE(lrugen->sizes[gen][file][zone], ++ lrugen->sizes[gen][file][zone] + total); ++ ++ if (lru_gen_is_active(lruvec, gen)) ++ lru += LRU_ACTIVE; ++ update_lru_size(lruvec, lru, zone, total); ++ } ++ ++ spin_unlock_irq(&lruvec->lru_lock); ++} ++ ++static int page_update_gen(struct page *page, int new_gen) ++{ ++ int old_gen; ++ unsigned long old_flags, new_flags; ++ ++ VM_BUG_ON(new_gen >= MAX_NR_GENS); ++ ++ do { ++ old_flags = READ_ONCE(page->flags); ++ ++ old_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; ++ if (old_gen < 0) ++ new_flags = old_flags | BIT(PG_referenced); ++ else ++ new_flags = (old_flags & ~(LRU_GEN_MASK | LRU_USAGE_MASK | ++ LRU_TIER_FLAGS)) | ((new_gen + 1UL) << LRU_GEN_PGOFF); ++ ++ if (old_flags == new_flags) ++ break; ++ } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags); ++ ++ return old_gen; ++} ++ ++static int should_skip_vma(unsigned long start, unsigned long end, struct mm_walk *walk) ++{ ++ struct vm_area_struct *vma = 
walk->vma; ++ struct mm_walk_args *args = walk->private; ++ ++ if (!vma_is_accessible(vma) || is_vm_hugetlb_page(vma) || ++ (vma->vm_flags & (VM_LOCKED | VM_SPECIAL))) ++ return true; ++ ++ if (vma_is_anonymous(vma)) ++ return !args->should_walk[0]; ++ ++ if (vma_is_shmem(vma)) ++ return !args->should_walk[0] || ++ mapping_unevictable(vma->vm_file->f_mapping); ++ ++ return !args->should_walk[1] || vma_is_dax(vma) || ++ vma == get_gate_vma(vma->vm_mm) || ++ mapping_unevictable(vma->vm_file->f_mapping); ++} ++ ++/* ++ * Some userspace memory allocators create many single-page VMAs. So instead of ++ * returning back to the PGD table for each of such VMAs, we finish at least an ++ * entire PMD table and therefore avoid many zigzags. This optimizes page table ++ * walks for workloads that have large numbers of tiny VMAs. ++ * ++ * We scan PMD tables in two pass. The first pass reaches to PTE tables and ++ * doesn't take the PMD lock. The second pass clears the accessed bit on PMD ++ * entries and needs to take the PMD lock. The second pass is only done on the ++ * PMD entries that first pass has found the accessed bit is set, and they must ++ * be: ++ * 1) leaf entries mapping huge pages from the node under reclaim ++ * 2) non-leaf entries whose leaf entries only map pages from the node under ++ * reclaim, when CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG=y. ++ */ ++static bool get_next_interval(struct mm_walk *walk, unsigned long mask, unsigned long size, ++ unsigned long *start, unsigned long *end) ++{ ++ unsigned long next = round_up(*end, size); ++ struct mm_walk_args *args = walk->private; ++ ++ VM_BUG_ON(mask & size); ++ VM_BUG_ON(*start != *end); ++ VM_BUG_ON(!(*end & ~mask)); ++ VM_BUG_ON((*end & mask) != (next & mask)); ++ ++ while (walk->vma) { ++ if (next >= walk->vma->vm_end) { ++ walk->vma = walk->vma->vm_next; ++ continue; ++ } ++ ++ if ((next & mask) != (walk->vma->vm_start & mask)) ++ return false; ++ ++ if (next <= walk->vma->vm_start && ++ should_skip_vma(walk->vma->vm_start, walk->vma->vm_end, walk)) { ++ walk->vma = walk->vma->vm_next; ++ continue; ++ } ++ ++ args->mm_stats[MM_VMA_INTERVAL]++; ++ ++ *start = max(next, walk->vma->vm_start); ++ next = (next | ~mask) + 1; ++ /* rounded-up boundaries can wrap to 0 */ ++ *end = next && next < walk->vma->vm_end ? 
next : walk->vma->vm_end; ++ ++ return true; ++ } ++ ++ return false; ++} ++ ++static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, ++ struct mm_walk *walk) ++{ ++ int i; ++ pte_t *pte; ++ spinlock_t *ptl; ++ int remote = 0; ++ struct mm_walk_args *args = walk->private; ++ int old_gen, new_gen = lru_gen_from_seq(args->max_seq); ++ ++ VM_BUG_ON(pmd_leaf(*pmd)); ++ ++ pte = pte_offset_map_lock(walk->mm, pmd, start & PMD_MASK, &ptl); ++ arch_enter_lazy_mmu_mode(); ++restart: ++ for (i = pte_index(start); start != end; i++, start += PAGE_SIZE) { ++ struct page *page; ++ unsigned long pfn = pte_pfn(pte[i]); ++ ++ if (!pte_present(pte[i]) || is_zero_pfn(pfn)) { ++ args->mm_stats[MM_LEAF_HOLE]++; ++ continue; ++ } ++ ++ if (!pte_young(pte[i])) { ++ args->mm_stats[MM_LEAF_OLD]++; ++ continue; ++ } ++ ++ if (pfn < args->start_pfn || pfn >= args->end_pfn) { ++ remote++; ++ args->mm_stats[MM_LEAF_OTHER_NODE]++; ++ continue; ++ } ++ ++ page = compound_head(pfn_to_page(pfn)); ++ if (page_to_nid(page) != args->node_id) { ++ remote++; ++ args->mm_stats[MM_LEAF_OTHER_NODE]++; ++ continue; ++ } ++ ++ if (!ptep_test_and_clear_young(walk->vma, start, pte + i)) ++ continue; ++ ++ if (pte_dirty(pte[i]) && !PageDirty(page) && ++ !(PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page))) { ++ set_page_dirty(page); ++ args->mm_stats[MM_LEAF_DIRTY]++; ++ } ++ ++ if (page_memcg_rcu(page) != args->memcg) { ++ args->mm_stats[MM_LEAF_OTHER_MEMCG]++; ++ continue; ++ } ++ ++ old_gen = page_update_gen(page, new_gen); ++ if (old_gen >= 0 && old_gen != new_gen) ++ update_batch_size(page, old_gen, new_gen, args); ++ args->mm_stats[MM_LEAF_YOUNG]++; ++ } ++ ++ if (i < PTRS_PER_PTE && get_next_interval(walk, PMD_MASK, PAGE_SIZE, &start, &end)) ++ goto restart; ++ ++ arch_leave_lazy_mmu_mode(); ++ pte_unmap_unlock(pte, ptl); ++ ++ return !remote; ++} ++ ++static bool walk_pmd_range_unlocked(pud_t *pud, unsigned long start, unsigned long end, ++ struct mm_walk *walk) ++{ ++ int i; ++ pmd_t *pmd; ++ unsigned long next; ++ int young = 0; ++ struct mm_walk_args *args = walk->private; ++ ++ VM_BUG_ON(pud_leaf(*pud)); ++ ++ pmd = pmd_offset(pud, start & PUD_MASK); ++restart: ++ for (i = pmd_index(start); start != end; i++, start = next) { ++ pmd_t val = pmd_read_atomic(pmd + i); ++ ++ next = pmd_addr_end(start, end); ++ ++ barrier(); ++ if (!pmd_present(val) || is_huge_zero_pmd(val)) { ++ args->mm_stats[MM_LEAF_HOLE]++; ++ continue; ++ } ++ ++ if (pmd_trans_huge(val)) { ++ unsigned long pfn = pmd_pfn(val); ++ ++ if (!pmd_young(val)) { ++ args->mm_stats[MM_LEAF_OLD]++; ++ continue; ++ } ++ ++ if (pfn < args->start_pfn || pfn >= args->end_pfn) { ++ args->mm_stats[MM_LEAF_OTHER_NODE]++; ++ continue; ++ } ++ ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE ++ young++; ++ __set_bit(i, args->bitmap); ++#endif ++ continue; ++ } ++ ++#ifdef CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG ++ if (!pmd_young(val)) { ++ args->mm_stats[MM_NONLEAF_OLD]++; ++ continue; ++ } ++#endif ++ ++ if (walk_pte_range(&val, start, next, walk)) { ++#ifdef CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG ++ young++; ++ __set_bit(i, args->bitmap); ++#endif ++ } ++ } ++ ++ if (i < PTRS_PER_PMD && get_next_interval(walk, PUD_MASK, PMD_SIZE, &start, &end)) ++ goto restart; ++ ++ return young; ++} ++ ++#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG) ++static void walk_pmd_range_locked(pud_t *pud, unsigned long start, unsigned long end, ++ struct mm_walk *walk) ++{ ++ int i; ++ pmd_t *pmd; ++ spinlock_t *ptl; ++ struct mm_walk_args 
*args = walk->private; ++ int old_gen, new_gen = lru_gen_from_seq(args->max_seq); ++ ++ VM_BUG_ON(pud_leaf(*pud)); ++ ++ start &= PUD_MASK; ++ pmd = pmd_offset(pud, start); ++ ptl = pmd_lock(walk->mm, pmd); ++ arch_enter_lazy_mmu_mode(); ++ ++ for_each_set_bit(i, args->bitmap, PTRS_PER_PMD) { ++ struct page *page; ++ unsigned long pfn = pmd_pfn(pmd[i]); ++ unsigned long addr = start + PMD_SIZE * i; ++ ++ if (!pmd_present(pmd[i]) || is_huge_zero_pmd(pmd[i])) { ++ args->mm_stats[MM_LEAF_HOLE]++; ++ continue; ++ } ++ ++ if (!pmd_young(pmd[i])) { ++ args->mm_stats[MM_LEAF_OLD]++; ++ continue; ++ } ++ ++ if (!pmd_trans_huge(pmd[i])) { ++#ifdef CONFIG_HAVE_ARCH_PARENT_PMD_YOUNG ++ args->mm_stats[MM_NONLEAF_YOUNG]++; ++ pmdp_test_and_clear_young(walk->vma, addr, pmd + i); ++#endif ++ continue; ++ } ++ ++ if (pfn < args->start_pfn || pfn >= args->end_pfn) { ++ args->mm_stats[MM_LEAF_OTHER_NODE]++; ++ continue; ++ } ++ ++ page = pfn_to_page(pfn); ++ VM_BUG_ON_PAGE(PageTail(page), page); ++ if (page_to_nid(page) != args->node_id) { ++ args->mm_stats[MM_LEAF_OTHER_NODE]++; ++ continue; ++ } ++ ++ if (!pmdp_test_and_clear_young(walk->vma, addr, pmd + i)) ++ continue; ++ ++ if (pmd_dirty(pmd[i]) && !PageDirty(page) && ++ !(PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page))) { ++ set_page_dirty(page); ++ args->mm_stats[MM_LEAF_DIRTY]++; ++ } ++ ++ if (page_memcg_rcu(page) != args->memcg) { ++ args->mm_stats[MM_LEAF_OTHER_MEMCG]++; ++ continue; ++ } ++ ++ old_gen = page_update_gen(page, new_gen); ++ if (old_gen >= 0 && old_gen != new_gen) ++ update_batch_size(page, old_gen, new_gen, args); ++ args->mm_stats[MM_LEAF_YOUNG]++; ++ } ++ ++ arch_leave_lazy_mmu_mode(); ++ spin_unlock(ptl); ++ ++ memset(args->bitmap, 0, sizeof(args->bitmap)); ++} ++#else ++static void walk_pmd_range_locked(pud_t *pud, unsigned long start, unsigned long end, ++ struct mm_walk *walk) ++{ ++} ++#endif ++ ++static int walk_pud_range(p4d_t *p4d, unsigned long start, unsigned long end, ++ struct mm_walk *walk) ++{ ++ int i; ++ pud_t *pud; ++ unsigned long next; ++ struct mm_walk_args *args = walk->private; ++ ++ VM_BUG_ON(p4d_leaf(*p4d)); ++ ++ pud = pud_offset(p4d, start & P4D_MASK); ++restart: ++ for (i = pud_index(start); start != end; i++, start = next) { ++ pud_t val = READ_ONCE(pud[i]); ++ ++ next = pud_addr_end(start, end); ++ ++ if (!pud_present(val) || WARN_ON_ONCE(pud_leaf(val))) ++ continue; ++ ++ if (walk_pmd_range_unlocked(&val, start, next, walk)) ++ walk_pmd_range_locked(&val, start, next, walk); ++ ++ if (args->batch_size >= MAX_BATCH_SIZE) { ++ end = (start | ~PUD_MASK) + 1; ++ goto done; ++ } ++ } ++ ++ if (i < PTRS_PER_PUD && get_next_interval(walk, P4D_MASK, PUD_SIZE, &start, &end)) ++ goto restart; ++ ++ end = round_up(end, P4D_SIZE); ++done: ++ /* rounded-up boundaries can wrap to 0 */ ++ args->next_addr = end && walk->vma ? 
max(end, walk->vma->vm_start) : 0; ++ ++ return -EAGAIN; ++} ++ ++static void walk_mm(struct mm_walk_args *args, int swappiness, struct mm_struct *mm) ++{ ++ static const struct mm_walk_ops mm_walk_ops = { ++ .test_walk = should_skip_vma, ++ .p4d_entry = walk_pud_range, ++ }; ++ ++ int err; ++ int file; ++ int nid = args->node_id; ++ struct mem_cgroup *memcg = args->memcg; ++ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); ++ ++ args->next_addr = FIRST_USER_ADDRESS; ++ for (file = !swappiness; file < ANON_AND_FILE; file++) ++ args->should_walk[file] = lru_gen_mm_is_active(mm) || ++ node_isset(nid, mm->lrugen.nodes[file]); ++ ++ do { ++ unsigned long start = args->next_addr; ++ unsigned long end = mm->highest_vm_end; ++ ++ err = -EBUSY; ++ ++ preempt_disable(); ++ rcu_read_lock(); ++ ++#ifdef CONFIG_MEMCG ++ if (memcg && atomic_read(&memcg->moving_account)) { ++ args->mm_stats[MM_LOCK_CONTENTION]++; ++ goto contended; ++ } ++#endif ++ if (!mmap_read_trylock(mm)) { ++ args->mm_stats[MM_LOCK_CONTENTION]++; ++ goto contended; ++ } ++ ++ err = walk_page_range(mm, start, end, &mm_walk_ops, args); ++ ++ mmap_read_unlock(mm); ++ ++ if (args->batch_size) ++ reset_batch_size(lruvec, args); ++contended: ++ rcu_read_unlock(); ++ preempt_enable(); ++ ++ cond_resched(); ++ } while (err == -EAGAIN && args->next_addr && ++ !mm_is_oom_victim(mm) && !mm_has_migrated(mm, memcg)); ++ ++ if (err == -EBUSY) ++ return; ++ ++ for (file = !swappiness; file < ANON_AND_FILE; file++) { ++ if (args->should_walk[file]) ++ node_clear(nid, mm->lrugen.nodes[file]); ++ } ++} ++ ++static void page_inc_gen(struct page *page, struct lruvec *lruvec, bool front) ++{ ++ int old_gen, new_gen; ++ unsigned long old_flags, new_flags; ++ int file = page_is_file_lru(page); ++ int zone = page_zonenum(page); ++ struct lrugen *lrugen = &lruvec->evictable; ++ ++ old_gen = lru_gen_from_seq(lrugen->min_seq[file]); ++ ++ do { ++ old_flags = READ_ONCE(page->flags); ++ new_gen = ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; ++ VM_BUG_ON_PAGE(new_gen < 0, page); ++ if (new_gen >= 0 && new_gen != old_gen) ++ goto sort; ++ ++ new_gen = (old_gen + 1) % MAX_NR_GENS; ++ new_flags = (old_flags & ~(LRU_GEN_MASK | LRU_USAGE_MASK | LRU_TIER_FLAGS)) | ++ ((new_gen + 1UL) << LRU_GEN_PGOFF); ++ /* mark the page for reclaim if it's pending writeback */ ++ if (front) ++ new_flags |= BIT(PG_reclaim); ++ } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags); ++ ++ lru_gen_update_size(page, lruvec, old_gen, new_gen); ++sort: ++ if (front) ++ list_move(&page->lru, &lrugen->lists[new_gen][file][zone]); ++ else ++ list_move_tail(&page->lru, &lrugen->lists[new_gen][file][zone]); ++} ++ ++static bool try_inc_min_seq(struct lruvec *lruvec, int file) ++{ ++ int gen, zone; ++ bool success = false; ++ struct lrugen *lrugen = &lruvec->evictable; ++ ++ VM_BUG_ON(!seq_is_valid(lruvec)); ++ ++ while (get_nr_gens(lruvec, file) > MIN_NR_GENS) { ++ gen = lru_gen_from_seq(lrugen->min_seq[file]); ++ ++ for (zone = 0; zone < MAX_NR_ZONES; zone++) { ++ if (!list_empty(&lrugen->lists[gen][file][zone])) ++ return success; ++ } ++ ++ reset_controller_pos(lruvec, gen, file); ++ WRITE_ONCE(lrugen->min_seq[file], lrugen->min_seq[file] + 1); ++ ++ success = true; ++ } ++ ++ return success; ++} ++ ++static bool inc_min_seq(struct lruvec *lruvec, int file) ++{ ++ int gen, zone; ++ int batch_size = 0; ++ struct lrugen *lrugen = &lruvec->evictable; ++ ++ VM_BUG_ON(!seq_is_valid(lruvec)); ++ ++ if (get_nr_gens(lruvec, file) != MAX_NR_GENS) ++ return true; 
++ ++ gen = lru_gen_from_seq(lrugen->min_seq[file]); ++ ++ for (zone = 0; zone < MAX_NR_ZONES; zone++) { ++ struct list_head *head = &lrugen->lists[gen][file][zone]; ++ ++ while (!list_empty(head)) { ++ struct page *page = lru_to_page(head); ++ ++ VM_BUG_ON_PAGE(PageTail(page), page); ++ VM_BUG_ON_PAGE(PageUnevictable(page), page); ++ VM_BUG_ON_PAGE(PageActive(page), page); ++ VM_BUG_ON_PAGE(page_is_file_lru(page) != file, page); ++ VM_BUG_ON_PAGE(page_zonenum(page) != zone, page); ++ ++ prefetchw_prev_lru_page(page, head, flags); ++ ++ page_inc_gen(page, lruvec, false); ++ ++ if (++batch_size == MAX_BATCH_SIZE) ++ return false; ++ } ++ ++ VM_BUG_ON(lrugen->sizes[gen][file][zone]); ++ } ++ ++ reset_controller_pos(lruvec, gen, file); ++ WRITE_ONCE(lrugen->min_seq[file], lrugen->min_seq[file] + 1); ++ ++ return true; ++} ++ ++static void inc_max_seq(struct lruvec *lruvec) ++{ ++ int gen, file, zone; ++ struct lrugen *lrugen = &lruvec->evictable; ++ ++ spin_lock_irq(&lruvec->lru_lock); ++ ++ VM_BUG_ON(!seq_is_valid(lruvec)); ++ ++ for (file = 0; file < ANON_AND_FILE; file++) { ++ if (try_inc_min_seq(lruvec, file)) ++ continue; ++ ++ while (!inc_min_seq(lruvec, file)) { ++ spin_unlock_irq(&lruvec->lru_lock); ++ cond_resched(); ++ spin_lock_irq(&lruvec->lru_lock); ++ } ++ } ++ ++ gen = lru_gen_from_seq(lrugen->max_seq - 1); ++ for_each_type_zone(file, zone) { ++ enum lru_list lru = LRU_FILE * file; ++ long total = lrugen->sizes[gen][file][zone]; ++ ++ if (!total) ++ continue; ++ ++ WARN_ON_ONCE(total != (int)total); ++ ++ update_lru_size(lruvec, lru, zone, total); ++ update_lru_size(lruvec, lru + LRU_ACTIVE, zone, -total); ++ } ++ ++ gen = lru_gen_from_seq(lrugen->max_seq + 1); ++ for_each_type_zone(file, zone) { ++ VM_BUG_ON(lrugen->sizes[gen][file][zone]); ++ VM_BUG_ON(!list_empty(&lrugen->lists[gen][file][zone])); ++ } ++ ++ for (file = 0; file < ANON_AND_FILE; file++) ++ reset_controller_pos(lruvec, gen, file); ++ ++ WRITE_ONCE(lrugen->timestamps[gen], jiffies); ++ /* make sure all preceding modifications appear first */ ++ smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1); ++ ++ spin_unlock_irq(&lruvec->lru_lock); ++} ++ ++/* Main function used by foreground, background and user-triggered aging. */ ++static bool walk_mm_list(struct lruvec *lruvec, unsigned long max_seq, ++ struct scan_control *sc, int swappiness, struct mm_walk_args *args) ++{ ++ bool last; ++ bool alloc = !args; ++ struct mm_struct *mm = NULL; ++ struct lrugen *lrugen = &lruvec->evictable; ++ struct pglist_data *pgdat = lruvec_pgdat(lruvec); ++ int nid = pgdat->node_id; ++ struct mem_cgroup *memcg = lruvec_memcg(lruvec); ++ struct lru_gen_mm_list *mm_list = get_mm_list(memcg); ++ ++ VM_BUG_ON(max_seq > READ_ONCE(lrugen->max_seq)); ++ ++ /* ++ * For each walk of the mm_struct list of a memcg, we decrement the ++ * priority of its lrugen. For each walk of all memcgs in kswapd, we ++ * increment the priority of every lrugen. ++ * ++ * So if this lrugen has a higher priority (smaller value), it means ++ * other concurrent reclaimers have walked its mm list, and we skip it ++ * for this priority in order to balance the pressure on all memcgs. 
++ */ ++ if (!mem_cgroup_disabled() && !cgroup_reclaim(sc) && ++ sc->priority > atomic_read(&lrugen->priority)) ++ return false; ++ ++ if (alloc) { ++ args = kvzalloc_node(sizeof(*args), GFP_KERNEL, nid); ++ if (!args) ++ return false; ++ } ++ ++ args->memcg = memcg; ++ args->max_seq = max_seq; ++ args->start_pfn = pgdat->node_start_pfn; ++ args->end_pfn = pgdat_end_pfn(pgdat); ++ args->node_id = nid; ++ ++ do { ++ last = get_next_mm(args, swappiness, &mm); ++ if (mm) ++ walk_mm(args, swappiness, mm); ++ ++ cond_resched(); ++ } while (mm); ++ ++ if (alloc) ++ kvfree(args); ++ ++ if (!last) { ++ /* foreground aging prefers not to wait unless "necessary" */ ++ if (!current_is_kswapd() && sc->priority < DEF_PRIORITY - 2) ++ wait_event_killable(mm_list->nodes[nid].wait, ++ max_seq < READ_ONCE(lrugen->max_seq)); ++ ++ return max_seq < READ_ONCE(lrugen->max_seq); ++ } ++ ++ VM_BUG_ON(max_seq != READ_ONCE(lrugen->max_seq)); ++ ++ inc_max_seq(lruvec); ++ ++ if (!mem_cgroup_disabled()) ++ atomic_add_unless(&lrugen->priority, -1, 0); ++ ++ /* order against inc_max_seq() */ ++ smp_mb(); ++ /* either we see any waiters or they will see the updated max_seq */ ++ if (waitqueue_active(&mm_list->nodes[nid].wait)) ++ wake_up_all(&mm_list->nodes[nid].wait); ++ ++ wakeup_flusher_threads(WB_REASON_VMSCAN); ++ ++ return true; ++} ++ + /****************************************************************************** + * state change + ******************************************************************************/ +@@ -5002,6 +5699,9 @@ static int __init init_lru_gen(void) + BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS); + BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS); + BUILD_BUG_ON(sizeof(MM_STAT_CODES) != NR_MM_STATS + 1); ++ BUILD_BUG_ON(PMD_SIZE / PAGE_SIZE != PTRS_PER_PTE); ++ BUILD_BUG_ON(PUD_SIZE / PMD_SIZE != PTRS_PER_PMD); ++ BUILD_BUG_ON(P4D_SIZE / PUD_SIZE != PTRS_PER_PUD); + + if (mem_cgroup_disabled()) { + global_mm_list = alloc_mm_list(); +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-12-16-mm-multigenerational-lru-eviction.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-12-16-mm-multigenerational-lru-eviction.patch new file mode 100644 index 0000000..1085563 --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-12-16-mm-multigenerational-lru-eviction.patch @@ -0,0 +1,474 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no + version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id D9882C433ED + for ; Tue, 13 Apr 2021 06:57:29 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id B8C7E613B1 + for ; Tue, 13 Apr 2021 06:57:29 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S1345148AbhDMG5r (ORCPT + ); + Tue, 13 Apr 2021 02:57:47 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44204 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S1345094AbhDMG5S (ORCPT + ); + Tue, 13 Apr 2021 02:57:18 
-0400 +Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 3319EC06175F + for ; Mon, 12 Apr 2021 23:56:59 -0700 (PDT) +Received: by mail-yb1-xb4a.google.com with SMTP id p75so9209456ybc.8 + for ; Mon, 12 Apr 2021 23:56:59 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=AtcshlKlEpO25DWX4HdWHYKkg2qmJuRhLpG3jAQhwYc=; + b=KpNWVguu83mUBVdG9rV7ayYNm+Qrzu5gAuasFnKSoWlkRinGKl/FvUmCisXgOrxGC0 + C9Wgab1jU/EJCdE85EdYCvp7ANytDv3ICBmljKThBcjCsU/wnl68RE3qlTlwro63hIWt + MNfXX7skFRf+i1zpUlA6T7R/rTDSlD3n0pboX0T6KXoxN8TAWeB2SgBy2EDQkapMZU3f + Yj8IM3/wDy/W+hgIexStVVze+0Y+gs0LOFo9um6QLrtZfsj/heNSAn50raUEB2w/UGHv + wBBLmbIZyRpiDtLinzpzu1fIqj9Y/2CPQeg1p+ZMcg3wMV0JQXyTUvVglWkME0v6fKsG + fSRw== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=AtcshlKlEpO25DWX4HdWHYKkg2qmJuRhLpG3jAQhwYc=; + b=I5wcigjJOE57JyIN1RgYnvjQfqi/Tu5QohjDJ3zHpF6wCQbLs1mU8eUZ+TYGRp5xwm + PxULqfFEi9PFVydtMob1umooK7ndwpJBomSO9+hgGyBluwloY/kUvS3XtnV4b4UD45J/ + Ny/ylsjBg1K+INdvvcBjsJ62q+kSQWanrORUhTCG8yKu+Uug/vhGdOECiKug4pBAgktX + gjqN4aglQeOGaw3UbEG4s6mQuxRdsGY9S1TSistPPCZr+GCvEHf6tG/uc1wmO0zvm3M9 + 5zAnThurIlICc11ju7PpVVH/k5HZNlo7SLO0yxf5Pr03wG+SAnHTeSmT9zPzHWGTfA/6 + FxdA== +X-Gm-Message-State: AOAM532rwFd52QDY7yVuzhsUHKx/vQ3mvqMJUIYRA4CK/9WfDNvEvp4X + aLVlWGREIYgvAVa4LwBCuixrg5f/t3I= +X-Google-Smtp-Source: ABdhPJxtAb+i00KPB+eZ1AkPEHseGFum+ilW8ElwcmLIJblIT+FK3beKZjdoBl7K4l7X3wfk5ecz7lYtrhU= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:a25:f0b:: with SMTP id 11mr41690159ybp.208.1618297018316; + Mon, 12 Apr 2021 23:56:58 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:29 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-13-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 12/16] mm: multigenerational lru: eviction +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + page-reclaim@google.com, Yu Zhao +Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +The eviction consumes old generations. Given an lruvec, the eviction +scans the pages on the per-zone lists indexed by either of min_seq[2]. +It first tries to select a type based on the values of min_seq[2]. +When anon and file types are both available from the same generation, +it selects the one that has a lower refault rate. + +During a scan, the eviction sorts pages according to their generation +numbers, if the aging has found them referenced. 
It also moves pages +from the tiers that have higher refault rates than tier 0 to the next +generation. When it finds all the per-zone lists of a selected type +are empty, the eviction increments min_seq[2] indexed by this selected +type. + +Signed-off-by: Yu Zhao +--- + mm/vmscan.c | 341 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 341 insertions(+) + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 31e1b4155677..6239b1acd84f 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -5468,6 +5468,347 @@ static bool walk_mm_list(struct lruvec *lruvec, unsigned long max_seq, + return true; + } + ++/****************************************************************************** ++ * the eviction ++ ******************************************************************************/ ++ ++static bool sort_page(struct page *page, struct lruvec *lruvec, int tier_to_isolate) ++{ ++ bool success; ++ int gen = page_lru_gen(page); ++ int file = page_is_file_lru(page); ++ int zone = page_zonenum(page); ++ int tier = lru_tier_from_usage(page_tier_usage(page)); ++ struct lrugen *lrugen = &lruvec->evictable; ++ ++ VM_BUG_ON_PAGE(gen == -1, page); ++ VM_BUG_ON_PAGE(tier_to_isolate < 0, page); ++ ++ /* a lazy-free page that has been written into? */ ++ if (file && PageDirty(page) && PageAnon(page)) { ++ success = lru_gen_deletion(page, lruvec); ++ VM_BUG_ON_PAGE(!success, page); ++ SetPageSwapBacked(page); ++ add_page_to_lru_list_tail(page, lruvec); ++ return true; ++ } ++ ++ /* page_update_gen() has updated the page? */ ++ if (gen != lru_gen_from_seq(lrugen->min_seq[file])) { ++ list_move(&page->lru, &lrugen->lists[gen][file][zone]); ++ return true; ++ } ++ ++ /* activate the page if its tier has a higher refault rate */ ++ if (tier_to_isolate < tier) { ++ int sid = sid_from_seq_or_gen(gen); ++ ++ page_inc_gen(page, lruvec, false); ++ WRITE_ONCE(lrugen->activated[sid][file][tier - 1], ++ lrugen->activated[sid][file][tier - 1] + thp_nr_pages(page)); ++ inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file); ++ return true; ++ } ++ ++ /* ++ * A page can't be immediately evicted, and page_inc_gen() will mark it ++ * for reclaim and hopefully writeback will write it soon if it's dirty. 
++ */ ++ if (PageLocked(page) || PageWriteback(page) || (file && PageDirty(page))) { ++ page_inc_gen(page, lruvec, true); ++ return true; ++ } ++ ++ return false; ++} ++ ++static bool should_skip_page(struct page *page, struct scan_control *sc) ++{ ++ if (!sc->may_unmap && page_mapped(page)) ++ return true; ++ ++ if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) && ++ (PageDirty(page) || (PageAnon(page) && !PageSwapCache(page)))) ++ return true; ++ ++ if (!get_page_unless_zero(page)) ++ return true; ++ ++ if (!TestClearPageLRU(page)) { ++ put_page(page); ++ return true; ++ } ++ ++ return false; ++} ++ ++static void isolate_page(struct page *page, struct lruvec *lruvec) ++{ ++ bool success; ++ ++ success = lru_gen_deletion(page, lruvec); ++ VM_BUG_ON_PAGE(!success, page); ++ ++ if (PageActive(page)) { ++ ClearPageActive(page); ++ /* make sure shrink_page_list() rejects this page */ ++ SetPageReferenced(page); ++ return; ++ } ++ ++ /* make sure shrink_page_list() doesn't try to write this page */ ++ ClearPageReclaim(page); ++ /* make sure shrink_page_list() doesn't reject this page */ ++ ClearPageReferenced(page); ++} ++ ++static int scan_lru_gen_pages(struct lruvec *lruvec, struct scan_control *sc, ++ long *nr_to_scan, int file, int tier, ++ struct list_head *list) ++{ ++ bool success; ++ int gen, zone; ++ enum vm_event_item item; ++ int sorted = 0; ++ int scanned = 0; ++ int isolated = 0; ++ int batch_size = 0; ++ struct lrugen *lrugen = &lruvec->evictable; ++ ++ VM_BUG_ON(!list_empty(list)); ++ ++ if (get_nr_gens(lruvec, file) == MIN_NR_GENS) ++ return -ENOENT; ++ ++ gen = lru_gen_from_seq(lrugen->min_seq[file]); ++ ++ for (zone = sc->reclaim_idx; zone >= 0; zone--) { ++ LIST_HEAD(moved); ++ int skipped = 0; ++ struct list_head *head = &lrugen->lists[gen][file][zone]; ++ ++ while (!list_empty(head)) { ++ struct page *page = lru_to_page(head); ++ int delta = thp_nr_pages(page); ++ ++ VM_BUG_ON_PAGE(PageTail(page), page); ++ VM_BUG_ON_PAGE(PageUnevictable(page), page); ++ VM_BUG_ON_PAGE(PageActive(page), page); ++ VM_BUG_ON_PAGE(page_is_file_lru(page) != file, page); ++ VM_BUG_ON_PAGE(page_zonenum(page) != zone, page); ++ ++ prefetchw_prev_lru_page(page, head, flags); ++ ++ scanned += delta; ++ ++ if (sort_page(page, lruvec, tier)) ++ sorted += delta; ++ else if (should_skip_page(page, sc)) { ++ list_move(&page->lru, &moved); ++ skipped += delta; ++ } else { ++ isolate_page(page, lruvec); ++ list_add(&page->lru, list); ++ isolated += delta; ++ } ++ ++ if (scanned >= *nr_to_scan || isolated >= SWAP_CLUSTER_MAX || ++ ++batch_size == MAX_BATCH_SIZE) ++ break; ++ } ++ ++ list_splice(&moved, head); ++ __count_zid_vm_events(PGSCAN_SKIP, zone, skipped); ++ ++ if (scanned >= *nr_to_scan || isolated >= SWAP_CLUSTER_MAX || ++ batch_size == MAX_BATCH_SIZE) ++ break; ++ } ++ ++ success = try_inc_min_seq(lruvec, file); ++ ++ item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT; ++ if (!cgroup_reclaim(sc)) ++ __count_vm_events(item, scanned); ++ __count_memcg_events(lruvec_memcg(lruvec), item, scanned); ++ __count_vm_events(PGSCAN_ANON + file, scanned); ++ ++ *nr_to_scan -= scanned; ++ ++ if (*nr_to_scan <= 0 || success || isolated) ++ return isolated; ++ /* ++ * We may have trouble finding eligible pages due to reclaim_idx, ++ * may_unmap and may_writepage. The following check makes sure we won't ++ * be stuck if we aren't making enough progress. ++ */ ++ return batch_size == MAX_BATCH_SIZE && sorted >= SWAP_CLUSTER_MAX ? 
0 : -ENOENT; ++} ++ ++static int get_tier_to_isolate(struct lruvec *lruvec, int file) ++{ ++ int tier; ++ struct controller_pos sp, pv; ++ ++ /* ++ * Ideally we don't want to evict upper tiers that have higher refault ++ * rates. However, we need to leave some margin for the fluctuation in ++ * refault rates. So we use a larger gain factor to make sure upper ++ * tiers are indeed more active. We choose 2 because the lowest upper ++ * tier would have twice of the refault rate of the base tier, according ++ * to their numbers of accesses. ++ */ ++ read_controller_pos(&sp, lruvec, file, 0, 1); ++ for (tier = 1; tier < MAX_NR_TIERS; tier++) { ++ read_controller_pos(&pv, lruvec, file, tier, 2); ++ if (!positive_ctrl_err(&sp, &pv)) ++ break; ++ } ++ ++ return tier - 1; ++} ++ ++static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_to_isolate) ++{ ++ int file, tier; ++ struct controller_pos sp, pv; ++ int gain[ANON_AND_FILE] = { swappiness, 200 - swappiness }; ++ ++ /* ++ * Compare the refault rates between the base tiers of anon and file to ++ * determine which type to evict. Also need to compare the refault rates ++ * of the upper tiers of the selected type with that of the base tier to ++ * determine which tier of the selected type to evict. ++ */ ++ read_controller_pos(&sp, lruvec, 0, 0, gain[0]); ++ read_controller_pos(&pv, lruvec, 1, 0, gain[1]); ++ file = positive_ctrl_err(&sp, &pv); ++ ++ read_controller_pos(&sp, lruvec, !file, 0, gain[!file]); ++ for (tier = 1; tier < MAX_NR_TIERS; tier++) { ++ read_controller_pos(&pv, lruvec, file, tier, gain[file]); ++ if (!positive_ctrl_err(&sp, &pv)) ++ break; ++ } ++ ++ *tier_to_isolate = tier - 1; ++ ++ return file; ++} ++ ++static int isolate_lru_gen_pages(struct lruvec *lruvec, struct scan_control *sc, ++ int swappiness, long *nr_to_scan, int *type_to_scan, ++ struct list_head *list) ++{ ++ int i; ++ int file; ++ int isolated; ++ int tier = -1; ++ DEFINE_MAX_SEQ(); ++ DEFINE_MIN_SEQ(); ++ ++ VM_BUG_ON(!seq_is_valid(lruvec)); ++ ++ if (max_nr_gens(max_seq, min_seq, swappiness) == MIN_NR_GENS) ++ return 0; ++ /* ++ * Try to select a type based on generations and swappiness, and if that ++ * fails, fall back to get_type_to_scan(). When anon and file are both ++ * available from the same generation, swappiness 200 is interpreted as ++ * anon first and swappiness 1 is interpreted as file first. ++ */ ++ file = !swappiness || min_seq[0] > min_seq[1] || ++ (min_seq[0] == min_seq[1] && swappiness != 200 && ++ (swappiness == 1 || get_type_to_scan(lruvec, swappiness, &tier))); ++ ++ if (tier == -1) ++ tier = get_tier_to_isolate(lruvec, file); ++ ++ for (i = !swappiness; i < ANON_AND_FILE; i++) { ++ isolated = scan_lru_gen_pages(lruvec, sc, nr_to_scan, file, tier, list); ++ if (isolated >= 0) ++ break; ++ ++ file = !file; ++ tier = get_tier_to_isolate(lruvec, file); ++ } ++ ++ if (isolated < 0) ++ isolated = *nr_to_scan = 0; ++ ++ *type_to_scan = file; ++ ++ return isolated; ++} ++ ++/* Main function used by foreground, background and user-triggered eviction. 
*/ ++static bool evict_lru_gen_pages(struct lruvec *lruvec, struct scan_control *sc, ++ int swappiness, long *nr_to_scan) ++{ ++ int file; ++ int isolated; ++ int reclaimed; ++ LIST_HEAD(list); ++ struct page *page; ++ enum vm_event_item item; ++ struct reclaim_stat stat; ++ struct pglist_data *pgdat = lruvec_pgdat(lruvec); ++ ++ spin_lock_irq(&lruvec->lru_lock); ++ ++ isolated = isolate_lru_gen_pages(lruvec, sc, swappiness, nr_to_scan, &file, &list); ++ VM_BUG_ON(list_empty(&list) == !!isolated); ++ ++ if (isolated) ++ __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, isolated); ++ ++ spin_unlock_irq(&lruvec->lru_lock); ++ ++ if (!isolated) ++ goto done; ++ ++ reclaimed = shrink_page_list(&list, pgdat, sc, &stat, false); ++ /* ++ * We need to prevent rejected pages from being added back to the same ++ * lists they were isolated from. Otherwise we may risk looping on them ++ * forever. We use PageActive() or !PageReferenced() && PageWorkingset() ++ * to tell lru_gen_addition() not to add them to the oldest generation. ++ */ ++ list_for_each_entry(page, &list, lru) { ++ if (PageMlocked(page)) ++ continue; ++ ++ if (PageReferenced(page)) { ++ SetPageActive(page); ++ ClearPageReferenced(page); ++ } else { ++ ClearPageActive(page); ++ SetPageWorkingset(page); ++ } ++ } ++ ++ spin_lock_irq(&lruvec->lru_lock); ++ ++ move_pages_to_lru(lruvec, &list); ++ ++ __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -isolated); ++ ++ item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT; ++ if (!cgroup_reclaim(sc)) ++ __count_vm_events(item, reclaimed); ++ __count_memcg_events(lruvec_memcg(lruvec), item, reclaimed); ++ __count_vm_events(PGSTEAL_ANON + file, reclaimed); ++ ++ spin_unlock_irq(&lruvec->lru_lock); ++ ++ mem_cgroup_uncharge_list(&list); ++ free_unref_page_list(&list); ++ ++ sc->nr_reclaimed += reclaimed; ++done: ++ return *nr_to_scan > 0 && sc->nr_reclaimed < sc->nr_to_reclaim; ++} ++ + /****************************************************************************** + * state change + ******************************************************************************/ +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-13-16-mm-multigenerational-lru-page-reclaim.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-13-16-mm-multigenerational-lru-page-reclaim.patch new file mode 100644 index 0000000..ccef7d9 --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-13-16-mm-multigenerational-lru-page-reclaim.patch @@ -0,0 +1,479 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no + version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id 555A5C43461 + for ; Tue, 13 Apr 2021 06:57:36 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id 3220B60FDB + for ; Tue, 13 Apr 2021 06:57:35 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S237146AbhDMG5w (ORCPT ); + Tue, 13 Apr 2021 02:57:52 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44208 
"EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S1345113AbhDMG5T (ORCPT + ); + Tue, 13 Apr 2021 02:57:19 -0400 +Received: from mail-qk1-x749.google.com (mail-qk1-x749.google.com [IPv6:2607:f8b0:4864:20::749]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id A2F06C061574 + for ; Mon, 12 Apr 2021 23:57:00 -0700 (PDT) +Received: by mail-qk1-x749.google.com with SMTP id n191so10007274qka.9 + for ; Mon, 12 Apr 2021 23:57:00 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=ZkZkBuwvqnJ3RNHJhCNbR3K9qvaxv7Y+ShqFogGYPM4=; + b=YuhzAl4jnf9B8DPsAHH+IEn6TeEK8tkXzqeIIUWrV6MKmrDwRVWEaxlfpyho7LEl9c + Yb/oFtKUHNb53oILQT33tlmVOzpPgzylMipFZ2l5j9KHbcsDyRmB0oqQUa1QZ2PJMYNK + fWpCu7LXduAtYRU+OGHNrJHXp576QKDulX5A0p9heBIoiC+vWWS/x+GcCoUk17noPsZC + Su6UQCzg6NAfh+hiQZUMluxkVxIZLc0tUeagDPWX8AYcx4WshWUrgTPuDgI3s1vI7M8C + K9lLKPVh9VeBFpsycJM4koujbXoOVbPXyfWOhPPIE23ETJR5Yb0o5n5VqtBZYTB2FIhK + TPQw== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=ZkZkBuwvqnJ3RNHJhCNbR3K9qvaxv7Y+ShqFogGYPM4=; + b=HJexHWiiyDZpfXt8l6/EGRqdtM5RulG6u9GDFQ0UJD2T5+wffn01FXEWBORtSlloEv + JVoGieHk3qJawZml66nLtDTbcVGYn6Nqs6EXRfNoDgICSYXdL9NTIaojCI0ZKGyD+IxL + PUrN7oxaD8d5VGq+sBRezfThw/BfDEZnlAKs7my6MuuAOjBT8on5yBIH8/j/ICvIEG6I + gMkvHTcz3g9emOaHqBpNgMwnOo6Nuia/0YbXpr3xWCmezGFqPyDmC8JYVrlrE7T1sOtt + aM45XTkzlUnUnCLZq+dVQPAsg4IjqDoWZ7K2SbzPqIHFPVW2baQfIGX+oVazwypGzv4P + ZVCw== +X-Gm-Message-State: AOAM531aC+Fl2Rjia4/Q8PO4GqZNI/QjyevwkXojS3zWLyfXFHA97+i9 + GxwWwyU1OIpVhDJlWVmUnXRSn1z/KbE= +X-Google-Smtp-Source: ABdhPJz8UXRBXxFnHjwU9KHKJ57aCdWAlTupj/VfQPjKJc1AKD7gBysJ6np5sy0VpO9JJLZsJRX7gVcs/zM= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:a05:6214:1c0c:: with SMTP id + u12mr31837398qvc.24.1618297019786; Mon, 12 Apr 2021 23:56:59 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:30 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-14-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 13/16] mm: multigenerational lru: page reclaim +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + page-reclaim@google.com, Yu Zhao +Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +With the aging and the eviction in place, we can build the page +reclaim in a straightforward manner: + 1) In order to reduce the latency, direct reclaim only invokes the + aging when both min_seq[2] reaches max_seq-1; otherwise it invokes + the eviction. 
+ 2) In order to avoid the aging in the direct reclaim path, kswapd + does the background aging more proactively. It invokes the aging + when either of min_seq[2] reaches max_seq-1; otherwise it invokes + the eviction. + +And we add another optimization: pages mapped around a referenced PTE +may also have been referenced due to the spatial locality. In the +reclaim path, if the rmap finds the PTE mapping a page under reclaim +referenced, it calls a new function lru_gen_scan_around() to scan the +vicinity of the PTE. And if this new function finds others referenced +PTEs, it updates the generation number of the pages mapped by those +PTEs. + +Signed-off-by: Yu Zhao +--- + include/linux/mmzone.h | 6 ++ + mm/rmap.c | 6 ++ + mm/vmscan.c | 236 +++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 248 insertions(+) + +diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h +index dcfadf6a8c07..a22e9e40083f 100644 +--- a/include/linux/mmzone.h ++++ b/include/linux/mmzone.h +@@ -292,6 +292,7 @@ enum lruvec_flags { + }; + + struct lruvec; ++struct page_vma_mapped_walk; + + #define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) + #define LRU_USAGE_MASK ((BIT(LRU_USAGE_WIDTH) - 1) << LRU_USAGE_PGOFF) +@@ -384,6 +385,7 @@ struct lrugen { + + void lru_gen_init_lruvec(struct lruvec *lruvec); + void lru_gen_set_state(bool enable, bool main, bool swap); ++void lru_gen_scan_around(struct page_vma_mapped_walk *pvmw); + + #else /* CONFIG_LRU_GEN */ + +@@ -395,6 +397,10 @@ static inline void lru_gen_set_state(bool enable, bool main, bool swap) + { + } + ++static inline void lru_gen_scan_around(struct page_vma_mapped_walk *pvmw) ++{ ++} ++ + #endif /* CONFIG_LRU_GEN */ + + struct lruvec { +diff --git a/mm/rmap.c b/mm/rmap.c +index b0fc27e77d6d..d600b282ced5 100644 +--- a/mm/rmap.c ++++ b/mm/rmap.c +@@ -72,6 +72,7 @@ + #include + #include + #include ++#include + + #include + +@@ -792,6 +793,11 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma, + } + + if (pvmw.pte) { ++ /* the multigenerational lru exploits the spatial locality */ ++ if (lru_gen_enabled() && pte_young(*pvmw.pte)) { ++ lru_gen_scan_around(&pvmw); ++ referenced++; ++ } + if (ptep_clear_flush_young_notify(vma, address, + pvmw.pte)) { + /* +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 6239b1acd84f..01c475386379 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -1114,6 +1114,10 @@ static unsigned int shrink_page_list(struct list_head *page_list, + if (!sc->may_unmap && page_mapped(page)) + goto keep_locked; + ++ /* in case the page was found accessed by lru_gen_scan_around() */ ++ if (lru_gen_enabled() && !ignore_references && PageReferenced(page)) ++ goto keep_locked; ++ + may_enter_fs = (sc->gfp_mask & __GFP_FS) || + (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); + +@@ -2233,6 +2237,10 @@ static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc) + unsigned long file; + struct lruvec *target_lruvec; + ++ /* the multigenerational lru doesn't use these counters */ ++ if (lru_gen_enabled()) ++ return; ++ + target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); + + /* +@@ -2522,6 +2530,19 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, + } + } + ++#ifdef CONFIG_LRU_GEN ++static void age_lru_gens(struct pglist_data *pgdat, struct scan_control *sc); ++static void shrink_lru_gens(struct lruvec *lruvec, struct scan_control *sc); ++#else ++static void age_lru_gens(struct pglist_data *pgdat, struct scan_control *sc) ++{ ++} ++ ++static void 
shrink_lru_gens(struct lruvec *lruvec, struct scan_control *sc) ++{ ++} ++#endif ++ + static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) + { + unsigned long nr[NR_LRU_LISTS]; +@@ -2533,6 +2554,11 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) + struct blk_plug plug; + bool scan_adjusted; + ++ if (lru_gen_enabled()) { ++ shrink_lru_gens(lruvec, sc); ++ return; ++ } ++ + get_scan_count(lruvec, sc, nr); + + /* Record the original scan target for proportional adjustments later */ +@@ -2999,6 +3025,10 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat) + struct lruvec *target_lruvec; + unsigned long refaults; + ++ /* the multigenerational lru doesn't use these counters */ ++ if (lru_gen_enabled()) ++ return; ++ + target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat); + refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON); + target_lruvec->refaults[0] = refaults; +@@ -3373,6 +3403,11 @@ static void age_active_anon(struct pglist_data *pgdat, + struct mem_cgroup *memcg; + struct lruvec *lruvec; + ++ if (lru_gen_enabled()) { ++ age_lru_gens(pgdat, sc); ++ return; ++ } ++ + if (!total_swap_pages) + return; + +@@ -5468,6 +5503,57 @@ static bool walk_mm_list(struct lruvec *lruvec, unsigned long max_seq, + return true; + } + ++void lru_gen_scan_around(struct page_vma_mapped_walk *pvmw) ++{ ++ pte_t *pte; ++ unsigned long start, end; ++ int old_gen, new_gen; ++ unsigned long flags; ++ struct lruvec *lruvec; ++ struct mem_cgroup *memcg; ++ struct pglist_data *pgdat = page_pgdat(pvmw->page); ++ ++ lockdep_assert_held(pvmw->ptl); ++ ++ start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start); ++ end = pmd_addr_end(pvmw->address, pvmw->vma->vm_end); ++ pte = pvmw->pte - ((pvmw->address - start) >> PAGE_SHIFT); ++ ++ memcg = lock_page_memcg(pvmw->page); ++ lruvec = lock_page_lruvec_irqsave(pvmw->page, &flags); ++ ++ new_gen = lru_gen_from_seq(lruvec->evictable.max_seq); ++ ++ for (; start != end; pte++, start += PAGE_SIZE) { ++ struct page *page; ++ unsigned long pfn = pte_pfn(*pte); ++ ++ if (!pte_present(*pte) || !pte_young(*pte) || is_zero_pfn(pfn)) ++ continue; ++ ++ if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) ++ continue; ++ ++ page = compound_head(pfn_to_page(pfn)); ++ if (page_to_nid(page) != pgdat->node_id) ++ continue; ++ ++ if (page_memcg_rcu(page) != memcg) ++ continue; ++ /* ++ * We may be holding many locks. So try to finish as fast as ++ * possible and leave the accessed and the dirty bits to page ++ * table walks. 
++ */ ++ old_gen = page_update_gen(page, new_gen); ++ if (old_gen >= 0 && old_gen != new_gen) ++ lru_gen_update_size(page, lruvec, old_gen, new_gen); ++ } ++ ++ unlock_page_lruvec_irqrestore(lruvec, flags); ++ unlock_page_memcg(pvmw->page); ++} ++ + /****************************************************************************** + * the eviction + ******************************************************************************/ +@@ -5809,6 +5895,156 @@ static bool evict_lru_gen_pages(struct lruvec *lruvec, struct scan_control *sc, + return *nr_to_scan > 0 && sc->nr_reclaimed < sc->nr_to_reclaim; + } + ++/****************************************************************************** ++ * page reclaim ++ ******************************************************************************/ ++ ++static int get_swappiness(struct lruvec *lruvec) ++{ ++ struct mem_cgroup *memcg = lruvec_memcg(lruvec); ++ int swappiness = mem_cgroup_get_nr_swap_pages(memcg) >= (long)SWAP_CLUSTER_MAX ? ++ mem_cgroup_swappiness(memcg) : 0; ++ ++ VM_BUG_ON(swappiness > 200U); ++ ++ return swappiness; ++} ++ ++static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, ++ int swappiness) ++{ ++ int gen, file, zone; ++ long nr_to_scan = 0; ++ struct lrugen *lrugen = &lruvec->evictable; ++ DEFINE_MAX_SEQ(); ++ DEFINE_MIN_SEQ(); ++ ++ lru_add_drain(); ++ ++ for (file = !swappiness; file < ANON_AND_FILE; file++) { ++ unsigned long seq; ++ ++ for (seq = min_seq[file]; seq <= max_seq; seq++) { ++ gen = lru_gen_from_seq(seq); ++ ++ for (zone = 0; zone <= sc->reclaim_idx; zone++) ++ nr_to_scan += READ_ONCE(lrugen->sizes[gen][file][zone]); ++ } ++ } ++ ++ nr_to_scan = max(nr_to_scan, 0L); ++ nr_to_scan = round_up(nr_to_scan >> sc->priority, SWAP_CLUSTER_MAX); ++ ++ if (max_nr_gens(max_seq, min_seq, swappiness) > MIN_NR_GENS) ++ return nr_to_scan; ++ ++ /* kswapd uses age_lru_gens() */ ++ if (current_is_kswapd()) ++ return 0; ++ ++ return walk_mm_list(lruvec, max_seq, sc, swappiness, NULL) ? nr_to_scan : 0; ++} ++ ++static void shrink_lru_gens(struct lruvec *lruvec, struct scan_control *sc) ++{ ++ struct blk_plug plug; ++ unsigned long scanned = 0; ++ struct mem_cgroup *memcg = lruvec_memcg(lruvec); ++ ++ blk_start_plug(&plug); ++ ++ while (true) { ++ long nr_to_scan; ++ int swappiness = sc->may_swap ? 
get_swappiness(lruvec) : 0; ++ ++ nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness) - scanned; ++ if (nr_to_scan < (long)SWAP_CLUSTER_MAX) ++ break; ++ ++ scanned += nr_to_scan; ++ ++ if (!evict_lru_gen_pages(lruvec, sc, swappiness, &nr_to_scan)) ++ break; ++ ++ scanned -= nr_to_scan; ++ ++ if (mem_cgroup_below_min(memcg) || ++ (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim)) ++ break; ++ ++ cond_resched(); ++ } ++ ++ blk_finish_plug(&plug); ++} ++ ++/****************************************************************************** ++ * the background aging ++ ******************************************************************************/ ++ ++static int lru_gen_spread = MIN_NR_GENS; ++ ++static void try_walk_mm_list(struct lruvec *lruvec, struct scan_control *sc) ++{ ++ int gen, file, zone; ++ long old_and_young[2] = {}; ++ struct mm_walk_args args = {}; ++ int spread = READ_ONCE(lru_gen_spread); ++ int swappiness = get_swappiness(lruvec); ++ struct lrugen *lrugen = &lruvec->evictable; ++ DEFINE_MAX_SEQ(); ++ DEFINE_MIN_SEQ(); ++ ++ lru_add_drain(); ++ ++ for (file = !swappiness; file < ANON_AND_FILE; file++) { ++ unsigned long seq; ++ ++ for (seq = min_seq[file]; seq <= max_seq; seq++) { ++ gen = lru_gen_from_seq(seq); ++ ++ for (zone = 0; zone < MAX_NR_ZONES; zone++) ++ old_and_young[seq == max_seq] += ++ READ_ONCE(lrugen->sizes[gen][file][zone]); ++ } ++ } ++ ++ old_and_young[0] = max(old_and_young[0], 0L); ++ old_and_young[1] = max(old_and_young[1], 0L); ++ ++ if (old_and_young[0] + old_and_young[1] < SWAP_CLUSTER_MAX) ++ return; ++ ++ /* try to spread pages out across spread+1 generations */ ++ if (old_and_young[0] >= old_and_young[1] * spread && ++ min_nr_gens(max_seq, min_seq, swappiness) > max(spread, MIN_NR_GENS)) ++ return; ++ ++ walk_mm_list(lruvec, max_seq, sc, swappiness, &args); ++} ++ ++static void age_lru_gens(struct pglist_data *pgdat, struct scan_control *sc) ++{ ++ struct mem_cgroup *memcg; ++ ++ VM_BUG_ON(!current_is_kswapd()); ++ ++ memcg = mem_cgroup_iter(NULL, NULL, NULL); ++ do { ++ struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); ++ struct lrugen *lrugen = &lruvec->evictable; ++ ++ if (!mem_cgroup_below_min(memcg) && ++ (!mem_cgroup_below_low(memcg) || sc->memcg_low_reclaim)) ++ try_walk_mm_list(lruvec, sc); ++ ++ if (!mem_cgroup_disabled()) ++ atomic_add_unless(&lrugen->priority, 1, DEF_PRIORITY); ++ ++ cond_resched(); ++ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); ++} ++ + /****************************************************************************** + * state change + ******************************************************************************/ +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-14-16-mm-multigenerational-lru-user-interface.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-14-16-mm-multigenerational-lru-user-interface.patch new file mode 100644 index 0000000..f48128f --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-14-16-mm-multigenerational-lru-user-interface.patch @@ -0,0 +1,575 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no + 
version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id 31B6EC43470 + for ; Tue, 13 Apr 2021 06:57:41 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id 0EBEA613B6 + for ; Tue, 13 Apr 2021 06:57:41 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S1345183AbhDMG54 (ORCPT + ); + Tue, 13 Apr 2021 02:57:56 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44232 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S1345118AbhDMG5V (ORCPT + ); + Tue, 13 Apr 2021 02:57:21 -0400 +Received: from mail-qt1-x84a.google.com (mail-qt1-x84a.google.com [IPv6:2607:f8b0:4864:20::84a]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 1C4E9C061756 + for ; Mon, 12 Apr 2021 23:57:02 -0700 (PDT) +Received: by mail-qt1-x84a.google.com with SMTP id n21so671176qtv.12 + for ; Mon, 12 Apr 2021 23:57:02 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=bmixlp7YQskn8XLZNyskyxhbwQtBt0A28uS5+zjhVpk=; + b=oTGv6qg5bh0RzTaKM94g35MK59AI58jsQR7J4vE6o+6XFd35Jv2Zv+kkD/7cK0zRLR + Ck7Cs2RVKnfve+J1zVD+wa928VjcHUKUO3MuA+Cqt34BQiaAdVe26f2184VnzLQ3dvKx + z82OqBG1tTUndbk4EMVoB1ATBCP4BFNxWu8pKBJpk/N+I2MMj2uihIz/YB8QlxmuXlys + RwrXkZxVCCOUoq3encVAfJmCxv6JvxFy63iWYxkmY36qXToBwfkANHFMZAz4lcdJeH/y + xKzfHqA5vpuNdb9vsTsrozNb0UaKCAiSMM4mlUb5dey98HhAeu/oBRqdnxUz2tE+0+pZ + Z0TA== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=bmixlp7YQskn8XLZNyskyxhbwQtBt0A28uS5+zjhVpk=; + b=UkYkM6FO076Fvajq5s8whylzCbb+PpiJnz1vKUeJJXZu6YCbEYmOvaEH6+8Ddzo48Z + TI3guaaJl9qnC428Yf6FHDGXp6NeOwEblCvtmM2G7+umy+SrfwybHn1bw50Lo872DXbJ + gYls4kvFU7JQc7MioauxTlqJLpTYk3NcULfKC0GiHMuK9jrn/IsdHkAmjv1ZmsU5rVoi + eYiTShjU5iY513/VeoflBCVf0ixDD4Cr5lmm93z+i5Ey1yfqM+TVJShH9XlNUFONylgl + TRTw7Ayvc0f+UlyZ1Xa33Rbw0PvwoKpCYxcb1nsFqUtIjWowX+qxSCaJQ1u1t4X2KqnJ + hJ9w== +X-Gm-Message-State: AOAM530ZLR/zJQAB2NNEhfhm5mkXL3qXLlx6Z2Tl7QIoprpbg2sjKICU + bChNTP+0Q6f93KyJAtViluogruaRpm8= +X-Google-Smtp-Source: ABdhPJxZXtgEcQpB3hP9KaxSvf/XzXOIauyaS8KaFnbmO18XK2qWP28shfFcib9xRh5nN+wBlrRX+XvdCw8= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:ad4:4894:: with SMTP id bv20mr10806518qvb.34.1618297021214; + Mon, 12 Apr 2021 23:57:01 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:31 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-15-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 14/16] mm: multigenerational lru: user interface +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + 
page-reclaim@google.com, Yu Zhao +Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +Add a sysfs file /sys/kernel/mm/lru_gen/enabled so users can enable +and disable the multigenerational lru at runtime. + +Add a sysfs file /sys/kernel/mm/lru_gen/spread so users can spread +pages out across multiple generations. More generations make the +background aging more aggressive. + +Add a debugfs file /sys/kernel/debug/lru_gen so users can monitor the +multigenerational lru and trigger the aging and the eviction. This +file has the following output: + memcg memcg_id memcg_path + node node_id + min_gen birth_time anon_size file_size + ... + max_gen birth_time anon_size file_size + +Given a memcg and a node, "min_gen" is the oldest generation (number) +and "max_gen" is the youngest. Birth time is in milliseconds. The +sizes of anon and file types are in pages. + +This file takes the following input: + + memcg_id node_id gen [swappiness] + - memcg_id node_id gen [swappiness] [nr_to_reclaim] + +The first command line accounts referenced pages to generation +"max_gen" and creates the next generation "max_gen"+1. In this case, +"gen" should be equal to "max_gen". A swap file and a non-zero +"swappiness" are required to scan anon type. If swapping is not +desired, set vm.swappiness to 0. The second command line evicts +generations less than or equal to "gen". In this case, "gen" should be +less than "max_gen"-1 as "max_gen" and "max_gen"-1 are active +generations and therefore protected from the eviction. Use +"nr_to_reclaim" to limit the number of pages to be evicted. Multiple +command lines are supported, so does concatenation with delimiters "," +and ";". 
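+
+For illustration, the debugfs interface described above can be driven from a
+shell as root. The memcg_id, node_id and gen values below are hypothetical;
+read the current ones from /sys/kernel/debug/lru_gen first, and note that
+scanning anon pages assumes swap is available:
+
+  cat /sys/kernel/debug/lru_gen
+  # aging: with memcg_id 1, node_id 0 and max_gen 5, account referenced
+  # pages to generation 5 and create generation 6 (swappiness 20)
+  echo '+ 1 0 5 20' > /sys/kernel/debug/lru_gen
+  # eviction: evict generations <= 3 for the same memcg and node,
+  # reclaiming at most 4096 pages
+  echo '- 1 0 3 20 4096' > /sys/kernel/debug/lru_gen
+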
+ +Signed-off-by: Yu Zhao +--- + mm/vmscan.c | 405 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 405 insertions(+) + +diff --git a/mm/vmscan.c b/mm/vmscan.c +index 01c475386379..284e32d897cf 100644 +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -51,6 +51,8 @@ + #include + #include + #include ++#include ++#include + + #include + #include +@@ -6248,6 +6250,403 @@ static int __meminit __maybe_unused lru_gen_online_mem(struct notifier_block *se + return NOTIFY_DONE; + } + ++/****************************************************************************** ++ * sysfs interface ++ ******************************************************************************/ ++ ++static ssize_t show_lru_gen_spread(struct kobject *kobj, struct kobj_attribute *attr, ++ char *buf) ++{ ++ return sprintf(buf, "%d\n", READ_ONCE(lru_gen_spread)); ++} ++ ++static ssize_t store_lru_gen_spread(struct kobject *kobj, struct kobj_attribute *attr, ++ const char *buf, size_t len) ++{ ++ int spread; ++ ++ if (kstrtoint(buf, 10, &spread) || spread >= MAX_NR_GENS) ++ return -EINVAL; ++ ++ WRITE_ONCE(lru_gen_spread, spread); ++ ++ return len; ++} ++ ++static struct kobj_attribute lru_gen_spread_attr = __ATTR( ++ spread, 0644, show_lru_gen_spread, store_lru_gen_spread ++); ++ ++static ssize_t show_lru_gen_enabled(struct kobject *kobj, struct kobj_attribute *attr, ++ char *buf) ++{ ++ return snprintf(buf, PAGE_SIZE, "%ld\n", lru_gen_enabled()); ++} ++ ++static ssize_t store_lru_gen_enabled(struct kobject *kobj, struct kobj_attribute *attr, ++ const char *buf, size_t len) ++{ ++ int enable; ++ ++ if (kstrtoint(buf, 10, &enable)) ++ return -EINVAL; ++ ++ lru_gen_set_state(enable, true, false); ++ ++ return len; ++} ++ ++static struct kobj_attribute lru_gen_enabled_attr = __ATTR( ++ enabled, 0644, show_lru_gen_enabled, store_lru_gen_enabled ++); ++ ++static struct attribute *lru_gen_attrs[] = { ++ &lru_gen_spread_attr.attr, ++ &lru_gen_enabled_attr.attr, ++ NULL ++}; ++ ++static struct attribute_group lru_gen_attr_group = { ++ .name = "lru_gen", ++ .attrs = lru_gen_attrs, ++}; ++ ++/****************************************************************************** ++ * debugfs interface ++ ******************************************************************************/ ++ ++static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos) ++{ ++ struct mem_cgroup *memcg; ++ loff_t nr_to_skip = *pos; ++ ++ m->private = kzalloc(PATH_MAX, GFP_KERNEL); ++ if (!m->private) ++ return ERR_PTR(-ENOMEM); ++ ++ memcg = mem_cgroup_iter(NULL, NULL, NULL); ++ do { ++ int nid; ++ ++ for_each_node_state(nid, N_MEMORY) { ++ if (!nr_to_skip--) ++ return mem_cgroup_lruvec(memcg, NODE_DATA(nid)); ++ } ++ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); ++ ++ return NULL; ++} ++ ++static void lru_gen_seq_stop(struct seq_file *m, void *v) ++{ ++ if (!IS_ERR_OR_NULL(v)) ++ mem_cgroup_iter_break(NULL, lruvec_memcg(v)); ++ ++ kfree(m->private); ++ m->private = NULL; ++} ++ ++static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ int nid = lruvec_pgdat(v)->node_id; ++ struct mem_cgroup *memcg = lruvec_memcg(v); ++ ++ ++*pos; ++ ++ nid = next_memory_node(nid); ++ if (nid == MAX_NUMNODES) { ++ memcg = mem_cgroup_iter(NULL, memcg, NULL); ++ if (!memcg) ++ return NULL; ++ ++ nid = first_memory_node; ++ } ++ ++ return mem_cgroup_lruvec(memcg, NODE_DATA(nid)); ++} ++ ++static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, ++ unsigned long max_seq, unsigned long *min_seq, ++ unsigned long seq) ++{ ++ 
int i; ++ int file, tier; ++ int sid = sid_from_seq_or_gen(seq); ++ struct lrugen *lrugen = &lruvec->evictable; ++ int nid = lruvec_pgdat(lruvec)->node_id; ++ struct mem_cgroup *memcg = lruvec_memcg(lruvec); ++ struct lru_gen_mm_list *mm_list = get_mm_list(memcg); ++ ++ for (tier = 0; tier < MAX_NR_TIERS; tier++) { ++ seq_printf(m, " %10d", tier); ++ for (file = 0; file < ANON_AND_FILE; file++) { ++ unsigned long n[3] = {}; ++ ++ if (seq == max_seq) { ++ n[0] = READ_ONCE(lrugen->avg_refaulted[file][tier]); ++ n[1] = READ_ONCE(lrugen->avg_total[file][tier]); ++ ++ seq_printf(m, " %10luR %10luT %10lu ", n[0], n[1], n[2]); ++ } else if (seq == min_seq[file] || NR_STAT_GENS > 1) { ++ n[0] = atomic_long_read(&lrugen->refaulted[sid][file][tier]); ++ n[1] = atomic_long_read(&lrugen->evicted[sid][file][tier]); ++ if (tier) ++ n[2] = READ_ONCE(lrugen->activated[sid][file][tier - 1]); ++ ++ seq_printf(m, " %10lur %10lue %10lua", n[0], n[1], n[2]); ++ } else ++ seq_puts(m, " 0 0 0 "); ++ } ++ seq_putc(m, '\n'); ++ } ++ ++ seq_puts(m, " "); ++ for (i = 0; i < NR_MM_STATS; i++) { ++ if (seq == max_seq && NR_STAT_GENS == 1) ++ seq_printf(m, " %10lu%c", READ_ONCE(mm_list->nodes[nid].stats[sid][i]), ++ toupper(MM_STAT_CODES[i])); ++ else if (seq != max_seq && NR_STAT_GENS > 1) ++ seq_printf(m, " %10lu%c", READ_ONCE(mm_list->nodes[nid].stats[sid][i]), ++ MM_STAT_CODES[i]); ++ else ++ seq_puts(m, " 0 "); ++ } ++ seq_putc(m, '\n'); ++} ++ ++static int lru_gen_seq_show(struct seq_file *m, void *v) ++{ ++ unsigned long seq; ++ bool full = !debugfs_real_fops(m->file)->write; ++ struct lruvec *lruvec = v; ++ struct lrugen *lrugen = &lruvec->evictable; ++ int nid = lruvec_pgdat(lruvec)->node_id; ++ struct mem_cgroup *memcg = lruvec_memcg(lruvec); ++ DEFINE_MAX_SEQ(); ++ DEFINE_MIN_SEQ(); ++ ++ if (nid == first_memory_node) { ++#ifdef CONFIG_MEMCG ++ if (memcg) ++ cgroup_path(memcg->css.cgroup, m->private, PATH_MAX); ++#endif ++ seq_printf(m, "memcg %5hu %s\n", ++ mem_cgroup_id(memcg), (char *)m->private); ++ } ++ ++ seq_printf(m, " node %5d %10d\n", nid, atomic_read(&lrugen->priority)); ++ ++ seq = full ? (max_seq < MAX_NR_GENS ? 0 : max_seq - MAX_NR_GENS + 1) : ++ min(min_seq[0], min_seq[1]); ++ ++ for (; seq <= max_seq; seq++) { ++ int gen, file, zone; ++ unsigned int msecs; ++ ++ gen = lru_gen_from_seq(seq); ++ msecs = jiffies_to_msecs(jiffies - READ_ONCE(lrugen->timestamps[gen])); ++ ++ seq_printf(m, " %10lu %10u", seq, msecs); ++ ++ for (file = 0; file < ANON_AND_FILE; file++) { ++ long size = 0; ++ ++ if (seq < min_seq[file]) { ++ seq_puts(m, " -0 "); ++ continue; ++ } ++ ++ for (zone = 0; zone < MAX_NR_ZONES; zone++) ++ size += READ_ONCE(lrugen->sizes[gen][file][zone]); ++ ++ seq_printf(m, " %10lu ", max(size, 0L)); ++ } ++ ++ seq_putc(m, '\n'); ++ ++ if (full) ++ lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq); ++ } ++ ++ return 0; ++} ++ ++static const struct seq_operations lru_gen_seq_ops = { ++ .start = lru_gen_seq_start, ++ .stop = lru_gen_seq_stop, ++ .next = lru_gen_seq_next, ++ .show = lru_gen_seq_show, ++}; ++ ++static int advance_max_seq(struct lruvec *lruvec, unsigned long seq, int swappiness) ++{ ++ struct mm_walk_args args = {}; ++ struct scan_control sc = { ++ .target_mem_cgroup = lruvec_memcg(lruvec), ++ }; ++ DEFINE_MAX_SEQ(); ++ ++ if (seq == max_seq) ++ walk_mm_list(lruvec, max_seq, &sc, swappiness, &args); ++ ++ return seq > max_seq ? 
-EINVAL : 0; ++} ++ ++static int advance_min_seq(struct lruvec *lruvec, unsigned long seq, int swappiness, ++ unsigned long nr_to_reclaim) ++{ ++ struct blk_plug plug; ++ int err = -EINTR; ++ long nr_to_scan = LONG_MAX; ++ struct scan_control sc = { ++ .nr_to_reclaim = nr_to_reclaim, ++ .target_mem_cgroup = lruvec_memcg(lruvec), ++ .may_writepage = 1, ++ .may_unmap = 1, ++ .may_swap = 1, ++ .reclaim_idx = MAX_NR_ZONES - 1, ++ .gfp_mask = GFP_KERNEL, ++ }; ++ DEFINE_MAX_SEQ(); ++ ++ if (seq >= max_seq - 1) ++ return -EINVAL; ++ ++ blk_start_plug(&plug); ++ ++ while (!signal_pending(current)) { ++ DEFINE_MIN_SEQ(); ++ ++ if (seq < min(min_seq[!swappiness], min_seq[swappiness < 200]) || ++ !evict_lru_gen_pages(lruvec, &sc, swappiness, &nr_to_scan)) { ++ err = 0; ++ break; ++ } ++ ++ cond_resched(); ++ } ++ ++ blk_finish_plug(&plug); ++ ++ return err; ++} ++ ++static int advance_seq(char cmd, int memcg_id, int nid, unsigned long seq, ++ int swappiness, unsigned long nr_to_reclaim) ++{ ++ struct lruvec *lruvec; ++ int err = -EINVAL; ++ struct mem_cgroup *memcg = NULL; ++ ++ if (!mem_cgroup_disabled()) { ++ rcu_read_lock(); ++ memcg = mem_cgroup_from_id(memcg_id); ++#ifdef CONFIG_MEMCG ++ if (memcg && !css_tryget(&memcg->css)) ++ memcg = NULL; ++#endif ++ rcu_read_unlock(); ++ ++ if (!memcg) ++ goto done; ++ } ++ if (memcg_id != mem_cgroup_id(memcg)) ++ goto done; ++ ++ if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY)) ++ goto done; ++ ++ lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); ++ ++ if (swappiness == -1) ++ swappiness = get_swappiness(lruvec); ++ else if (swappiness > 200U) ++ goto done; ++ ++ switch (cmd) { ++ case '+': ++ err = advance_max_seq(lruvec, seq, swappiness); ++ break; ++ case '-': ++ err = advance_min_seq(lruvec, seq, swappiness, nr_to_reclaim); ++ break; ++ } ++done: ++ mem_cgroup_put(memcg); ++ ++ return err; ++} ++ ++static ssize_t lru_gen_seq_write(struct file *file, const char __user *src, ++ size_t len, loff_t *pos) ++{ ++ void *buf; ++ char *cur, *next; ++ int err = 0; ++ ++ buf = kvmalloc(len + 1, GFP_USER); ++ if (!buf) ++ return -ENOMEM; ++ ++ if (copy_from_user(buf, src, len)) { ++ kvfree(buf); ++ return -EFAULT; ++ } ++ ++ next = buf; ++ next[len] = '\0'; ++ ++ while ((cur = strsep(&next, ",;\n"))) { ++ int n; ++ int end; ++ char cmd; ++ int memcg_id; ++ int nid; ++ unsigned long seq; ++ int swappiness = -1; ++ unsigned long nr_to_reclaim = -1; ++ ++ cur = skip_spaces(cur); ++ if (!*cur) ++ continue; ++ ++ n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid, ++ &seq, &end, &swappiness, &end, &nr_to_reclaim, &end); ++ if (n < 4 || cur[end]) { ++ err = -EINVAL; ++ break; ++ } ++ ++ err = advance_seq(cmd, memcg_id, nid, seq, swappiness, nr_to_reclaim); ++ if (err) ++ break; ++ } ++ ++ kvfree(buf); ++ ++ return err ? 
: len; ++} ++ ++static int lru_gen_seq_open(struct inode *inode, struct file *file) ++{ ++ return seq_open(file, &lru_gen_seq_ops); ++} ++ ++static const struct file_operations lru_gen_rw_fops = { ++ .open = lru_gen_seq_open, ++ .read = seq_read, ++ .write = lru_gen_seq_write, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++static const struct file_operations lru_gen_ro_fops = { ++ .open = lru_gen_seq_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ + /****************************************************************************** + * initialization + ******************************************************************************/ +@@ -6291,6 +6690,12 @@ static int __init init_lru_gen(void) + if (hotplug_memory_notifier(lru_gen_online_mem, 0)) + pr_err("lru_gen: failed to subscribe hotplug notifications\n"); + ++ if (sysfs_create_group(mm_kobj, &lru_gen_attr_group)) ++ pr_err("lru_gen: failed to create sysfs group\n"); ++ ++ debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops); ++ debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops); ++ + return 0; + }; + /* +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-15-16-mm-multigenerational-lru-Kconfig.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-15-16-mm-multigenerational-lru-Kconfig.patch new file mode 100644 index 0000000..9a1aa7b --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-15-16-mm-multigenerational-lru-Kconfig.patch @@ -0,0 +1,175 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no + version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id 922C4C43461 + for ; Tue, 13 Apr 2021 06:57:45 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id 7572660FDB + for ; Tue, 13 Apr 2021 06:57:45 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S1345185AbhDMG6D (ORCPT + ); + Tue, 13 Apr 2021 02:58:03 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44240 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S1345121AbhDMG5X (ORCPT + ); + Tue, 13 Apr 2021 02:57:23 -0400 +Received: from mail-yb1-xb4a.google.com (mail-yb1-xb4a.google.com [IPv6:2607:f8b0:4864:20::b4a]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 90316C061574 + for ; Mon, 12 Apr 2021 23:57:03 -0700 (PDT) +Received: by mail-yb1-xb4a.google.com with SMTP id c4so2057580ybp.6 + for ; Mon, 12 Apr 2021 23:57:03 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=OWomzY5O6nIPdL3TO8CK9fbu3idsDsiJdhlQbcNCNmk=; + b=hLHfxFzp5QFiDV0NCweRKZIoXrgJbYlQcW+yuS+vLMPcNKKc255Fg3tjNqfooV/OLd + U6CQ3iwK8H6zMls3pFdMBN0NLbmWj6RWEYNi/DCM+PrHNrSzMnt6S2Lg4zq0wvg3486H + +sx4x6j4kxGh5x9L9qgA+TxXylPtgpu5ds2+dsX0pD8ntrVyPxV7AvsnWB6UiW1V9ZVk + 
/LsyUFz5OtLMbBTake9P8xyrPjX9eTcGBEel6+oOeQ/dZObXKYPRK8qTg6fk2FWETrnD + Zbg2sgYJYwkCg4UC1pmuVjLWdyS1iObkTDP9YTfrBRXxxrrkE/8ced456rnZvUMSg1he + l4YQ== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=OWomzY5O6nIPdL3TO8CK9fbu3idsDsiJdhlQbcNCNmk=; + b=V7wMyHi072dce6ZnPpEv7/vgyxfGH4iYzC8xiwylgcN9u4SyLFR8AsWrgIpv2mVFrC + H9+fkRd2whFAERf06443LAgIA7SIiztKoG2b9INedj5rird9Kes1pDEafZP04/dNwIll + hJeAUb9N1qmeVv6vZIZsKpWDp0D/wa5gCBze6PfyzFRL82n1sUxPv6wP/l9ClegByA3J + 8il8uC4X+iRjk3XACwZG+JrS7i4d2Q+qkj3ANVNNGNcDhaHbgsucUpMzpVDJleKoVoBL + Luvyo5PCSA38KyflkQS+SzfwNoU60rrlTa6oBMVzyUgoPqp3RNtFIp4yyJUcill3qvqi + 5ymw== +X-Gm-Message-State: AOAM532nNDpt3iSLmHBos2xzSSPUScQwSS+AZ2hM1blhHygr52zHuQkq + triAdzH/rSQIePQ4klFd5q1eM3rRWnU= +X-Google-Smtp-Source: ABdhPJzyLPRGqf29+Ytj/xVq/duL5XVOMgJinIYyL+dmRy0rCrFAsDcush6F7fQT1oukQxSVakciHbYtiFU= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:a25:cc90:: with SMTP id l138mr2006126ybf.150.1618297022801; + Mon, 12 Apr 2021 23:57:02 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:32 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-16-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 15/16] mm: multigenerational lru: Kconfig +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + page-reclaim@google.com, Yu Zhao +Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +Add configuration options for the multigenerational lru. + +Signed-off-by: Yu Zhao +--- + mm/Kconfig | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 55 insertions(+) + +diff --git a/mm/Kconfig b/mm/Kconfig +index 24c045b24b95..0be1c6c90cc0 100644 +--- a/mm/Kconfig ++++ b/mm/Kconfig +@@ -872,4 +872,59 @@ config MAPPING_DIRTY_HELPERS + config KMAP_LOCAL + bool + ++config LRU_GEN ++ bool "Multigenerational LRU" ++ depends on MMU ++ help ++ A high performance LRU implementation to heavily overcommit workloads ++ that are not IO bound. See Documentation/vm/multigen_lru.rst for ++ details. ++ ++ Warning: do not enable this option unless you plan to use it because ++ it introduces a small per-process and per-memcg and per-node memory ++ overhead. ++ ++config NR_LRU_GENS ++ int "Max number of generations" ++ depends on LRU_GEN ++ range 4 31 ++ default 7 ++ help ++ This will use order_base_2(N+1) spare bits from page flags. ++ ++ Warning: do not use numbers larger than necessary because each ++ generation introduces a small per-node and per-memcg memory overhead. ++ ++config TIERS_PER_GEN ++ int "Number of tiers per generation" ++ depends on LRU_GEN ++ range 2 5 ++ default 4 ++ help ++ This will use N-2 spare bits from page flags. 
++ ++ Higher values generally offer better protection to active pages under ++ heavy buffered I/O workloads. ++ ++config LRU_GEN_ENABLED ++ bool "Turn on by default" ++ depends on LRU_GEN ++ help ++ The default value of /sys/kernel/mm/lru_gen/enabled is 0. This option ++ changes it to 1. ++ ++ Warning: the default value is the fast path. See ++ Documentation/static-keys.txt for details. ++ ++config LRU_GEN_STATS ++ bool "Full stats for debugging" ++ depends on LRU_GEN ++ help ++ This option keeps full stats for each generation, which can be read ++ from /sys/kernel/debug/lru_gen_full. ++ ++ Warning: do not enable this option unless you plan to use it because ++ it introduces an additional small per-process and per-memcg and ++ per-node memory overhead. ++ + endmenu +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/files/PATCH-v2-16-16-mm-multigenerational-lru-documentation.patch b/sys-kernel/pinephone-sources/files/PATCH-v2-16-16-mm-multigenerational-lru-documentation.patch new file mode 100644 index 0000000..831b4fa --- /dev/null +++ b/sys-kernel/pinephone-sources/files/PATCH-v2-16-16-mm-multigenerational-lru-documentation.patch @@ -0,0 +1,322 @@ +From mboxrd@z Thu Jan 1 00:00:00 1970 +Return-Path: +X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on + aws-us-west-2-korg-lkml-1.web.codeaurora.org +X-Spam-Level: +X-Spam-Status: No, score=-26.2 required=3.0 tests=BAYES_00,DKIMWL_WL_MED, + DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,HEADER_FROM_DIFFERENT_DOMAINS, + INCLUDES_CR_TRAILER,INCLUDES_PATCH,MAILING_LIST_MULTI,SPF_HELO_NONE, + USER_AGENT_GIT,USER_IN_DEF_DKIM_WL autolearn=unavailable autolearn_force=no + version=3.4.0 +Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) + by smtp.lore.kernel.org (Postfix) with ESMTP id 8D664C433B4 + for ; Tue, 13 Apr 2021 06:57:53 +0000 (UTC) +Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) + by mail.kernel.org (Postfix) with ESMTP id 5CED260FDB + for ; Tue, 13 Apr 2021 06:57:53 +0000 (UTC) +Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand + id S244503AbhDMG6L (ORCPT ); + Tue, 13 Apr 2021 02:58:11 -0400 +Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44250 "EHLO + lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org + with ESMTP id S1345123AbhDMG5Y (ORCPT + ); + Tue, 13 Apr 2021 02:57:24 -0400 +Received: from mail-yb1-xb49.google.com (mail-yb1-xb49.google.com [IPv6:2607:f8b0:4864:20::b49]) + by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 14C06C061756 + for ; Mon, 12 Apr 2021 23:57:05 -0700 (PDT) +Received: by mail-yb1-xb49.google.com with SMTP id p75so9209574ybc.8 + for ; Mon, 12 Apr 2021 23:57:05 -0700 (PDT) +DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=google.com; s=20161025; + h=date:in-reply-to:message-id:mime-version:references:subject:from:to + :cc; + bh=fZsS4S+ppDN6vse6LQilTb+995ZpejDyoXEkWEzhPiI=; + b=JPzEmLg8IXqkikE/b+k7FNKSdKIPd2lLmXlP9sfI87JvOkw09qdZ+KRrlaAD+a9Dhn + 005sbjcbFZ0lFEPYPSKaDUzlN3hBr3DSo7pYAg76+SLl3Ga5vXEbxhKRzSwelQO0SjpX + rhHL0KytAzNOPmRXNi0zkAQkCW4EAqyrBAkMJuC7dTB6jIRG6ER1dzInKps5oaOL1wQs + HLIiBt2/Ahnea89fcjAFJPIS7nNG2lwTqqUVTkoanckNkavhBDYk0VsP07i7LdiYi9zN + +LOuJNV+snejmLdfr2/3+aMXbxqjF2clhWnkNv/9X/ng5LI35tZxiwJOcncdT6c0vONU + rPQA== +X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; + d=1e100.net; s=20161025; + h=x-gm-message-state:date:in-reply-to:message-id:mime-version + :references:subject:from:to:cc; + bh=fZsS4S+ppDN6vse6LQilTb+995ZpejDyoXEkWEzhPiI=; + 
b=Mmy7jkv8AlhXjPNjblEwvM3ZtDGk7NKvJ6rsLmF6f0BWgbZq1tIB6pdyHgFU312oCj + y4lT+2OfaNXkHdc1m9GGWuWIiWBODWDms6SOZyoSt3DzZKzcdOzZvjUSS2YPZRhtMBP8 + dB9FKMTZmwSiNzB4tdOneaAVzDRY5bshb8bACVfCaWFqtKUYRJ7IUedFh3omjJHSY8FV + 6STGtMN3VWQZjRvtH7TufrAvCfWEWJ4oYHPhHmGG2DIS+7aQ6CbYgjel6Xiw7E9VkAg2 + JoiFRDcRNv+ByQW+uYw+Z96cYJm5wf4hkkC+/iCib2vWT1vXRgZ7CRYsjyRwZmHJd2Jy + fKJA== +X-Gm-Message-State: AOAM532ohDzhQEIUgvNgG4R8COEdtptVwp/WFnYFKQYURGql6xBpawoF + Y2GA+8fymXJP5OJ1UDw0RBDHBeXkM1Q= +X-Google-Smtp-Source: ABdhPJzHOTHYLMuXC88wBZEF39dm7Sun3+0TVIBRLg85pDR3z2FX1I51OcfzuM68n03ioC4rVU3FQw4etPM= +X-Received: from yuzhao.bld.corp.google.com ([2620:15c:183:200:d02d:cccc:9ebe:9fe9]) + (user=yuzhao job=sendgmr) by 2002:a25:e00f:: with SMTP id x15mr25695207ybg.85.1618297024186; + Mon, 12 Apr 2021 23:57:04 -0700 (PDT) +Date: Tue, 13 Apr 2021 00:56:33 -0600 +In-Reply-To: <20210413065633.2782273-1-yuzhao@google.com> +Message-Id: <20210413065633.2782273-17-yuzhao@google.com> +Mime-Version: 1.0 +References: <20210413065633.2782273-1-yuzhao@google.com> +X-Mailer: git-send-email 2.31.1.295.g9ea45b61b8-goog +Subject: [PATCH v2 16/16] mm: multigenerational lru: documentation +From: Yu Zhao +To: linux-mm@kvack.org +Cc: Alex Shi , Andi Kleen , + Andrew Morton , + Benjamin Manes , + Dave Chinner , + Dave Hansen , + Hillf Danton , Jens Axboe , + Johannes Weiner , + Jonathan Corbet , + Joonsoo Kim , + Matthew Wilcox , + Mel Gorman , + Miaohe Lin , + Michael Larabel , + Michal Hocko , + Michel Lespinasse , + Rik van Riel , + Roman Gushchin , + Rong Chen , + SeongJae Park , + Tim Chen , + Vlastimil Babka , + Yang Shi , + Ying Huang , Zi Yan , + linux-kernel@vger.kernel.org, lkp@lists.01.org, + page-reclaim@google.com, Yu Zhao +Content-Type: text/plain; charset="UTF-8" +Precedence: bulk +List-ID: +X-Mailing-List: linux-kernel@vger.kernel.org +Archived-At: +List-Archive: +List-Post: + +Add Documentation/vm/multigen_lru.rst. + +Signed-off-by: Yu Zhao +--- + Documentation/vm/index.rst | 1 + + Documentation/vm/multigen_lru.rst | 192 ++++++++++++++++++++++++++++++ + 2 files changed, 193 insertions(+) + create mode 100644 Documentation/vm/multigen_lru.rst + +diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst +index eff5fbd492d0..c353b3f55924 100644 +--- a/Documentation/vm/index.rst ++++ b/Documentation/vm/index.rst +@@ -17,6 +17,7 @@ various features of the Linux memory management + + swap_numa + zswap ++ multigen_lru + + Kernel developers MM documentation + ================================== +diff --git a/Documentation/vm/multigen_lru.rst b/Documentation/vm/multigen_lru.rst +new file mode 100644 +index 000000000000..cf772aeca317 +--- /dev/null ++++ b/Documentation/vm/multigen_lru.rst +@@ -0,0 +1,192 @@ ++===================== ++Multigenerational LRU ++===================== ++ ++Quick Start ++=========== ++Build Options ++------------- ++:Required: Set ``CONFIG_LRU_GEN=y``. ++ ++:Optional: Change ``CONFIG_NR_LRU_GENS`` to a number ``X`` to support ++ a maximum of ``X`` generations. ++ ++:Optional: Change ``CONFIG_TIERS_PER_GEN`` to a number ``Y`` to support ++ a maximum of ``Y`` tiers per generation. ++ ++:Optional: Set ``CONFIG_LRU_GEN_ENABLED=y`` to turn the feature on by ++ default. ++ ++Runtime Options ++--------------- ++:Required: Write ``1`` to ``/sys/kernel/mm/lru_gen/enable`` if the ++ feature was not turned on by default. ++ ++:Optional: Change ``/sys/kernel/mm/lru_gen/spread`` to a number ``N`` ++ to spread pages out across ``N+1`` generations. ``N`` should be less ++ than ``X``. 
Larger values make the background aging more aggressive. ++ ++:Optional: Read ``/sys/kernel/debug/lru_gen`` to verify the feature. ++ This file has the following output: ++ ++:: ++ ++ memcg memcg_id memcg_path ++ node node_id ++ min_gen birth_time anon_size file_size ++ ... ++ max_gen birth_time anon_size file_size ++ ++Given a memcg and a node, ``min_gen`` is the oldest generation ++(number) and ``max_gen`` is the youngest. Birth time is in ++milliseconds. The sizes of anon and file types are in pages. ++ ++Recipes ++------- ++:Android on ARMv8.1+: ``X=4``, ``N=0`` ++ ++:Android on pre-ARMv8.1 CPUs: Not recommended due to the lack of ++ ``ARM64_HW_AFDBM`` ++ ++:Laptops running Chrome on x86_64: ``X=7``, ``N=2`` ++ ++:Working set estimation: Write ``+ memcg_id node_id gen [swappiness]`` ++ to ``/sys/kernel/debug/lru_gen`` to account referenced pages to ++ generation ``max_gen`` and create the next generation ``max_gen+1``. ++ ``gen`` should be equal to ``max_gen``. A swap file and a non-zero ++ ``swappiness`` are required to scan anon type. If swapping is not ++ desired, set ``vm.swappiness`` to ``0``. ++ ++:Proactive reclaim: Write ``- memcg_id node_id gen [swappiness] ++ [nr_to_reclaim]`` to ``/sys/kernel/debug/lru_gen`` to evict ++ generations less than or equal to ``gen``. ``gen`` should be less ++ than ``max_gen-1`` as ``max_gen`` and ``max_gen-1`` are active ++ generations and therefore protected from the eviction. Use ++ ``nr_to_reclaim`` to limit the number of pages to be evicted. ++ Multiple command lines are supported, so does concatenation with ++ delimiters ``,`` and ``;``. ++ ++Framework ++========= ++For each ``lruvec``, evictable pages are divided into multiple ++generations. The youngest generation number is stored in ``max_seq`` ++for both anon and file types as they are aged on an equal footing. The ++oldest generation numbers are stored in ``min_seq[2]`` separately for ++anon and file types as clean file pages can be evicted regardless of ++swap and write-back constraints. Generation numbers are truncated into ++``order_base_2(CONFIG_NR_LRU_GENS+1)`` bits in order to fit into ++``page->flags``. The sliding window technique is used to prevent ++truncated generation numbers from overlapping. Each truncated ++generation number is an index to an array of per-type and per-zone ++lists. Evictable pages are added to the per-zone lists indexed by ++``max_seq`` or ``min_seq[2]`` (modulo ``CONFIG_NR_LRU_GENS``), ++depending on whether they are being faulted in. ++ ++Each generation is then divided into multiple tiers. Tiers represent ++levels of usage from file descriptors only. Pages accessed N times via ++file descriptors belong to tier order_base_2(N). In contrast to moving ++across generations which requires the lru lock, moving across tiers ++only involves an atomic operation on ``page->flags`` and therefore has ++a negligible cost. ++ ++The workflow comprises two conceptually independent functions: the ++aging and the eviction. ++ ++Aging ++----- ++The aging produces young generations. Given an ``lruvec``, the aging ++scans page tables for referenced pages of this ``lruvec``. Upon ++finding one, the aging updates its generation number to ``max_seq``. ++After each round of scan, the aging increments ``max_seq``. ++ ++The aging maintains either a system-wide ``mm_struct`` list or ++per-memcg ``mm_struct`` lists, and it only scans page tables of ++processes that have been scheduled since the last scan. 
Since scans ++are differential with respect to referenced pages, the cost is roughly ++proportional to their number. ++ ++The aging is due when both of ``min_seq[2]`` reaches ``max_seq-1``, ++assuming both anon and file types are reclaimable. ++ ++Eviction ++-------- ++The eviction consumes old generations. Given an ``lruvec``, the ++eviction scans the pages on the per-zone lists indexed by either of ++``min_seq[2]``. It first tries to select a type based on the values of ++``min_seq[2]``. When anon and file types are both available from the ++same generation, it selects the one that has a lower refault rate. ++ ++During a scan, the eviction sorts pages according to their generation ++numbers, if the aging has found them referenced. It also moves pages ++from the tiers that have higher refault rates than tier 0 to the next ++generation. ++ ++When it finds all the per-zone lists of a selected type are empty, the ++eviction increments ``min_seq[2]`` indexed by this selected type. ++ ++Rationale ++========= ++Limitations of Current Implementation ++------------------------------------- ++Notion of Active/Inactive ++~~~~~~~~~~~~~~~~~~~~~~~~~ ++For servers equipped with hundreds of gigabytes of memory, the ++granularity of the active/inactive is too coarse to be useful for job ++scheduling. False active/inactive rates are relatively high, and thus ++the assumed savings may not materialize. ++ ++For phones and laptops, executable pages are frequently evicted ++despite the fact that there are many less recently used anon pages. ++Major faults on executable pages cause ``janks`` (slow UI renderings) ++and negatively impact user experience. ++ ++For ``lruvec``\s from different memcgs or nodes, comparisons are ++impossible due to the lack of a common frame of reference. ++ ++Incremental Scans via ``rmap`` ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++Each incremental scan picks up at where the last scan left off and ++stops after it has found a handful of unreferenced pages. For ++workloads using a large amount of anon memory, incremental scans lose ++the advantage under sustained memory pressure due to high ratios of ++the number of scanned pages to the number of reclaimed pages. On top ++of that, the ``rmap`` has poor memory locality due to its complex data ++structures. The combined effects typically result in a high amount of ++CPU usage in the reclaim path. ++ ++Benefits of Multigenerational LRU ++--------------------------------- ++Notion of Generation Numbers ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++The notion of generation numbers introduces a quantitative approach to ++memory overcommit. A larger number of pages can be spread out across ++configurable generations, and thus they have relatively low false ++active/inactive rates. Each generation includes all pages that have ++been referenced since the last generation. ++ ++Given an ``lruvec``, scans and the selections between anon and file ++types are all based on generation numbers, which are simple and yet ++effective. For different ``lruvec``\s, comparisons are still possible ++based on birth times of generations. ++ ++Differential Scans via Page Tables ++~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++Each differential scan discovers all pages that have been referenced ++since the last scan. Specifically, it walks the ``mm_struct`` list ++associated with an ``lruvec`` to scan page tables of processes that ++have been scheduled since the last scan. The cost of each differential ++scan is roughly proportional to the number of referenced pages it ++discovers. 
Unless address spaces are extremely sparse, page tables ++usually have better memory locality than the ``rmap``. The end result ++is generally a significant reduction in CPU usage, for workloads ++using a large amount of anon memory. ++ ++To-do List ++========== ++KVM Optimization ++---------------- ++Support shadow page table scanning. ++ ++NUMA Optimization ++----------------- ++Support NUMA policies and per-node RSS counters. +-- +2.31.1.295.g9ea45b61b8-goog + + diff --git a/sys-kernel/pinephone-sources/pinephone-sources-5.11.6.ebuild b/sys-kernel/pinephone-sources/pinephone-sources-5.11.6.ebuild index c8d0fef..c430f30 100755 --- a/sys-kernel/pinephone-sources/pinephone-sources-5.11.6.ebuild +++ b/sys-kernel/pinephone-sources/pinephone-sources-5.11.6.ebuild @@ -48,7 +48,7 @@ pkg_postinst() { einfo "To build the kernel use the following command:" einfo "make Image Image.gz modules" einfo "make DTC_FLAGS="-@" dtbs" - einfo "make install; make modules_intall; make dtbs_install" + einfo "make install; make modules_install; make dtbs_install" einfo "If you use kernel config coming with this ebuild, don't forget to also copy dracut-pp.conf to /etc/dracut.conf.d/" einfo "to make sure proper kernel modules are loaded into initramfs" einfo "if you want to cross compile pinephone kernel on amd64 host, follow the https://wiki.gentoo.org/wiki/Cross_build_environment" diff --git a/sys-kernel/pinephone-sources/pinephone-sources-5.12.0.ebuild b/sys-kernel/pinephone-sources/pinephone-sources-5.12.0.ebuild new file mode 100644 index 0000000..ef8bfef --- /dev/null +++ b/sys-kernel/pinephone-sources/pinephone-sources-5.12.0.ebuild @@ -0,0 +1,92 @@ +# Copyright 1999-2021 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI="6" +UNIPATCH_STRICTORDER="yes" +K_NOUSENAME="yes" +K_NOSETEXTRAVERSION="yes" +K_NOUSEPR="yes" +K_SECURITY_UNSUPPORTED="1" +K_BASE_VER="5.12" +K_EXP_GENPATCHES_NOUSE="1" +K_FROM_GIT="yes" +ETYPE="sources" +CKV="${PVR/-r/-git}" + +# only use this if it's not an _rc/_pre release +[ "${PV/_pre}" == "${PV}" ] && [ "${PV/_rc}" == "${PV}" ] && OKV="${PV}" +inherit kernel-2 +detect_version + + +DEPEND="${RDEPEND} + >=sys-devel/patch-2.7.5" + +DESCRIPTION="Full sources for the Linux kernel, with megi's patch for pinephone" +HOMEPAGE="https://www.kernel.org" + +KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~hppa ~ia64 ~mips ~ppc ~ppc64 ~s390 ~sparc ~x86" +MEGI_PATCH_URI="https://xff.cz/kernels/${PV:0:4}/patches/all.patch" +SRC_URI="${KERNEL_URI} ${MEGI_PATCH_URI} -> all-${PV}.patch" + +PATCHES=( + ${DISTDIR}/all-${PV}.patch + ${FILESDIR}/enable-hdmi-output-pinetab.patch + ${FILESDIR}/enable-jack-detection-pinetab.patch + ${FILESDIR}/pinetab-bluetooth.patch + ${FILESDIR}/pinetab-accelerometer.patch + ${FILESDIR}/dts-pinephone-drop-modem-power-node.patch + ${FILESDIR}/dts-headphone-jack-detection.patch + ${FILESDIR}/media-ov5640-Implement-autofocus.patch + ${FILESDIR}/0011-dts-pinetab-hardcode-mmc-numbers.patch + ${FILESDIR}/0012-pinephone-fix-pogopin-i2c.patch + ${FILESDIR}/0107-quirk-kernel-org-bug-210681-firmware_rome_error.patch + ${FILESDIR}/0177-leds-gpio-make-max_brightness-configurable.patch + ${FILESDIR}/0178-sun8i-codec-fix-headphone-jack-pin-name.patch + ${FILESDIR}/0179-arm64-dts-allwinner-pinephone-improve-device-tree-5.12.patch + ${FILESDIR}/panic-led-5.12.patch + ${FILESDIR}/PATCH-1-4-HID-magicmouse-add-Apple-Magic-Mouse-2-support.patch + ${FILESDIR}/PATCH-2-4-HID-magicmouse-fix-3-button-emulation-of-Mouse-2.patch + 
${FILESDIR}/PATCH-3-4-HID-magicmouse-fix-reconnection-of-Magic-Mouse-2.patch + ${FILESDIR}/PATCH-v2-01-16-include-linux-memcontrol.h-do-not-warn-in-page_memcg_rcu-if-CONFIG_MEMCG.patch + ${FILESDIR}/PATCH-v2-02-16-include-linux-nodemask.h-define-next_memory_node-if-CONFIG_NUMA.patch + ${FILESDIR}/PATCH-v2-03-16-include-linux-huge_mm.h-define-is_huge_zero_pmd-if-CONFIG_TRANSPARENT_HUGEPAGE.patch + ${FILESDIR}/PATCH-v2-04-16-include-linux-cgroup.h-export-cgroup_mutex.patch + ${FILESDIR}/PATCH-v2-05-16-mm-swap.c-export-activate_page.patch + ${FILESDIR}/PATCH-v2-06-16-mm-x86-support-the-access-bit-on-non-leaf-PMD-entries.patch + ${FILESDIR}/PATCH-v2-07-16-mm-vmscan.c-refactor-shrink_node.patch + ${FILESDIR}/PATCH-v2-08-16-mm-multigenerational-lru-groundwork.patch + ${FILESDIR}/PATCH-v2-09-16-mm-multigenerational-lru-activation.patch + ${FILESDIR}/PATCH-v2-10-16-mm-multigenerational-lru-mm_struct-list.patch + ${FILESDIR}/PATCH-v2-11-16-mm-multigenerational-lru-aging.patch + ${FILESDIR}/PATCH-v2-12-16-mm-multigenerational-lru-eviction.patch + ${FILESDIR}/PATCH-v2-13-16-mm-multigenerational-lru-page-reclaim.patch + ${FILESDIR}/PATCH-v2-14-16-mm-multigenerational-lru-user-interface.patch + ${FILESDIR}/PATCH-v2-15-16-mm-multigenerational-lru-Kconfig.patch + ${FILESDIR}/PATCH-v2-16-16-mm-multigenerational-lru-documentation.patch +) + +src_prepare() { + default + eapply_user +} + +pkg_postinst() { + kernel-2_pkg_postinst + einfo "For more info on this patchset, and how to report problems, see:" + einfo "${HOMEPAGE}" + einfo "To build the kernel use the following command:" + einfo "make Image Image.gz modules" + einfo "make DTC_FLAGS="-@" dtbs" + einfo "make install; make modules_install; make dtbs_install" + einfo "If you use kernel config coming with this ebuild, don't forget to also copy dracut-pp.conf to /etc/dracut.conf.d/" + einfo "to make sure proper kernel modules are loaded into initramfs" + einfo "if you want to cross compile pinephone kernel on amd64 host, follow the https://wiki.gentoo.org/wiki/Cross_build_environment" + einfo "to setup cross toolchain environment, then create a xmake wrapper like the following, and replace make with xmake in above commands" + einfo "#!/bin/sh" + einfo "exec make ARCH='arm64' CROSS_COMPILE='aarch64-unknown-linux-gnu-' INSTALL_MOD_PATH='${SYSROOT}' '$@'" +} + +pkg_postrm() { + kernel-2_pkg_postrm +}
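
A quick way to confirm that the bundled multigenerational LRU patches are active
after booting a kernel built from these sources is to check the interfaces they
add. A minimal sketch, assuming the patched kernel is running and, for the first
command, that CONFIG_IKCONFIG_PROC is enabled:

    # build options (CONFIG_LRU_GEN, CONFIG_NR_LRU_GENS, ...) baked into the kernel
    zgrep LRU_GEN /proc/config.gz
    # runtime switch added by the user-interface patch; 1 means enabled
    cat /sys/kernel/mm/lru_gen/enabled
    # per-memcg, per-node generation sizes from the debugfs interface
    cat /sys/kernel/debug/lru_gen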