121 files changed, 900 insertions, 693 deletions
@@ -322,6 +322,7 @@ Jayachandran C <c.jayachandran@gmail.com> <jchandra@broadcom.com>
 Jayachandran C <c.jayachandran@gmail.com> <jchandra@digeo.com>
 Jayachandran C <c.jayachandran@gmail.com> <jnair@caviumnetworks.com>
 <jean-philippe@linaro.org> <jean-philippe.brucker@arm.com>
+Jean-Michel Hautbois <jeanmichel.hautbois@yoseli.org> <jeanmichel.hautbois@ideasonboard.com>
 Jean Tourrilhes <jt@hpl.hp.com>
 Jeevan Shriram <quic_jshriram@quicinc.com> <jshriram@codeaurora.org>
 Jeff Garzik <jgarzik@pretzel.yyz.us>
@@ -438,6 +439,8 @@ Linus Lüssing <linus.luessing@c0d3.blue> <ll@simonwunderlich.de>
 Li Yang <leoyang.li@nxp.com> <leoli@freescale.com>
 Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>
 Lior David <quic_liord@quicinc.com> <liord@codeaurora.org>
+Loic Poulain <loic.poulain@oss.qualcomm.com> <loic.poulain@linaro.org>
+Loic Poulain <loic.poulain@oss.qualcomm.com> <loic.poulain@intel.com>
 Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com>
 Lorenzo Stoakes <lorenzo.stoakes@oracle.com> <lstoakes@gmail.com>
 Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net>
diff --git a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
index 6ad78429dc74..c92341888a28 100644
--- a/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
+++ b/Documentation/devicetree/bindings/ata/ceva,ahci-1v84.yaml
@@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Ceva AHCI SATA Controller
 
 maintainers:
-  - Mubin Sayyed <mubin.sayyed@amd.com>
   - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
 
 description: |
diff --git a/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml
index 350fb8f400f0..5952e6448ed4 100644
--- a/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/nwl-dsi.yaml
@@ -111,11 +111,27 @@ properties:
         unevaluatedProperties: false
 
       port@1:
-        $ref: /schemas/graph.yaml#/properties/port
+        $ref: /schemas/graph.yaml#/$defs/port-base
+        unevaluatedProperties: false
         description:
           DSI output port node to the panel or the next bridge
           in the chain
 
+        properties:
+          endpoint:
+            $ref: /schemas/media/video-interfaces.yaml#
+            unevaluatedProperties: false
+
+            properties:
+              data-lanes:
+                description: array of physical DSI data lane indexes.
+                minItems: 1
+                items:
+                  - const: 1
+                  - const: 2
+                  - const: 3
+                  - const: 4
+
 required:
   - port@0
   - port@1
diff --git a/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml b/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml
index bb93baa88879..e13e9d6dd148 100644
--- a/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml
+++ b/Documentation/devicetree/bindings/gpio/xlnx,zynqmp-gpio-modepin.yaml
@@ -12,7 +12,6 @@ description:
   PS_MODE). Every pin can be configured as input/output.
 
 maintainers:
-  - Mubin Sayyed <mubin.sayyed@amd.com>
   - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
 
 properties:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/fsl,irqsteer.yaml b/Documentation/devicetree/bindings/interrupt-controller/fsl,irqsteer.yaml
index 6076ddf56bb5..c49688be1058 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/fsl,irqsteer.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/fsl,irqsteer.yaml
@@ -19,6 +19,7 @@ properties:
           - fsl,imx8mp-irqsteer
           - fsl,imx8qm-irqsteer
           - fsl,imx8qxp-irqsteer
+          - fsl,imx94-irqsteer
       - const: fsl,imx-irqsteer
 
   reg:
diff --git a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml
index a4dfa09344dd..f85ee5d20ccb 100644
--- a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml
+++ b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.yaml
@@ -9,15 +9,6 @@ title: Renesas R-Car Timer Pulse Unit PWM Controller
 maintainers:
   - Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
 
-select:
-  properties:
-    compatible:
-      contains:
-        const: renesas,tpu
-  required:
-    - compatible
-    - '#pwm-cells'
-
 properties:
   compatible:
     items:
diff --git a/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml b/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml
index 1f1b42dde94d..1db85fc9966f 100644
--- a/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml
+++ b/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.yaml
@@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Zynq UltraScale+ MPSoC and Versal reset
 
 maintainers:
-  - Mubin Sayyed <mubin.sayyed@amd.com>
   - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
 
 description: |
diff --git a/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml b/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml
index 31295be91013..234089b5954d 100644
--- a/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml
+++ b/Documentation/devicetree/bindings/soc/fsl/fsl,ls1028a-reset.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Freescale Layerscape Reset Registers Module
 
 maintainers:
-  - Frank Li
+  - Frank Li <Frank.Li@nxp.com>
 
 description:
   Reset Module includes chip reset, service processor control and Reset Control
diff --git a/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.yaml b/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.yaml
index 891cca009528..6b80b060672e 100644
--- a/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.yaml
+++ b/Documentation/devicetree/bindings/timer/nxp,sysctr-timer.yaml
@@ -18,9 +18,14 @@ description: |
 
 properties:
   compatible:
-    enum:
-      - nxp,imx95-sysctr-timer
-      - nxp,sysctr-timer
+    oneOf:
+      - enum:
+          - nxp,imx95-sysctr-timer
+          - nxp,sysctr-timer
+      - items:
+          - enum:
+              - nxp,imx94-sysctr-timer
+          - const: nxp,imx95-sysctr-timer
 
   reg:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/timer/renesas,tpu.yaml b/Documentation/devicetree/bindings/timer/renesas,tpu.yaml
deleted file mode 100644
index 7a473b302775..000000000000
--- a/Documentation/devicetree/bindings/timer/renesas,tpu.yaml
+++ /dev/null
@@ -1,56 +0,0 @@
-# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
-%YAML 1.2
----
-$id: http://devicetree.org/schemas/timer/renesas,tpu.yaml#
-$schema: http://devicetree.org/meta-schemas/core.yaml#
-
-title: Renesas H8/300 Timer Pulse Unit
-
-maintainers:
-  - Yoshinori Sato <ysato@users.sourceforge.jp>
-
-description:
-  The TPU is a 16bit timer/counter with configurable clock inputs and
-  programmable compare match.
-  This implementation supports only cascade mode.
-
-select:
-  properties:
-    compatible:
-      contains:
-        const: renesas,tpu
-    '#pwm-cells': false
-  required:
-    - compatible
-
-properties:
-  compatible:
-    const: renesas,tpu
-
-  reg:
-    items:
-      - description: First channel
-      - description: Second channel
-
-  clocks:
-    maxItems: 1
-
-  clock-names:
-    const: fck
-
-required:
-  - compatible
-  - reg
-  - clocks
-  - clock-names
-
-additionalProperties: false
-
-examples:
-  - |
-    tpu: tpu@ffffe0 {
-        compatible = "renesas,tpu";
-        reg = <0xffffe0 16>, <0xfffff0 12>;
-        clocks = <&pclk>;
-        clock-names = "fck";
-    };
diff --git a/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml b/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml
index b5843f4d17d8..379dacacb526 100644
--- a/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml
+++ b/Documentation/devicetree/bindings/usb/dwc3-xilinx.yaml
@@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Xilinx SuperSpeed DWC3 USB SoC controller
 
 maintainers:
-  - Mubin Sayyed <mubin.sayyed@amd.com>
   - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
 
 properties:
diff --git a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml
index e2a72deae776..c68c04da3399 100644
--- a/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml
+++ b/Documentation/devicetree/bindings/usb/microchip,usb5744.yaml
@@ -17,7 +17,6 @@ description:
 
 maintainers:
   - Michal Simek <michal.simek@amd.com>
-  - Mubin Sayyed <mubin.sayyed@amd.com>
   - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
 
 properties:
diff --git a/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml b/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml
index a7f75fe36665..f295aa9d9ee7 100644
--- a/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml
+++ b/Documentation/devicetree/bindings/usb/xlnx,usb2.yaml
@@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Xilinx udc controller
 
 maintainers:
-  - Mubin Sayyed <mubin.sayyed@amd.com>
   - Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
 
 properties:
diff --git a/Documentation/userspace-api/mseal.rst b/Documentation/userspace-api/mseal.rst
index 1dabfc29be0d..7195a7f91107 100644
--- a/Documentation/userspace-api/mseal.rst
+++ b/Documentation/userspace-api/mseal.rst
@@ -27,7 +27,7 @@ SYSCALL
 =======
 mseal syscall signature
 -----------------------
-   ``int mseal(void \* addr, size_t len, unsigned long flags)``
+   ``int mseal(void *addr, size_t len, unsigned long flags)``
 
 **addr**/**len**: virtual memory address range.
 
 The address range set by **addr**/**len** must meet:
diff --git a/MAINTAINERS b/MAINTAINERS
index c59316109e3f..47df41261dc1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10956,6 +10956,7 @@ F:      include/linux/platform_data/huawei-gaokun-ec.h
 
 HUGETLB SUBSYSTEM
 M:      Muchun Song <muchun.song@linux.dev>
+R:      Oscar Salvador <osalvador@suse.de>
 L:      linux-mm@kvack.org
 S:      Maintained
 F:      Documentation/ABI/testing/sysfs-kernel-mm-hugepages
@@ -12812,6 +12813,7 @@ F:      lib/Kconfig.kcsan
 F:      scripts/Makefile.kcsan
 
 KDUMP
+M:      Andrew Morton <akpm@linux-foundation.org>
 M:      Baoquan He <bhe@redhat.com>
 R:      Vivek Goyal <vgoyal@redhat.com>
 R:      Dave Young <dyoung@redhat.com>
@@ -13113,6 +13115,8 @@ F:      fs/kernfs/
 F:      include/linux/kernfs.h
 
 KEXEC
+M:      Andrew Morton <akpm@linux-foundation.org>
+M:      Baoquan He <bhe@redhat.com>
 L:      kexec@lists.infradead.org
 W:      http://kernel.org/pub/linux/utils/kernel/kexec/
 F:      include/linux/kexec.h
diff --git a/crypto/ahash.c b/crypto/ahash.c
index 9f57b925b116..2d9eec2b2b1c 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -315,16 +315,7 @@ EXPORT_SYMBOL_GPL(crypto_ahash_setkey);
 
 static bool ahash_request_hasvirt(struct ahash_request *req)
 {
-        struct ahash_request *r2;
-
-        if (ahash_request_isvirt(req))
-                return true;
-
-        list_for_each_entry(r2, &req->base.list, base.list)
-                if (ahash_request_isvirt(r2))
-                        return true;
-
-        return false;
+        return ahash_request_isvirt(req);
 }
 
 static int ahash_reqchain_virt(struct ahash_save_req_state *state,
@@ -472,7 +463,6 @@ static int ahash_do_req_chain(struct ahash_request *req,
         bool update = op == crypto_ahash_alg(tfm)->update;
         struct ahash_save_req_state *state;
         struct ahash_save_req_state state0;
-        struct ahash_request *r2;
         u8 *page = NULL;
         int err;
 
@@ -509,7 +499,6 @@ static int ahash_do_req_chain(struct ahash_request *req,
                 state->offset = 0;
                 state->nbytes = 0;
                 INIT_LIST_HEAD(&state->head);
-                list_splice_init(&req->base.list, &state->head);
 
                 if (page)
                         sg_init_one(&state->sg, page, PAGE_SIZE);
@@ -540,9 +529,6 @@ out_free_page:
 
 out_set_chain:
         req->base.err = err;
-        list_for_each_entry(r2, &req->base.list, base.list)
-                r2->base.err = err;
-
         return err;
 }
 
@@ -551,19 +537,10 @@ int crypto_ahash_init(struct ahash_request *req)
         struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 
         if (likely(tfm->using_shash)) {
-                struct ahash_request *r2;
                 int err;
 
                 err = crypto_shash_init(prepare_shash_desc(req, tfm));
                 req->base.err = err;
-
-                list_for_each_entry(r2, &req->base.list, base.list) {
-                        struct shash_desc *desc;
-
-                        desc = prepare_shash_desc(r2, tfm);
-                        r2->base.err = crypto_shash_init(desc);
-                }
-
                 return err;
         }
 
@@ -620,19 +597,10 @@ int crypto_ahash_update(struct ahash_request *req)
         struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 
         if (likely(tfm->using_shash)) {
-                struct ahash_request *r2;
                 int err;
 
                 err = shash_ahash_update(req, ahash_request_ctx(req));
                 req->base.err = err;
-
-                list_for_each_entry(r2, &req->base.list, base.list) {
-                        struct shash_desc *desc;
-
-                        desc = ahash_request_ctx(r2);
-                        r2->base.err = shash_ahash_update(r2, desc);
-                }
-
                 return err;
         }
 
@@ -645,19 +613,10 @@ int crypto_ahash_final(struct ahash_request *req)
         struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 
         if (likely(tfm->using_shash)) {
-                struct ahash_request *r2;
                 int err;
 
                 err = crypto_shash_final(ahash_request_ctx(req), req->result);
                 req->base.err = err;
-
-                list_for_each_entry(r2, &req->base.list, base.list) {
-                        struct shash_desc *desc;
-
-                        desc = ahash_request_ctx(r2);
-                        r2->base.err = crypto_shash_final(desc, r2->result);
-                }
-
                 return err;
         }
 
@@ -670,19 +629,10 @@ int crypto_ahash_finup(struct ahash_request *req)
         struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 
         if (likely(tfm->using_shash)) {
-                struct ahash_request *r2;
                 int err;
 
                 err = shash_ahash_finup(req, ahash_request_ctx(req));
                 req->base.err = err;
-
-                list_for_each_entry(r2, &req->base.list, base.list) {
-                        struct shash_desc *desc;
-
-                        desc = ahash_request_ctx(r2);
-                        r2->base.err = shash_ahash_finup(r2, desc);
-                }
-
                 return err;
         }
 
@@ -757,19 +707,10 @@ int crypto_ahash_digest(struct ahash_request *req)
         struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 
         if (likely(tfm->using_shash)) {
-                struct ahash_request *r2;
                 int err;
 
                 err = shash_ahash_digest(req, prepare_shash_desc(req, tfm));
                 req->base.err = err;
-
-                list_for_each_entry(r2, &req->base.list, base.list) {
-                        struct shash_desc *desc;
-
-                        desc = prepare_shash_desc(r2, tfm);
-                        r2->base.err = shash_ahash_digest(r2, desc);
-                }
-
                 return err;
         }
 
@@ -1133,20 +1074,5 @@ int ahash_register_instance(struct crypto_template *tmpl,
 }
 EXPORT_SYMBOL_GPL(ahash_register_instance);
 
-void ahash_request_free(struct ahash_request *req)
-{
-        struct ahash_request *tmp;
-        struct ahash_request *r2;
-
-        if (unlikely(!req))
-                return;
-
-        list_for_each_entry_safe(r2, tmp, &req->base.list, base.list)
-                kfree_sensitive(r2);
-
-        kfree_sensitive(req);
-}
-EXPORT_SYMBOL_GPL(ahash_request_free);
-
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Asynchronous cryptographic hash type");
diff --git a/crypto/scompress.c b/crypto/scompress.c
index d435d4b24469..5762fcc63b51 100644
--- a/crypto/scompress.c
+++ b/crypto/scompress.c
@@ -111,10 +111,14 @@ static void scomp_free_streams(struct scomp_alg *alg)
         struct crypto_acomp_stream __percpu *stream = alg->stream;
         int i;
 
+        alg->stream = NULL;
+        if (!stream)
+                return;
+
         for_each_possible_cpu(i) {
                 struct crypto_acomp_stream *ps = per_cpu_ptr(stream, i);
 
-                if (!ps->ctx)
+                if (IS_ERR_OR_NULL(ps->ctx))
                         break;
 
                 alg->free_ctx(ps->ctx);
@@ -132,6 +136,8 @@ static int scomp_alloc_streams(struct scomp_alg *alg)
         if (!stream)
                 return -ENOMEM;
 
+        alg->stream = stream;
+
         for_each_possible_cpu(i) {
                 struct crypto_acomp_stream *ps = per_cpu_ptr(stream, i);
 
@@ -143,8 +149,6 @@ static int scomp_alloc_streams(struct scomp_alg *alg)
 
                 spin_lock_init(&ps->lock);
         }
-
-        alg->stream = stream;
         return 0;
 }
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 7701d00bcb3a..b6e7c0b29d4e 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -122,12 +122,12 @@ int caam_qi_enqueue(struct device *qidev, struct caam_drv_req *req)
         qm_fd_addr_set64(&fd, addr);
 
         do {
+                refcount_inc(&req->drv_ctx->refcnt);
                 ret = qman_enqueue(req->drv_ctx->req_fq, &fd);
-                if (likely(!ret)) {
-                        refcount_inc(&req->drv_ctx->refcnt);
+                if (likely(!ret))
                         return 0;
-                }
 
+                refcount_dec(&req->drv_ctx->refcnt);
                 if (ret != -EBUSY)
                         break;
                 num_retries++;
diff --git a/drivers/crypto/tegra/tegra-se-aes.c b/drivers/crypto/tegra/tegra-se-aes.c
index ca9d0cca1f74..0e07d0523291 100644
--- a/drivers/crypto/tegra/tegra-se-aes.c
+++ b/drivers/crypto/tegra/tegra-se-aes.c
@@ -269,7 +269,7 @@ static int tegra_aes_do_one_req(struct crypto_engine *engine, void *areq)
         unsigned int cmdlen, key1_id, key2_id;
         int ret;
 
-        rctx->iv = (u32 *)req->iv;
+        rctx->iv = (ctx->alg == SE_ALG_ECB) ? NULL : (u32 *)req->iv;
         rctx->len = req->cryptlen;
         key1_id = ctx->key1_id;
         key2_id = ctx->key2_id;
@@ -498,9 +498,6 @@ static int tegra_aes_crypt(struct skcipher_request *req, bool encrypt)
         if (!req->cryptlen)
                 return 0;
 
-        if (ctx->alg == SE_ALG_ECB)
-                req->iv = NULL;
-
         rctx->encrypt = encrypt;
 
         return crypto_transfer_skcipher_request_to_engine(ctx->se->engine, req);
diff --git a/drivers/fpga/tests/fpga-bridge-test.c b/drivers/fpga/tests/fpga-bridge-test.c
index b9ab29809e96..124ba40e32b1 100644
--- a/drivers/fpga/tests/fpga-bridge-test.c
+++ b/drivers/fpga/tests/fpga-bridge-test.c
@@ -170,4 +170,5 @@ static struct kunit_suite fpga_bridge_suite = {
 
 kunit_test_suite(fpga_bridge_suite);
 
+MODULE_DESCRIPTION("KUnit test for the FPGA Bridge");
 MODULE_LICENSE("GPL");
diff --git a/drivers/fpga/tests/fpga-mgr-test.c b/drivers/fpga/tests/fpga-mgr-test.c
index 9cb37aefbac4..8748babb0504 100644
--- a/drivers/fpga/tests/fpga-mgr-test.c
+++ b/drivers/fpga/tests/fpga-mgr-test.c
@@ -330,4 +330,5 @@ static struct kunit_suite fpga_mgr_suite = {
 
 kunit_test_suite(fpga_mgr_suite);
 
+MODULE_DESCRIPTION("KUnit test for the FPGA Manager");
 MODULE_LICENSE("GPL");
diff --git a/drivers/fpga/tests/fpga-region-test.c b/drivers/fpga/tests/fpga-region-test.c
index 6a108cafded8..020ceac48509 100644
--- a/drivers/fpga/tests/fpga-region-test.c
+++ b/drivers/fpga/tests/fpga-region-test.c
@@ -214,4 +214,5 @@ static struct kunit_suite fpga_region_suite = {
 
 kunit_test_suite(fpga_region_suite);
 
+MODULE_DESCRIPTION("KUnit test for the FPGA Region");
 MODULE_LICENSE("GPL");
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index fedcdb56fb6b..ab31eefa916b 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -72,6 +72,8 @@ static const char * const cma_events[] = {
 static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
                               enum ib_gid_type gid_type);
 
+static void cma_netevent_work_handler(struct work_struct *_work);
+
 const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
 {
         size_t index = event;
@@ -1047,6 +1049,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
         get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
         id_priv->id.route.addr.dev_addr.net = get_net(net);
         id_priv->seq_num &= 0x00ffffff;
+        INIT_WORK(&id_priv->id.net_work, cma_netevent_work_handler);
 
         rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID);
         if (parent)
@@ -5241,7 +5244,6 @@ static int cma_netevent_callback(struct notifier_block *self,
                 if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
                             neigh->ha, ETH_ALEN))
                         continue;
-                INIT_WORK(&current_id->id.net_work, cma_netevent_work_handler);
                 cma_id_get(current_id);
                 queue_work(cma_wq, &current_id->id.net_work);
         }
diff --git a/drivers/infiniband/core/ucaps.c b/drivers/infiniband/core/ucaps.c
index 6853c6d078f9..de5cb8bf0a61 100644
--- a/drivers/infiniband/core/ucaps.c
+++ b/drivers/infiniband/core/ucaps.c
@@ -170,7 +170,7 @@ int ib_create_ucap(enum rdma_user_cap type)
         ucap->dev.class = &ucaps_class;
         ucap->dev.devt = MKDEV(MAJOR(ucaps_base_dev), type);
         ucap->dev.release = ucap_dev_release;
-        ret = dev_set_name(&ucap->dev, ucap_names[type]);
+        ret = dev_set_name(&ucap->dev, "%s", ucap_names[type]);
         if (ret)
                 goto err_device;
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index e9fa22d31c23..c48ef6083020 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -76,12 +76,14 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
         npfns = (end - start) >> PAGE_SHIFT;
         umem_odp->pfn_list = kvcalloc(
-                npfns, sizeof(*umem_odp->pfn_list), GFP_KERNEL);
+                npfns, sizeof(*umem_odp->pfn_list),
+                GFP_KERNEL | __GFP_NOWARN);
         if (!umem_odp->pfn_list)
                 return -ENOMEM;
 
         umem_odp->dma_list = kvcalloc(
-                ndmas, sizeof(*umem_odp->dma_list), GFP_KERNEL);
+                ndmas, sizeof(*umem_odp->dma_list),
+                GFP_KERNEL | __GFP_NOWARN);
         if (!umem_odp->dma_list) {
                 ret = -ENOMEM;
                 goto out_pfn_list;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 9082b3fd2b47..063801384b2b 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -1774,10 +1774,7 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
                                                ib_srq);
         struct bnxt_re_dev *rdev = srq->rdev;
         struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq;
-        struct bnxt_qplib_nq *nq = NULL;
 
-        if (qplib_srq->cq)
-                nq = qplib_srq->cq->nq;
         if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) {
                 free_page((unsigned long)srq->uctx_srq_page);
                 hash_del(&srq->hash_entry);
@@ -1785,8 +1782,6 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
         bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq);
         ib_umem_release(srq->umem);
         atomic_dec(&rdev->stats.res.srq_count);
-        if (nq)
-                nq->budget--;
 
         return 0;
 }
@@ -1827,7 +1822,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
                        struct ib_udata *udata)
 {
         struct bnxt_qplib_dev_attr *dev_attr;
-        struct bnxt_qplib_nq *nq = NULL;
         struct bnxt_re_ucontext *uctx;
         struct bnxt_re_dev *rdev;
         struct bnxt_re_srq *srq;
@@ -1873,7 +1867,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
         srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id;
         srq->qplib_srq.sg_info.pgsize = PAGE_SIZE;
         srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT;
-        nq = &rdev->nqr->nq[0];
 
         if (udata) {
                 rc = bnxt_re_init_user_srq(rdev, pd, srq, udata);
@@ -1908,8 +1901,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
                         goto fail;
                 }
         }
-        if (nq)
-                nq->budget++;
         active_srqs = atomic_inc_return(&rdev->stats.res.srq_count);
         if (active_srqs > rdev->stats.res.srq_watermark)
                 rdev->stats.res.srq_watermark = active_srqs;
@@ -3079,7 +3070,6 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
 
         ib_umem_release(cq->umem);
         atomic_dec(&rdev->stats.res.cq_count);
-        nq->budget--;
         kfree(cq->cql);
         return 0;
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index cf89a8db4f64..8d0b63d4b50a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -763,7 +763,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
                 if (ret)
                         return ret;
         }
-        dma_set_max_seg_size(dev, UINT_MAX);
+        dma_set_max_seg_size(dev, SZ_2G);
         ret = ib_register_device(ib_dev, "hns_%d", dev);
         if (ret) {
                 dev_err(dev, "ib_register_device failed!\n");
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 251246c73b33..0ff9f18a71e8 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -3461,7 +3461,6 @@ DECLARE_UVERBS_NAMED_OBJECT(
         &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY));
 
 const struct uapi_definition mlx5_ib_flow_defs[] = {
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
         UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
                 MLX5_IB_OBJECT_FLOW_MATCHER),
         UAPI_DEF_CHAIN_OBJ_TREE(
@@ -3472,7 +3471,6 @@ const struct uapi_definition mlx5_ib_flow_defs[] = {
         UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
                 MLX5_IB_OBJECT_STEERING_ANCHOR,
                 UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)),
-#endif
         {},
 };
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 4ddcd5860e0f..11eca39b73a9 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -397,7 +397,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
         if (!us_ibdev) {
                 usnic_err("Device %s context alloc failed\n",
                           netdev_name(pci_get_drvdata(dev)));
-                return ERR_PTR(-EFAULT);
+                return NULL;
         }
 
         us_ibdev->ufdev = usnic_fwd_dev_alloc(dev);
@@ -517,8 +517,8 @@ static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic)
         }
 
         us_ibdev = usnic_ib_device_add(parent_pci);
-        if (IS_ERR_OR_NULL(us_ibdev)) {
-                us_ibdev = us_ibdev ? us_ibdev : ERR_PTR(-EFAULT);
+        if (!us_ibdev) {
+                us_ibdev = ERR_PTR(-EFAULT);
                 goto out;
         }
 
@@ -586,10 +586,10 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev,
         }
 
         pf = usnic_ib_discover_pf(vf->vnic);
-        if (IS_ERR_OR_NULL(pf)) {
-                usnic_err("Failed to discover pf of vnic %s with err%ld\n",
-                          pci_name(pdev), PTR_ERR(pf));
-                err = pf ? PTR_ERR(pf) : -EFAULT;
+        if (IS_ERR(pf)) {
+                err = PTR_ERR(pf);
+                usnic_err("Failed to discover pf of vnic %s with err%d\n",
+                          pci_name(pdev), err);
                 goto out_clean_vnic;
         }
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index feb386d98d1d..0bc3fbb6554f 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -140,6 +140,12 @@ static inline int qp_mtu(struct rxe_qp *qp)
                 return IB_MTU_4096;
 }
 
+static inline bool is_odp_mr(struct rxe_mr *mr)
+{
+        return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem &&
+               mr->umem->is_odp;
+}
+
 void free_rd_atomic_resource(struct resp_res *res);
 
 static inline void rxe_advance_resp_resource(struct rxe_qp *qp)
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 868d2f0b74e9..432d864c3ce9 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -323,7 +323,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
                 return err;
         }
 
-        if (mr->umem->is_odp)
+        if (is_odp_mr(mr))
                 return rxe_odp_mr_copy(mr, iova, addr, length, dir);
         else
                 return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
@@ -536,7 +536,7 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
         u64 *va;
 
         /* ODP is not supported right now. WIP. */
-        if (mr->umem->is_odp)
+        if (is_odp_mr(mr))
                 return RESPST_ERR_UNSUPPORTED_OPCODE;
 
         /* See IBA oA19-28 */
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 54ba9ee1acc5..5d9174e408db 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -650,7 +650,7 @@ static enum resp_states process_flush(struct rxe_qp *qp,
         struct resp_res *res = qp->resp.res;
 
         /* ODP is not supported right now. WIP. */
-        if (mr->umem->is_odp)
+        if (is_odp_mr(mr))
                 return RESPST_ERR_UNSUPPORTED_OPCODE;
 
         /* oA19-14, oA19-15 */
@@ -706,7 +706,7 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
         if (!res->replay) {
                 u64 iova = qp->resp.va + qp->resp.offset;
 
-                if (mr->umem->is_odp)
+                if (is_odp_mr(mr))
                         err = rxe_odp_atomic_op(mr, iova, pkt->opcode,
                                                 atmeth_comp(pkt),
                                                 atmeth_swap_add(pkt),
diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
index f9be26d25348..d096b58cd0ae 100644
--- a/drivers/ras/amd/atl/internal.h
+++ b/drivers/ras/amd/atl/internal.h
@@ -362,4 +362,7 @@ static inline void atl_debug_on_bad_intlv_mode(struct addr_ctx *ctx)
         atl_debug(ctx, "Unrecognized interleave mode: %u", ctx->map.intlv_mode);
 }
 
+#define MI300_UMC_MCA_COL       GENMASK(5, 1)
+#define MI300_UMC_MCA_ROW13     BIT(23)
+
 #endif /* __AMD_ATL_INTERNAL_H__ */
diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c
index dc8aa12f63c8..6e072b7667e9 100644
--- a/drivers/ras/amd/atl/umc.c
+++ b/drivers/ras/amd/atl/umc.c
@@ -229,7 +229,6 @@ int get_umc_info_mi300(void)
  * Additionally, the PC and Bank bits may be hashed. This must be accounted for before
  * reconstructing the normalized address.
  */
-#define MI300_UMC_MCA_COL       GENMASK(5, 1)
 #define MI300_UMC_MCA_BANK      GENMASK(9, 6)
 #define MI300_UMC_MCA_ROW       GENMASK(24, 10)
 #define MI300_UMC_MCA_PC        BIT(25)
@@ -320,7 +319,7 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr)
  * See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats.
 */
 #define MI300_NUM_COL           BIT(HWEIGHT(MI300_UMC_MCA_COL))
-static void retire_row_mi300(struct atl_err *a_err)
+static void _retire_row_mi300(struct atl_err *a_err)
 {
         unsigned long addr;
         struct page *p;
@@ -351,6 +350,22 @@ static void retire_row_mi300(struct atl_err *a_err)
         }
 }
 
+/*
+ * In addition to the column bits, the row[13] bit should also be included when
+ * calculating addresses affected by a physical row.
+ *
+ * Instead of running through another loop over a single bit, just run through
+ * the column bits twice and flip the row[13] bit in-between.
+ *
+ * See MI300_UMC_MCA_ROW for the row bits in MCA_ADDR_UMC value.
+ */
+static void retire_row_mi300(struct atl_err *a_err)
+{
+        _retire_row_mi300(a_err);
+        a_err->addr ^= MI300_UMC_MCA_ROW13;
+        _retire_row_mi300(a_err);
+}
+
 void amd_retire_dram_row(struct atl_err *a_err)
 {
         if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c
index 90de737fbc90..8877c6ff64c4 100644
--- a/drivers/ras/amd/fmpm.c
+++ b/drivers/ras/amd/fmpm.c
@@ -250,6 +250,13 @@ static bool rec_has_valid_entries(struct fru_rec *rec)
         return true;
 }
 
+/*
+ * Row retirement is done on MI300 systems, and some bits are 'don't
+ * care' for comparing addresses with unique physical rows.  This
+ * includes all column bits and the row[13] bit.
+ */
+#define MASK_ADDR(addr) ((addr) & ~(MI300_UMC_MCA_ROW13 | MI300_UMC_MCA_COL))
+
 static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_desc *new)
 {
         /*
@@ -258,7 +265,7 @@ static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_
          *
          * Also, order the checks from most->least likely to fail to shortcut the code.
          */
-        if (old->addr != new->addr)
+        if (MASK_ADDR(old->addr) != MASK_ADDR(new->addr))
                 return false;
 
         if (old->hw_id != new->hw_id)
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index 691e0ae607a1..8c6130789fde 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -348,9 +348,9 @@ static int afs_dynroot_readdir(struct file *file, struct dir_context *ctx)
         }
 
         if ((unsigned long long)ctx->pos <= AFS_MAX_DYNROOT_CELL_INO) {
-                rcu_read_lock();
+                down_read(&net->cells_lock);
                 ret = afs_dynroot_readdir_cells(net, ctx);
-                rcu_read_unlock();
+                up_read(&net->cells_lock);
         }
         return ret;
 }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3dd555db3d32..aa58e0663a5d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3853,7 +3853,6 @@ static int write_dev_supers(struct btrfs_device *device,
                         atomic_inc(&device->sb_write_errors);
                         continue;
                 }
-                ASSERT(folio_order(folio) == 0);
 
                 offset = offset_in_folio(folio, bytenr);
                 disk_super = folio_address(folio) + offset;
@@ -3926,7 +3925,6 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
                 /* If the folio has been removed, then we know it completed. */
                 if (IS_ERR(folio))
                         continue;
-                ASSERT(folio_order(folio) == 0);
 
                 /* Folio will be unlocked once the write completes. */
                 folio_wait_locked(folio);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a13d81bb56a0..63aeacc54945 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4902,6 +4902,8 @@ static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue
 
         ret = btrfs_encoded_read(&kiocb, &data->iter, &data->args, &cached_state,
                                  &disk_bytenr, &disk_io_size);
+        if (ret == -EAGAIN)
+                goto out_acct;
         if (ret < 0 && ret != -EIOCBQUEUED)
                 goto out_free;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 40709e2a44fc..7121d8c7a318 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1139,8 +1139,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
                 subvol_name = btrfs_get_subvol_name_from_objectid(info,
                                 btrfs_root_id(BTRFS_I(d_inode(dentry))->root));
                 if (!IS_ERR(subvol_name)) {
-                        seq_puts(seq, ",subvol=");
-                        seq_escape(seq, subvol_name, " \t\n\\");
+                        seq_show_option(seq, "subvol", subvol_name);
                         kfree(subvol_name);
                 }
                 return 0;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 42e4d6eeb29f..9c20d78e41f6 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -89,12 +89,12 @@ enum {
 };
 
 static const struct fs_parameter_spec devpts_param_specs[] = {
-        fsparam_u32     ("gid",         Opt_gid),
+        fsparam_gid     ("gid",         Opt_gid),
         fsparam_s32     ("max",         Opt_max),
         fsparam_u32oct  ("mode",        Opt_mode),
         fsparam_flag    ("newinstance", Opt_newinstance),
         fsparam_u32oct  ("ptmxmode",    Opt_ptmxmode),
-        fsparam_u32     ("uid",         Opt_uid),
+        fsparam_uid     ("uid",         Opt_uid),
         {}
 };
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 2c7b24cb67ad..53c2626e90e7 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -1669,6 +1669,9 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
         unsigned int virtqueue_size;
         int err = -EIO;
 
+        if (!fsc->source)
+                return invalf(fsc, "No source specified");
+
         /* This gets a reference on virtio_fs object. This ptr gets installed
          * in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
          * to drop the reference to this object.
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 6add6ebfef89..cb823a8a6ba9 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -67,6 +67,12 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off)
         else
                 key_len = tree->max_key_len + 1;
 
+        if (key_len > sizeof(hfs_btree_key) || key_len < 1) {
+                memset(key, 0, sizeof(hfs_btree_key));
+                pr_err("hfs: Invalid key length: %d\n", key_len);
+                return;
+        }
+
         hfs_bnode_read(node, key, off, key_len);
 }
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 87974d5e6791..079ea80534f7 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -67,6 +67,12 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off)
         else
                 key_len = tree->max_key_len + 2;
 
+        if (key_len > sizeof(hfsplus_btree_key) || key_len < 1) {
+                memset(key, 0, sizeof(hfsplus_btree_key));
+                pr_err("hfsplus: Invalid key length: %d\n", key_len);
+                return;
+        }
+
         hfs_bnode_read(node, key, off, key_len);
 }
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 31553372b33a..5b08bd417b28 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -259,7 +259,7 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
         }
 
         /* truncate len if we find any trailing uptodate block(s) */
-        for ( ; i <= last; i++) {
+        while (++i <= last) {
                 if (ifs_block_is_uptodate(ifs, i)) {
                         plen -= (last - i + 1) * block_size;
                         last = i - 1;
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index 35768a63fb1d..421d247fae52 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -180,7 +180,7 @@ static struct dentry *isofs_fh_to_parent(struct super_block *sb,
                 return NULL;
 
         return isofs_export_iget(sb,
-                                 fh_len > 2 ? ifid->parent_block : 0,
+                                 fh_len > 3 ? ifid->parent_block : 0,
                                  ifid->parent_offset,
                                  fh_len > 4 ?
                                  ifid->parent_generation : 0);
 }
diff --git a/fs/namei.c b/fs/namei.c
index 360a86ca1f02..8510ff53f12e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -125,9 +125,9 @@
 
 #define EMBEDDED_NAME_MAX       (PATH_MAX - offsetof(struct filename, iname))
 
-static inline void initname(struct filename *name)
+static inline void initname(struct filename *name, const char __user *uptr)
 {
-        name->uptr = NULL;
+        name->uptr = uptr;
         name->aname = NULL;
         atomic_set(&name->refcnt, 1);
 }
@@ -210,7 +210,7 @@ getname_flags(const char __user *filename, int flags)
                         return ERR_PTR(-ENAMETOOLONG);
                 }
         }
-        initname(result);
+        initname(result, filename);
         audit_getname(result);
         return result;
 }
@@ -268,7 +268,7 @@ struct filename *getname_kernel(const char * filename)
                 return ERR_PTR(-ENAMETOOLONG);
         }
         memcpy((char *)result->name, filename, len);
-        initname(result);
+        initname(result, NULL);
         audit_getname(result);
         return result;
 }
diff --git a/fs/namespace.c b/fs/namespace.c
index 14935a0500a2..d9ca80dcc544 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1830,6 +1830,8 @@ static inline void namespace_lock(void)
         down_write(&namespace_sem);
 }
 
+DEFINE_GUARD(namespace_lock, struct rw_semaphore *, namespace_lock(), namespace_unlock())
+
 enum umount_tree_flags {
         UMOUNT_SYNC = 1,
         UMOUNT_PROPAGATE = 2,
@@ -2383,7 +2385,7 @@ void dissolve_on_fput(struct vfsmount *mnt)
                 return;
         }
 
-        scoped_guard(rwsem_write, &namespace_sem) {
+        scoped_guard(namespace_lock, &namespace_sem) {
                 ns = m->mnt_ns;
                 if (!must_dissolve(ns))
                         return;
@@ -5189,8 +5191,8 @@ static void finish_mount_kattr(struct mount_kattr *kattr)
                 mnt_idmap_put(kattr->mnt_idmap);
 }
 
-static int copy_mount_setattr(struct mount_attr __user *uattr, size_t usize,
-                              struct mount_kattr *kattr)
+static int wants_mount_setattr(struct mount_attr __user *uattr, size_t usize,
+                               struct mount_kattr *kattr)
 {
         int ret;
         struct mount_attr attr;
@@ -5213,9 +5215,13 @@ static int copy_mount_setattr(struct mount_attr __user *uattr, size_t usize,
         if (attr.attr_set == 0 &&
             attr.attr_clr == 0 &&
             attr.propagation == 0)
-                return 0;
+                return 0; /* Tell caller to not bother. */
+
+        ret = build_mount_kattr(&attr, usize, kattr);
+        if (ret < 0)
+                return ret;
 
-        return build_mount_kattr(&attr, usize, kattr);
+        return 1;
 }
 
 SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
@@ -5247,8 +5253,8 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
         if (flags & AT_RECURSIVE)
                 kattr.kflags |= MOUNT_KATTR_RECURSE;
 
-        err = copy_mount_setattr(uattr, usize, &kattr);
-        if (err)
+        err = wants_mount_setattr(uattr, usize, &kattr);
+        if (err <= 0)
                 return err;
 
         err = user_path_at(dfd, path, kattr.lookup_flags, &target);
@@ -5282,15 +5288,17 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
                 if (flags & AT_RECURSIVE)
                         kattr.kflags |= MOUNT_KATTR_RECURSE;
 
-                ret = copy_mount_setattr(uattr, usize, &kattr);
-                if (ret)
+                ret = wants_mount_setattr(uattr, usize, &kattr);
+                if (ret < 0)
                         return ret;
 
-                ret = do_mount_setattr(&file->f_path, &kattr);
-                if (ret)
-                        return ret;
+                if (ret) {
+                        ret = do_mount_setattr(&file->f_path, &kattr);
+                        if (ret)
+                                return ret;
 
-                finish_mount_kattr(&kattr);
+                        finish_mount_kattr(&kattr);
+                }
         }
 
         fd = get_unused_fd_flags(flags & O_CLOEXEC);
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 4e3e62040831..70ecc8f5f210 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -127,11 +127,13 @@ static int __init netfs_init(void)
         if (mempool_init_slab_pool(&netfs_subrequest_pool, 100, netfs_subrequest_slab) < 0)
                 goto error_subreqpool;
 
+#ifdef CONFIG_PROC_FS
         if (!proc_mkdir("fs/netfs", NULL))
                 goto error_proc;
 
         if (!proc_create_seq("fs/netfs/requests", S_IFREG | 0444, NULL,
                              &netfs_requests_seq_ops))
                 goto error_procfile;
+#endif
 
 #ifdef CONFIG_FSCACHE_STATS
         if (!proc_create_single("fs/netfs/stats", S_IFREG | 0444, NULL,
                                 netfs_stats_show))
@@ -144,9 +146,11 @@ static int __init netfs_init(void)
         return 0;
 
 error_fscache:
+#ifdef CONFIG_PROC_FS
 error_procfile:
         remove_proc_subtree("fs/netfs", NULL);
 error_proc:
+#endif
         mempool_exit(&netfs_subrequest_pool);
 error_subreqpool:
         kmem_cache_destroy(netfs_subrequest_slab);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 6f2f8f4cfbbc..aef942a758ce 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -541,8 +541,6 @@ int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d,
 bool ovl_is_metacopy_dentry(struct dentry *dentry);
 char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding);
 int ovl_ensure_verity_loaded(struct path *path);
-int ovl_get_verity_xattr(struct ovl_fs *ofs, const struct path *path,
-                         u8 *digest_buf, int *buf_length);
 int ovl_validate_verity(struct ovl_fs *ofs, struct path *metapath,
                         struct path *datapath);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index b63474d1b064..e19940d649ca 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1138,6 +1138,11 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
                 return ERR_PTR(-EINVAL);
         }
 
+        if (ctx->nr == ctx->nr_data) {
+                pr_err("at least one non-data lowerdir is required\n");
+                return ERR_PTR(-EINVAL);
+        }
+
         err = -EINVAL;
         for (i = 0; i < ctx->nr; i++) {
                 l = &ctx->lower[i];
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index 2aa83ee0ec98..a67988316d06 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -10,6 +10,7 @@
 
 #include <linux/atomic.h>
 #include <linux/crypto.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 
 /* Set this bit for virtual address instead of SG list. */
@@ -581,7 +582,10 @@ static inline struct ahash_request *ahash_request_alloc_noprof(
  * ahash_request_free() - zeroize and free the request data structure
  * @req: request data structure cipher handle to be freed
  */
-void ahash_request_free(struct ahash_request *req);
+static inline void ahash_request_free(struct ahash_request *req)
+{
+        kfree_sensitive(req);
+}
 
 static inline struct ahash_request *ahash_request_cast(
         struct crypto_async_request *req)
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 485e22cf517e..052ac7924af3 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -249,7 +249,7 @@ static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm)
 
 static inline bool ahash_request_chained(struct ahash_request *req)
 {
-        return crypto_request_chained(&req->base);
+        return false;
 }
 
 static inline bool ahash_request_isvirt(struct ahash_request *req)
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 8d1395f945bf..e9f07e37dd6f 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -173,65 +173,59 @@ struct dentry_operations {
  */
 
 /* d_flags entries */
-#define DCACHE_OP_HASH                  BIT(0)
-#define DCACHE_OP_COMPARE               BIT(1)
-#define DCACHE_OP_REVALIDATE            BIT(2)
-#define DCACHE_OP_DELETE                BIT(3)
-#define DCACHE_OP_PRUNE                 BIT(4)
-
-#define DCACHE_DISCONNECTED             BIT(5)
-     /* This dentry is possibly not currently connected to the dcache tree, in
-      * which case its parent will either be itself, or will have this flag as
-      * well.  nfsd will not use a dentry with this bit set, but will first
-      * endeavour to clear the bit either by discovering that it is connected,
-      * or by performing lookup operations.  Any filesystem which supports
-      * nfsd_operations MUST have a lookup function which, if it finds a
-      * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that
-      * dentry into place and return that dentry rather than the passed one,
-      * typically using d_splice_alias. */
-
-#define DCACHE_REFERENCED               BIT(6) /* Recently used, don't discard. */
-
-#define DCACHE_DONTCACHE                BIT(7) /* Purge from memory on final dput() */
-
-#define DCACHE_CANT_MOUNT               BIT(8)
-#define DCACHE_GENOCIDE                 BIT(9)
-#define DCACHE_SHRINK_LIST              BIT(10)
-
-#define DCACHE_OP_WEAK_REVALIDATE       BIT(11)
-
-#define DCACHE_NFSFS_RENAMED            BIT(12)
-     /* this dentry has been "silly renamed" and has to be deleted on the last
-      * dput() */
-#define DCACHE_FSNOTIFY_PARENT_WATCHED  BIT(13)
-     /* Parent inode is watched by some fsnotify listener */
-
-#define DCACHE_DENTRY_KILLED            BIT(14)
-
-#define DCACHE_MOUNTED                  BIT(15) /* is a mountpoint */
-#define DCACHE_NEED_AUTOMOUNT           BIT(16) /* handle automount on this dir */
-#define DCACHE_MANAGE_TRANSIT           BIT(17) /* manage transit from this dirent */
+enum dentry_flags {
+        DCACHE_OP_HASH                  = BIT(0),
+        DCACHE_OP_COMPARE               = BIT(1),
+        DCACHE_OP_REVALIDATE            = BIT(2),
+        DCACHE_OP_DELETE                = BIT(3),
+        DCACHE_OP_PRUNE                 = BIT(4),
+        /*
+         * This dentry is possibly not currently connected to the dcache tree,
+         * in which case its parent will either be itself, or will have this
+         * flag as well. nfsd will not use a dentry with this bit set, but will
+         * first endeavour to clear the bit either by discovering that it is
+         * connected, or by performing lookup operations.  Any filesystem which
+         * supports nfsd_operations MUST have a lookup function which, if it
+         * finds a directory inode with a DCACHE_DISCONNECTED dentry, will
+         * d_move that dentry into place and return that dentry rather than the
+         * passed one, typically using d_splice_alias.
+         */
+        DCACHE_DISCONNECTED             = BIT(5),
+        DCACHE_REFERENCED               = BIT(6),       /* Recently used, don't discard. */
+        DCACHE_DONTCACHE                = BIT(7),       /* Purge from memory on final dput() */
+        DCACHE_CANT_MOUNT               = BIT(8),
+        DCACHE_GENOCIDE                 = BIT(9),
+        DCACHE_SHRINK_LIST              = BIT(10),
+        DCACHE_OP_WEAK_REVALIDATE       = BIT(11),
+        /*
+         * this dentry has been "silly renamed" and has to be deleted on the
+         * last dput()
+         */
+        DCACHE_NFSFS_RENAMED            = BIT(12),
+        DCACHE_FSNOTIFY_PARENT_WATCHED  = BIT(13),      /* Parent inode is watched by some fsnotify listener */
+        DCACHE_DENTRY_KILLED            = BIT(14),
+        DCACHE_MOUNTED                  = BIT(15),      /* is a mountpoint */
+        DCACHE_NEED_AUTOMOUNT           = BIT(16),      /* handle automount on this dir */
+        DCACHE_MANAGE_TRANSIT           = BIT(17),      /* manage transit from this dirent */
+        DCACHE_LRU_LIST                 = BIT(18),
+        DCACHE_ENTRY_TYPE               = (7 << 19),    /* bits 19..21 are for storing type: */
+        DCACHE_MISS_TYPE                = (0 << 19),    /* Negative dentry */
+        DCACHE_WHITEOUT_TYPE            = (1 << 19),    /* Whiteout dentry (stop pathwalk) */
+        DCACHE_DIRECTORY_TYPE           = (2 << 19),    /* Normal directory */
+        DCACHE_AUTODIR_TYPE             = (3 << 19),    /* Lookupless directory (presumed automount) */
+        DCACHE_REGULAR_TYPE             = (4 << 19),    /* Regular file type */
+        DCACHE_SPECIAL_TYPE             = (5 << 19),    /* Other file type */
+        DCACHE_SYMLINK_TYPE             = (6 << 19),    /* Symlink */
+        DCACHE_NOKEY_NAME               = BIT(22),      /* Encrypted name encoded without key */
+        DCACHE_OP_REAL                  = BIT(23),
+        DCACHE_PAR_LOOKUP               = BIT(24),      /* being looked up (with parent locked shared) */
+        DCACHE_DENTRY_CURSOR            = BIT(25),
+        DCACHE_NORCU                    = BIT(26),      /* No RCU delay for freeing */
+};
+
 #define DCACHE_MANAGED_DENTRY \
         (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
 
-#define DCACHE_LRU_LIST                 BIT(18)
-
-#define DCACHE_ENTRY_TYPE               (7 << 19) /* bits 19..21 are for storing type: */
-#define DCACHE_MISS_TYPE                (0 << 19) /* Negative dentry */
-#define DCACHE_WHITEOUT_TYPE            (1 << 19) /* Whiteout dentry (stop pathwalk) */
-#define DCACHE_DIRECTORY_TYPE           (2 << 19) /* Normal directory */
-#define DCACHE_AUTODIR_TYPE             (3 << 19) /* Lookupless directory (presumed automount) */
-#define DCACHE_REGULAR_TYPE             (4 << 19) /* Regular file type */
-#define DCACHE_SPECIAL_TYPE             (5 << 19) /* Other file type */
-#define DCACHE_SYMLINK_TYPE             (6 << 19) /* Symlink */
-
-#define DCACHE_NOKEY_NAME               BIT(22) /* Encrypted name encoded without key */
-#define DCACHE_OP_REAL                  BIT(23)
-
-#define DCACHE_PAR_LOOKUP               BIT(24) /* being looked up (with parent locked shared) */
-#define DCACHE_DENTRY_CURSOR            BIT(25)
-#define DCACHE_NORCU                    BIT(26) /* No RCU delay for freeing */
-
 extern seqlock_t rename_lock;
 
 /*
diff --git a/include/linux/local_lock.h b/include/linux/local_lock.h
index 1a0bc35839e3..16a2ee4f8310 100644
--- a/include/linux/local_lock.h
+++ b/include/linux/local_lock.h
@@ -52,44 +52,23 @@
         __local_unlock_irqrestore(lock, flags)
 
 /**
- * localtry_lock_init - Runtime initialize a lock instance
- */
-#define localtry_lock_init(lock)        __localtry_lock_init(lock)
-
-/**
- * localtry_lock - Acquire a per CPU local lock
- * @lock:       The lock variable
- */
-#define localtry_lock(lock)             __localtry_lock(lock)
-
-/**
- * localtry_lock_irq - Acquire a per CPU local lock and disable interrupts
- * @lock:       The lock variable
- */
-#define localtry_lock_irq(lock)         __localtry_lock_irq(lock)
-
-/**
- * localtry_lock_irqsave - Acquire a per CPU local lock, save and disable
- *                         interrupts
- * @lock:       The lock variable
- * @flags:      Storage for interrupt flags
+ * local_lock_init - Runtime initialize a lock instance
  */
-#define localtry_lock_irqsave(lock, flags)      \
-        __localtry_lock_irqsave(lock, flags)
+#define local_trylock_init(lock)        __local_trylock_init(lock)
 
 /**
- * localtry_trylock - Try to acquire a per CPU local lock.
+ * local_trylock - Try to acquire a per CPU local lock
  * @lock:       The lock variable
 *
 * The function can be used in any context such as NMI or HARDIRQ. Due to
 * locking constrains it will _always_ fail to acquire the lock in NMI or
 * HARDIRQ context on PREEMPT_RT.
 */
-#define localtry_trylock(lock)          __localtry_trylock(lock)
+#define local_trylock(lock)             __local_trylock(lock)
 
 /**
- * localtry_trylock_irqsave - Try to acquire a per CPU local lock, save and disable
- *                            interrupts if acquired
+ * local_trylock_irqsave - Try to acquire a per CPU local lock, save and disable
+ *                         interrupts if acquired
  * @lock:       The lock variable
  * @flags:      Storage for interrupt flags
  *
@@ -97,29 +76,8 @@
  * locking constrains it will _always_ fail to acquire the lock in NMI or
  * HARDIRQ context on PREEMPT_RT.
  */
-#define localtry_trylock_irqsave(lock, flags)   \
-        __localtry_trylock_irqsave(lock, flags)
-
-/**
- * local_unlock - Release a per CPU local lock
- * @lock:       The lock variable
- */
-#define localtry_unlock(lock)           __localtry_unlock(lock)
-
-/**
- * local_unlock_irq - Release a per CPU local lock and enable interrupts
- * @lock:       The lock variable
- */
-#define localtry_unlock_irq(lock)       __localtry_unlock_irq(lock)
-
-/**
- * localtry_unlock_irqrestore - Release a per CPU local lock and restore
- *                              interrupt flags
- * @lock:       The lock variable
- * @flags:      Interrupt flags to restore
- */
-#define localtry_unlock_irqrestore(lock, flags) \
-        __localtry_unlock_irqrestore(lock, flags)
+#define local_trylock_irqsave(lock, flags)      \
+        __local_trylock_irqsave(lock, flags)
 
 DEFINE_GUARD(local_lock, local_lock_t __percpu*,
              local_lock(_T),
diff --git a/include/linux/local_lock_internal.h b/include/linux/local_lock_internal.h
index 67bd13d142fa..bf2bf40d7b18 100644
--- a/include/linux/local_lock_internal.h
+++ b/include/linux/local_lock_internal.h
@@ -15,10 +15,11 @@ typedef struct {
 #endif
 } local_lock_t;
 
+/* local_trylock() and local_trylock_irqsave() only work with local_trylock_t */
 typedef struct {
         local_lock_t    llock;
-        unsigned int    acquired;
-} localtry_lock_t;
+        u8              acquired;
+} local_trylock_t;
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define LOCAL_LOCK_DEBUG_INIT(lockname)        \
@@ -29,6 +30,9 @@ typedef struct {
         },                                      \
         .owner = NULL,
 
+# define LOCAL_TRYLOCK_DEBUG_INIT(lockname)     \
+        .llock = { LOCAL_LOCK_DEBUG_INIT((lockname).llock) },
+
 static inline void local_lock_acquire(local_lock_t *l)
 {
         lock_map_acquire(&l->dep_map);
@@ -56,6 +60,7 @@ static inline void local_lock_debug_init(local_lock_t *l)
 }
 #else /* CONFIG_DEBUG_LOCK_ALLOC */
 # define LOCAL_LOCK_DEBUG_INIT(lockname)
+# define LOCAL_TRYLOCK_DEBUG_INIT(lockname)
 static inline void local_lock_acquire(local_lock_t *l) { }
 static inline void local_trylock_acquire(local_lock_t *l) { }
 static inline void local_lock_release(local_lock_t *l) { }
@@ -63,7 +68,7 @@ static inline void local_lock_debug_init(local_lock_t *l) { }
 #endif /* !CONFIG_DEBUG_LOCK_ALLOC */
 
 #define INIT_LOCAL_LOCK(lockname)       { LOCAL_LOCK_DEBUG_INIT(lockname) }
-#define INIT_LOCALTRY_LOCK(lockname)    { .llock = { LOCAL_LOCK_DEBUG_INIT(lockname.llock) }}
+#define INIT_LOCAL_TRYLOCK(lockname)    { LOCAL_TRYLOCK_DEBUG_INIT(lockname) }
 
 #define __local_lock_init(lock)                                 \
 do {                                                            \
@@ -76,6 +81,8 @@ do {                                                            \
         local_lock_debug_init(lock);                            \
 } while (0)
 
+#define __local_trylock_init(lock) __local_lock_init(lock.llock)
+
 #define __spinlock_nested_bh_init(lock)                         \
 do {                                                            \
         static struct lock_class_key __key;                     \
@@ -87,149 +94,117 @@ do {                                                          \
         local_lock_debug_init(lock);                            \
 } while (0)
 
+#define __local_lock_acquire(lock)                              \
+        do {                                                    \
+                local_trylock_t *tl;                            \
+                local_lock_t *l;                                \
+                                                                \
+                l = (local_lock_t *)this_cpu_ptr(lock);         \
+                tl = (local_trylock_t *)l;                      \
+                _Generic((lock),                                \
+                        local_trylock_t *: ({                   \
+                                lockdep_assert(tl->acquired == 0); \
+                                WRITE_ONCE(tl->acquired, 1);    \
+                        }),                                     \
+                        default:(void)0);                       \
+                local_lock_acquire(l);                          \
+        } while (0)
+
 #define __local_lock(lock)                                      \
         do {                                                    \
                 preempt_disable();                              \
-                local_lock_acquire(this_cpu_ptr(lock));         \
+                __local_lock_acquire(lock);                     \
         } while (0)
 
 #define __local_lock_irq(lock)                                  \
         do {                                                    \
                 local_irq_disable();                            \
-                local_lock_acquire(this_cpu_ptr(lock));         \
+                __local_lock_acquire(lock);                     \
         } while (0)
 
 #define __local_lock_irqsave(lock, flags)                       \
         do {                                                    \
                 local_irq_save(flags);                          \
-                local_lock_acquire(this_cpu_ptr(lock));         \
-        } while (0)
-
-#define __local_unlock(lock)                                    \
-        do {                                                    \
-                local_lock_release(this_cpu_ptr(lock));         \
-                preempt_enable();                               \
+                __local_lock_acquire(lock);                     \
         } while (0)
 
-#define __local_unlock_irq(lock)                                \
-        do {                                                    \
-                local_lock_release(this_cpu_ptr(lock));         \
-                local_irq_enable();                             \
-        } while (0)
-
-#define __local_unlock_irqrestore(lock, flags)                  \
-        do {                                                    \
-                local_lock_release(this_cpu_ptr(lock));         \
-                local_irq_restore(flags);                       \
-        } while (0)
-
-#define __local_lock_nested_bh(lock)                            \
-        do {                                                    \
-                lockdep_assert_in_softirq();                    \
-                local_lock_acquire(this_cpu_ptr(lock));         \
-        } while (0)
-
-#define __local_unlock_nested_bh(lock)                          \
-        local_lock_release(this_cpu_ptr(lock))
-
-/* localtry_lock_t variants */
-
-#define __localtry_lock_init(lock)                              \
-do {                                                            \
-        __local_lock_init(&(lock)->llock);                      \
-        WRITE_ONCE((lock)->acquired, 0);                        \
-} while (0)
-
-#define __localtry_lock(lock)                                   \
-        do {                                                    \
-                localtry_lock_t *lt;                            \
-                preempt_disable();                              \
-                lt = this_cpu_ptr(lock);                        \
-                local_lock_acquire(&lt->llock);                 \
-                WRITE_ONCE(lt->acquired, 1);                    \
-        } while (0)
-
-#define __localtry_lock_irq(lock)                               \
-        do {                                                    \
-                localtry_lock_t *lt;                            \
-                local_irq_disable();                            \
-                lt = this_cpu_ptr(lock);                        \
-                local_lock_acquire(&lt->llock);                 \
-                WRITE_ONCE(lt->acquired, 1);                    \
-        } while (0)
-
-#define __localtry_lock_irqsave(lock, flags)                    \
-        do {                                                    \
-                localtry_lock_t *lt;                            \
-                local_irq_save(flags);                          \
-                lt = this_cpu_ptr(lock);                        \
-                local_lock_acquire(&lt->llock);                 \
-                WRITE_ONCE(lt->acquired, 1);                    \
-        } while (0)
-
-#define __localtry_trylock(lock)                                \
+#define __local_trylock(lock)                                   \
         ({                                                      \
-                localtry_lock_t *lt;                            \
-                bool _ret;                                      \
+                local_trylock_t *tl;                            \
                                                                 \
                 preempt_disable();                              \
-                lt = this_cpu_ptr(lock);                        \
-                if (!READ_ONCE(lt->acquired)) {                 \
-                        WRITE_ONCE(lt->acquired, 1);            \
-                        local_trylock_acquire(&lt->llock);      \
-                        _ret = true;                            \
-                } else {                                        \
-                        _ret = false;                           \
+                tl = this_cpu_ptr(lock);                        \
+                if (READ_ONCE(tl->acquired)) {                  \
                         preempt_enable();                       \
+                        tl = NULL;                              \
+                } else {                                        \
+                        WRITE_ONCE(tl->acquired, 1);            \
+                        local_trylock_acquire(                  \
+                                (local_lock_t *)tl);            \
                 }                                               \
-                _ret;                                           \
+                !!tl;                                           \
         })
 
-#define __localtry_trylock_irqsave(lock, flags)                 \
+#define __local_trylock_irqsave(lock, flags)                    \
         ({                                                      \
-                localtry_lock_t *lt;                            \
-                bool _ret;                                      \
+                local_trylock_t *tl;                            \
                                                                 \
                 local_irq_save(flags);                          \
-                lt = this_cpu_ptr(lock);                        \
-                if (!READ_ONCE(lt->acquired)) {                 \
-                        WRITE_ONCE(lt->acquired, 1);            \
-                        local_trylock_acquire(&lt->llock);      \
-                        _ret = true;                            \
-                } else {                                        \
-                        _ret = false;                           \
+                tl = this_cpu_ptr(lock);                        \
+                if (READ_ONCE(tl->acquired)) {                  \
                         local_irq_restore(flags);               \
+                        tl = NULL;                              \
+                } else {                                        \
+                        WRITE_ONCE(tl->acquired, 1);            \
+                        local_trylock_acquire(                  \
+                                (local_lock_t *)tl);            \
                 }                                               \
-                _ret;                                           \
+                !!tl;                                           \
         })
 
-#define __localtry_unlock(lock)                                 \
+#define __local_lock_release(lock)                              \
+        do {                                                    \
+                local_trylock_t *tl;                            \
+                local_lock_t *l;                                \
+                                                                \
+                l = (local_lock_t *)this_cpu_ptr(lock);         \
+                tl = (local_trylock_t *)l;                      \
+                local_lock_release(l);                          \
+                _Generic((lock),                                \
+                        local_trylock_t *: ({                   \
+                                lockdep_assert(tl->acquired == 1); \
+                                WRITE_ONCE(tl->acquired, 0);    \
+                        }),                                     \
+                        default:(void)0);                       \
+        } while (0)
+
+#define __local_unlock(lock)                                    \
         do {                                                    \
-                localtry_lock_t *lt;                            \
-                lt = this_cpu_ptr(lock);                        \
-                WRITE_ONCE(lt->acquired, 0);                    \
-                local_lock_release(&lt->llock);                 \
+                __local_lock_release(lock);                     \
                 preempt_enable();                               \
         } while (0)
 
-#define __localtry_unlock_irq(lock)                             \
+#define __local_unlock_irq(lock)                                \
         do {                                                    \
-                localtry_lock_t *lt;                            \
-                lt = this_cpu_ptr(lock);                        \
-                WRITE_ONCE(lt->acquired, 0);                    \
-                local_lock_release(&lt->llock);                 \
+                __local_lock_release(lock);                     \
                 local_irq_enable();                             \
         } while (0)
 
-#define __localtry_unlock_irqrestore(lock, flags)               \
+#define __local_unlock_irqrestore(lock, flags)                  \
         do {                                                    \
-                localtry_lock_t *lt;                            \
-                lt = this_cpu_ptr(lock);                        \
-                WRITE_ONCE(lt->acquired, 0);                    \
-                local_lock_release(&lt->llock);                 \
+                __local_lock_release(lock);                     \
                 local_irq_restore(flags);                       \
         } while (0)
 
+#define __local_lock_nested_bh(lock)                            \
+        do {                                                    \
+                lockdep_assert_in_softirq();                    \
+                local_lock_acquire(this_cpu_ptr(lock));         \
+        } while (0)
+
+#define __local_unlock_nested_bh(lock)                          \
+        local_lock_release(this_cpu_ptr(lock))
+
 #else /* !CONFIG_PREEMPT_RT */
 
 /*
@@ -237,16 +212,18 @@ do {                                                          \
  * critical section while staying preemptible.
  */
 typedef spinlock_t local_lock_t;
-typedef spinlock_t localtry_lock_t;
+typedef spinlock_t local_trylock_t;
 
 #define INIT_LOCAL_LOCK(lockname)       __LOCAL_SPIN_LOCK_UNLOCKED((lockname))
-#define INIT_LOCALTRY_LOCK(lockname)    INIT_LOCAL_LOCK(lockname)
+#define INIT_LOCAL_TRYLOCK(lockname)    __LOCAL_SPIN_LOCK_UNLOCKED((lockname))
 
 #define __local_lock_init(l)                    \
         do {                                    \
                 local_spin_lock_init((l));      \
         } while (0)
 
+#define __local_trylock_init(l)                 __local_lock_init(l)
+
 #define __local_lock(__lock)                    \
         do {                                    \
                 migrate_disable();              \
@@ -283,17 +260,7 @@ do {                                    \
                 spin_unlock(this_cpu_ptr((lock)));      \
         } while (0)
 
-/* localtry_lock_t variants */
-
-#define __localtry_lock_init(lock)                      __local_lock_init(lock)
-#define __localtry_lock(lock)                           __local_lock(lock)
-#define __localtry_lock_irq(lock)                       __local_lock(lock)
-#define __localtry_lock_irqsave(lock, flags)            __local_lock_irqsave(lock, flags)
-#define __localtry_unlock(lock)                         __local_unlock(lock)
-#define __localtry_unlock_irq(lock)                     __local_unlock(lock)
-#define __localtry_unlock_irqrestore(lock, flags)       __local_unlock_irqrestore(lock, flags)
-
-#define __localtry_trylock(lock)                        \
+#define __local_trylock(lock)                           \
         ({                                              \
                 int __locked;                           \
                                                         \
@@ -308,11 +275,11 @@ do {                                    \
                 __locked;                               \
         })
 
-#define __localtry_trylock_irqsave(lock, flags)         \
+#define __local_trylock_irqsave(lock, flags)            \
         ({                                              \
                 typecheck(unsigned long, flags);        \
                 flags = 0;                              \
-                __localtry_trylock(lock);               \
+                __local_trylock(lock);                  \
         })
 
 #endif /* CONFIG_PREEMPT_RT */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index e2b705c14945..b50447ef1c92 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1511,8 +1511,9 @@ static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
 
 /*
  * track_pfn_copy is called when a VM_PFNMAP VMA is about to get the page
- * tables copied during copy_page_range(). On success, stores the pfn to be
- * passed to untrack_pfn_copy().
+ * tables copied during copy_page_range(). Will store the pfn to be
+ * passed to untrack_pfn_copy() only if there is something to be untracked.
+ * Callers should initialize the pfn to 0.
  */
 static inline int track_pfn_copy(struct vm_area_struct *dst_vma,
                 struct vm_area_struct *src_vma, unsigned long *pfn)
@@ -1522,7 +1523,9 @@ static inline int track_pfn_copy(struct vm_area_struct *dst_vma,
 
 /*
  * untrack_pfn_copy is called when a VM_PFNMAP VMA failed to copy during
- * copy_page_range(), but after track_pfn_copy() was already called.
+ * copy_page_range(), but after track_pfn_copy() was already called. Can
+ * be called even if track_pfn_copy() did not actually track anything:
+ * handled internally.
*/ static inline void untrack_pfn_copy(struct vm_area_struct *dst_vma, unsigned long pfn) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d42eae69d9a8..901353796fbb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4790,7 +4790,14 @@ void roce_del_all_netdev_gids(struct ib_device *ib_dev, struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs); +#else +static inline int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) +{ + return 0; +} +#endif struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 1d893e313614..25ecc1334b67 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -422,11 +422,20 @@ static int vm_module_tags_populate(void) unsigned long old_shadow_end = ALIGN(phys_end, MODULE_ALIGN); unsigned long new_shadow_end = ALIGN(new_end, MODULE_ALIGN); unsigned long more_pages; - unsigned long nr; + unsigned long nr = 0; more_pages = ALIGN(new_end - phys_end, PAGE_SIZE) >> PAGE_SHIFT; - nr = alloc_pages_bulk_node(GFP_KERNEL | __GFP_NOWARN, - NUMA_NO_NODE, more_pages, next_page); + while (nr < more_pages) { + unsigned long allocated; + + allocated = alloc_pages_bulk_node(GFP_KERNEL | __GFP_NOWARN, + NUMA_NO_NODE, more_pages - nr, next_page + nr); + + if (!allocated) + break; + nr += allocated; + } + if (nr < more_pages || vmap_pages_range(phys_end, phys_end + (nr << PAGE_SHIFT), PAGE_KERNEL, next_page, PAGE_SHIFT) < 0) { diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index 13da529e2e72..5738ae286b41 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -518,4 +518,5 @@ error: } EXPORT_SYMBOL_GPL(asn1_ber_decoder); +MODULE_DESCRIPTION("Decoder for ASN.1 BER/DER/CER encoded bytestream"); MODULE_LICENSE("GPL"); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8c7fdb7d8c8f..bc9391e55d57 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1191,7 +1191,7 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, return -ENOMEM; p = *pages; for (int k = 0; k < n; k++) { - struct folio *folio = page_folio(page); + struct folio *folio = page_folio(page + k); p[k] = page + k; if (!folio_test_slab(folio)) folio_get(folio); diff --git a/lib/tests/slub_kunit.c b/lib/tests/slub_kunit.c index d47c472b0520..848b682a2d70 100644 --- a/lib/tests/slub_kunit.c +++ b/lib/tests/slub_kunit.c @@ -325,4 +325,5 @@ static struct kunit_suite test_suite = { }; kunit_test_suite(test_suite); +MODULE_DESCRIPTION("Kunit tests for slub allocator"); MODULE_LICENSE("GPL"); diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index 9308bcfb2ad5..dfb4f2358cab 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c @@ -165,4 +165,5 @@ ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength) } EXPORT_SYMBOL(ucs2_as_utf8); +MODULE_DESCRIPTION("UCS2 string handling"); MODULE_LICENSE("GPL v2"); diff --git a/lib/zlib_inflate/inflate_syms.c b/lib/zlib_inflate/inflate_syms.c index 9720114c0672..b8996d90e8bc 100644 --- a/lib/zlib_inflate/inflate_syms.c +++ b/lib/zlib_inflate/inflate_syms.c @@ -18,4 +18,5 @@ EXPORT_SYMBOL(zlib_inflateEnd); EXPORT_SYMBOL(zlib_inflateReset); EXPORT_SYMBOL(zlib_inflateIncomp); EXPORT_SYMBOL(zlib_inflate_blob); +MODULE_DESCRIPTION("Data decompression using the deflation algorithm"); MODULE_LICENSE("GPL"); diff --git a/mm/cma.c b/mm/cma.c @@ -35,7 +35,7 @@ struct cma cma_areas[MAX_CMA_AREAS]; unsigned int
cma_area_count; -static int __init __cma_declare_contiguous_nid(phys_addr_t base, +static int __init __cma_declare_contiguous_nid(phys_addr_t *basep, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, bool fixed, const char *name, struct cma **res_cma, @@ -370,7 +370,7 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size, phys_addr_t align, unsigned int order_per_bit, const char *name, struct cma **res_cma, int nid) { - phys_addr_t start, end; + phys_addr_t start = 0, end; phys_addr_t size, sizesum, sizeleft; struct cma_init_memrange *mrp, *mlp, *failed; struct cma_memrange *cmrp; @@ -384,7 +384,7 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size, /* * First, try it the normal way, producing just one range. */ - ret = __cma_declare_contiguous_nid(0, total_size, 0, align, + ret = __cma_declare_contiguous_nid(&start, total_size, 0, align, order_per_bit, false, name, res_cma, nid); if (ret != -ENOMEM) goto out; @@ -580,7 +580,7 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, { int ret; - ret = __cma_declare_contiguous_nid(base, size, limit, alignment, + ret = __cma_declare_contiguous_nid(&base, size, limit, alignment, order_per_bit, fixed, name, res_cma, nid); if (ret != 0) pr_err("Failed to reserve %ld MiB\n", @@ -592,14 +592,14 @@ int __init cma_declare_contiguous_nid(phys_addr_t base, return ret; } -static int __init __cma_declare_contiguous_nid(phys_addr_t base, +static int __init __cma_declare_contiguous_nid(phys_addr_t *basep, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, bool fixed, const char *name, struct cma **res_cma, int nid) { phys_addr_t memblock_end = memblock_end_of_DRAM(); - phys_addr_t highmem_start; + phys_addr_t highmem_start, base = *basep; int ret; /* @@ -722,12 +722,15 @@ static int __init __cma_declare_contiguous_nid(phys_addr_t base, } ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma); - if (ret) + if (ret) { memblock_phys_free(base, size); + return ret; + } (*res_cma)->nid = nid; + *basep = base; - return ret; + return 0; } static void cma_debug_show_areas(struct cma *cma) diff --git a/mm/compaction.c b/mm/compaction.c index 139f00c0308a..ca71fd3c3181 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -981,13 +981,13 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, } if (PageHuge(page)) { + const unsigned int order = compound_order(page); /* * skip hugetlbfs if we are not compacting for pages * bigger than its order. THPs and other compound pages * are handled below. 
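 *
 * (Reading the order up front is deliberate: a failed
 *  isolate_or_dissolve_huge_folio() below may have dissolved or freed
 *  the page, so the -EBUSY path advances low_pfn and nr_scanned by the
 *  cached (1UL << order) - 1 rather than re-reading compound_nr(page).)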
*/ if (!cc->alloc_contig) { - const unsigned int order = compound_order(page); if (order <= MAX_PAGE_ORDER) { low_pfn += (1UL << order) - 1; @@ -1011,8 +1011,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, /* Do not report -EBUSY down the chain */ if (ret == -EBUSY) ret = 0; - low_pfn += compound_nr(page) - 1; - nr_scanned += compound_nr(page) - 1; + low_pfn += (1UL << order) - 1; + nr_scanned += (1UL << order) - 1; goto isolate_fail; } diff --git a/mm/filemap.c b/mm/filemap.c index b5e784f34d98..7b90cbeb4a1a 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2244,6 +2244,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping, *start = folio->index + nr; goto out; } + xas_advance(&xas, folio_next_index(folio) - 1); continue; put_folio: folio_put(folio); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 39f92aad7bd1..e3e6ac991b9c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2271,7 +2271,7 @@ static struct folio *alloc_surplus_hugetlb_folio(struct hstate *h, * as surplus_pages, otherwise it might confuse * persistent_huge_pages() momentarily. */ - __prep_account_new_huge_page(h, nid); + __prep_account_new_huge_page(h, folio_nid(folio)); /* * We could have raced with the pool size change. @@ -3825,6 +3825,7 @@ found: static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, nodemask_t *nodes_allowed) { + unsigned long persistent_free_count; unsigned long min_count; unsigned long allocated; struct folio *folio; @@ -3959,8 +3960,24 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, * though, we'll note that we're not allowed to exceed surplus * and won't grow the pool anywhere else. Not until one of the * sysctls are changed, or the surplus pages go out of use. + * + * min_count is the expected number of persistent pages, we + * shouldn't calculate min_count by using + * resv_huge_pages + persistent_huge_pages() - free_huge_pages, + * because there may exist free surplus huge pages, and this will + * lead to subtracting twice. Free surplus huge pages come from HVO + * failing to restore vmemmap, see comments in the callers of + * hugetlb_vmemmap_restore_folio(). Thus, we should calculate + * persistent free count first. 
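 *
 * Worked example (hypothetical numbers): resv_huge_pages = 2,
 * persistent_huge_pages() = 8, surplus_huge_pages = 3 with all three
 * surplus pages free after a failed HVO restore, free_huge_pages = 10.
 * Then free > persistent and free > surplus, so
 * persistent_free_count = 10 - 3 = 7 and min_count = 2 + 8 - 7 = 3.
 * The naive resv + persistent - free would give 2 + 8 - 10 = 0,
 * charging the three free surplus pages against the persistent pool
 * a second time.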
*/ - min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages; + persistent_free_count = h->free_huge_pages; + if (h->free_huge_pages > persistent_huge_pages(h)) { + if (h->free_huge_pages > h->surplus_huge_pages) + persistent_free_count -= h->surplus_huge_pages; + else + persistent_free_count = 0; + } + min_count = h->resv_huge_pages + persistent_huge_pages(h) - persistent_free_count; min_count = max(count, min_count); try_to_free_low(h, min_count, nodes_allowed); @@ -4630,7 +4647,7 @@ static void __init hugetlb_sysfs_init(void) err = hugetlb_sysfs_add_hstate(h, hugepages_kobj, hstate_kobjs, &hstate_attr_group); if (err) - pr_err("HugeTLB: Unable to add hstate %s", h->name); + pr_err("HugeTLB: Unable to add hstate %s\n", h->name); } #ifdef CONFIG_NUMA diff --git a/mm/kasan/kasan_test_c.c b/mm/kasan/kasan_test_c.c index 3ea317837c2d..f24e3bef72a4 100644 --- a/mm/kasan/kasan_test_c.c +++ b/mm/kasan/kasan_test_c.c @@ -2127,4 +2127,5 @@ static struct kunit_suite kasan_kunit_test_suite = { kunit_test_suite(kasan_kunit_test_suite); +MODULE_DESCRIPTION("KUnit tests for checking KASAN bug-detection capabilities"); MODULE_LICENSE("GPL"); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 421740f1bcdc..c96c1f2b9cf5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1759,7 +1759,7 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) } struct memcg_stock_pcp { - localtry_lock_t stock_lock; + local_trylock_t stock_lock; struct mem_cgroup *cached; /* this never be root cgroup */ unsigned int nr_pages; @@ -1774,7 +1774,7 @@ struct memcg_stock_pcp { #define FLUSHING_CACHED_CHARGE 0 }; static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = { - .stock_lock = INIT_LOCALTRY_LOCK(stock_lock), + .stock_lock = INIT_LOCAL_TRYLOCK(stock_lock), }; static DEFINE_MUTEX(percpu_charge_mutex); @@ -1805,11 +1805,10 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages, if (nr_pages > MEMCG_CHARGE_BATCH) return ret; - if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) { - if (!gfpflags_allow_spinning(gfp_mask)) - return ret; - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); - } + if (gfpflags_allow_spinning(gfp_mask)) + local_lock_irqsave(&memcg_stock.stock_lock, flags); + else if (!local_trylock_irqsave(&memcg_stock.stock_lock, flags)) + return ret; stock = this_cpu_ptr(&memcg_stock); stock_pages = READ_ONCE(stock->nr_pages); @@ -1818,7 +1817,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages, ret = true; } - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); return ret; } @@ -1857,14 +1856,14 @@ static void drain_local_stock(struct work_struct *dummy) * drain_stock races is that we always operate on local CPU stock * here with IRQ disabled */ - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + local_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); old = drain_obj_stock(stock); drain_stock(stock); clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); obj_cgroup_put(old); } @@ -1894,7 +1893,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) { unsigned long flags; - if (!localtry_trylock_irqsave(&memcg_stock.stock_lock, flags)) { + if (!local_trylock_irqsave(&memcg_stock.stock_lock, flags)) { /* * In case of unlikely failure to lock percpu stock_lock * uncharge memcg 
directly. @@ -1907,7 +1906,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) return; } __refill_stock(memcg, nr_pages); - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); } /* @@ -1964,9 +1963,9 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu) stock = &per_cpu(memcg_stock, cpu); /* drain_obj_stock requires stock_lock */ - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + local_lock_irqsave(&memcg_stock.stock_lock, flags); old = drain_obj_stock(stock); - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); drain_stock(stock); obj_cgroup_put(old); @@ -2787,7 +2786,7 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, unsigned long flags; int *bytes; - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + local_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); /* @@ -2836,7 +2835,7 @@ static void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, if (nr) __mod_objcg_mlstate(objcg, pgdat, idx, nr); - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); obj_cgroup_put(old); } @@ -2846,7 +2845,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) unsigned long flags; bool ret = false; - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + local_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) { @@ -2854,7 +2853,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) ret = true; } - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); return ret; } @@ -2946,7 +2945,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, unsigned long flags; unsigned int nr_pages = 0; - localtry_lock_irqsave(&memcg_stock.stock_lock, flags); + local_lock_irqsave(&memcg_stock.stock_lock, flags); stock = this_cpu_ptr(&memcg_stock); if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */ @@ -2960,7 +2959,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes, stock->nr_bytes &= (PAGE_SIZE - 1); } - localtry_unlock_irqrestore(&memcg_stock.stock_lock, flags); + local_unlock_irqrestore(&memcg_stock.stock_lock, flags); obj_cgroup_put(old); if (nr_pages) diff --git a/mm/memory.c b/mm/memory.c index 2d8c265fc7d6..44481fe7c629 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1361,7 +1361,7 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) struct mm_struct *dst_mm = dst_vma->vm_mm; struct mm_struct *src_mm = src_vma->vm_mm; struct mmu_notifier_range range; - unsigned long next, pfn; + unsigned long next, pfn = 0; bool is_cow; int ret; @@ -2938,11 +2938,11 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, if (fn) { do { if (create || !pte_none(ptep_get(pte))) { - err = fn(pte++, addr, data); + err = fn(pte, addr, data); if (err) break; } - } while (addr += PAGE_SIZE, addr != end); + } while (pte++, addr += PAGE_SIZE, addr != end); } *mask |= PGTBL_PTE_MODIFIED; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fd6b865cb1ab..1715e34b91af 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1400,11 +1400,12 @@ static void free_one_page(struct zone 
*zone, struct page *page, struct llist_head *llhead; unsigned long flags; - if (!spin_trylock_irqsave(&zone->lock, flags)) { - if (unlikely(fpi_flags & FPI_TRYLOCK)) { + if (unlikely(fpi_flags & FPI_TRYLOCK)) { + if (!spin_trylock_irqsave(&zone->lock, flags)) { add_page_to_zone_llist(zone, page, order); return; } + } else { spin_lock_irqsave(&zone->lock, flags); } @@ -2182,23 +2183,15 @@ try_to_claim_block(struct zone *zone, struct page *page, } /* - * Try finding a free buddy page on the fallback list. - * - * This will attempt to claim a whole pageblock for the requested type - * to ensure grouping of such requests in the future. - * - * If a whole block cannot be claimed, steal an individual page, regressing to - * __rmqueue_smallest() logic to at least break up as little contiguity as - * possible. + * Try to allocate from some fallback migratetype by claiming the entire block, + * i.e. converting it to the allocation's start migratetype. * * The use of signed ints for order and current_order is a deliberate * deviation from the rest of this file, to make the for loop * condition simpler. - * - * Return the stolen page, or NULL if none can be found. */ static __always_inline struct page * -__rmqueue_fallback(struct zone *zone, int order, int start_migratetype, +__rmqueue_claim(struct zone *zone, int order, int start_migratetype, unsigned int alloc_flags) { struct free_area *area; @@ -2236,14 +2229,29 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, page = try_to_claim_block(zone, page, current_order, order, start_migratetype, fallback_mt, alloc_flags); - if (page) - goto got_one; + if (page) { + trace_mm_page_alloc_extfrag(page, order, current_order, + start_migratetype, fallback_mt); + return page; + } } - if (alloc_flags & ALLOC_NOFRAGMENT) - return NULL; + return NULL; +} + +/* + * Try to steal a single page from some fallback migratetype. Leave the rest of + * the block as its current migratetype, potentially causing fragmentation. + */ +static __always_inline struct page * +__rmqueue_steal(struct zone *zone, int order, int start_migratetype) +{ + struct free_area *area; + int current_order; + struct page *page; + int fallback_mt; + bool claim_block; - /* No luck claiming pageblock. Find the smallest fallback page */ for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) { area = &(zone->free_area[current_order]); fallback_mt = find_suitable_fallback(area, current_order, @@ -2253,25 +2261,28 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, page = get_page_from_free_area(area, fallback_mt); page_del_and_expand(zone, page, order, current_order, fallback_mt); - goto got_one; + trace_mm_page_alloc_extfrag(page, order, current_order, + start_migratetype, fallback_mt); + return page; } return NULL; - -got_one: - trace_mm_page_alloc_extfrag(page, order, current_order, - start_migratetype, fallback_mt); - - return page; } +enum rmqueue_mode { + RMQUEUE_NORMAL, + RMQUEUE_CMA, + RMQUEUE_CLAIM, + RMQUEUE_STEAL, +}; + /* * Do the hard work of removing an element from the buddy allocator. * Call me with the zone->lock already held. 
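 *
 * Illustrative walk-through of the rmqueue_mode caching below (numbers
 * are hypothetical): rmqueue_bulk() asks for 32 pages while the
 * preferred freelists are empty. The first __rmqueue() call falls
 * through NORMAL -> CMA -> CLAIM; a successful block claim refills the
 * preferred freelists and resets *mode to RMQUEUE_NORMAL, so the
 * remaining 31 calls are served by the first switch case. If instead
 * only a single-page steal succeeds, *mode stays RMQUEUE_STEAL and the
 * remaining calls skip straight to __rmqueue_steal().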
*/ static __always_inline struct page * __rmqueue(struct zone *zone, unsigned int order, int migratetype, - unsigned int alloc_flags) + unsigned int alloc_flags, enum rmqueue_mode *mode) { struct page *page; @@ -2290,16 +2301,48 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, } } - page = __rmqueue_smallest(zone, order, migratetype); - if (unlikely(!page)) { - if (alloc_flags & ALLOC_CMA) + /* + * First try the freelists of the requested migratetype, then try + * fallbacks modes with increasing levels of fragmentation risk. + * + * The fallback logic is expensive and rmqueue_bulk() calls in + * a loop with the zone->lock held, meaning the freelists are + * not subject to any outside changes. Remember in *mode where + * we found pay dirt, to save us the search on the next call. + */ + switch (*mode) { + case RMQUEUE_NORMAL: + page = __rmqueue_smallest(zone, order, migratetype); + if (page) + return page; + fallthrough; + case RMQUEUE_CMA: + if (alloc_flags & ALLOC_CMA) { page = __rmqueue_cma_fallback(zone, order); - - if (!page) - page = __rmqueue_fallback(zone, order, migratetype, - alloc_flags); + if (page) { + *mode = RMQUEUE_CMA; + return page; + } + } + fallthrough; + case RMQUEUE_CLAIM: + page = __rmqueue_claim(zone, order, migratetype, alloc_flags); + if (page) { + /* Replenished preferred freelist, back to normal mode. */ + *mode = RMQUEUE_NORMAL; + return page; + } + fallthrough; + case RMQUEUE_STEAL: + if (!(alloc_flags & ALLOC_NOFRAGMENT)) { + page = __rmqueue_steal(zone, order, migratetype); + if (page) { + *mode = RMQUEUE_STEAL; + return page; + } + } } - return page; + return NULL; } /* @@ -2311,17 +2354,19 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, unsigned int alloc_flags) { + enum rmqueue_mode rmqm = RMQUEUE_NORMAL; unsigned long flags; int i; - if (!spin_trylock_irqsave(&zone->lock, flags)) { - if (unlikely(alloc_flags & ALLOC_TRYLOCK)) + if (unlikely(alloc_flags & ALLOC_TRYLOCK)) { + if (!spin_trylock_irqsave(&zone->lock, flags)) return 0; + } else { spin_lock_irqsave(&zone->lock, flags); } for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype, - alloc_flags); + alloc_flags, &rmqm); if (unlikely(page == NULL)) break; @@ -2937,15 +2982,18 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, do { page = NULL; - if (!spin_trylock_irqsave(&zone->lock, flags)) { - if (unlikely(alloc_flags & ALLOC_TRYLOCK)) + if (unlikely(alloc_flags & ALLOC_TRYLOCK)) { + if (!spin_trylock_irqsave(&zone->lock, flags)) return NULL; + } else { spin_lock_irqsave(&zone->lock, flags); } if (alloc_flags & ALLOC_HIGHATOMIC) page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); if (!page) { - page = __rmqueue(zone, order, migratetype, alloc_flags); + enum rmqueue_mode rmqm = RMQUEUE_NORMAL; + + page = __rmqueue(zone, order, migratetype, alloc_flags, &rmqm); /* * If the allocation fails, allow OOM handling and diff --git a/mm/slub.c b/mm/slub.c index b46f87662e71..dc9e729e1d26 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1973,6 +1973,11 @@ static inline void handle_failed_objexts_alloc(unsigned long obj_exts, #define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \ __GFP_ACCOUNT | __GFP_NOFAIL) +static inline void init_slab_obj_exts(struct slab *slab) +{ + slab->obj_exts = 0; +} + int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, gfp_t gfp, bool new_slab) { @@ -2058,6 +2063,10 @@ static inline bool need_slab_obj_ext(void) 
#else /* CONFIG_SLAB_OBJ_EXT */ +static inline void init_slab_obj_exts(struct slab *slab) +{ +} + static int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, gfp_t gfp, bool new_slab) { @@ -2637,6 +2646,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) slab->objects = oo_objects(oo); slab->inuse = 0; slab->frozen = 0; + init_slab_obj_exts(slab); account_slab(slab, oo_order(oo), s, flags); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index fbf2cf62ab9f..7d5d709cc838 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1902,6 +1902,14 @@ struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi, unsigned long end) { struct vm_area_struct *ret; + bool give_up_on_oom = false; + + /* + * If we are modifying only and not splitting, just give up on the merge + * if OOM prevents us from merging successfully. + */ + if (start == vma->vm_start && end == vma->vm_end) + give_up_on_oom = true; /* Reset ptes for the whole vma range if wr-protected */ if (userfaultfd_wp(vma)) @@ -1909,7 +1917,7 @@ struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi, ret = vma_modify_flags_uffd(vmi, prev, vma, start, end, vma->vm_flags & ~__VM_UFFD_FLAGS, - NULL_VM_UFFD_CTX); + NULL_VM_UFFD_CTX, give_up_on_oom); /* * In the vma_merge() successful mprotect-like case 8: @@ -1960,7 +1968,8 @@ int userfaultfd_register_range(struct userfaultfd_ctx *ctx, new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags; vma = vma_modify_flags_uffd(&vmi, prev, vma, start, vma_end, new_flags, - (struct vm_userfaultfd_ctx){ctx}, + /* give_up_on_oom = */false); if (IS_ERR(vma)) return PTR_ERR(vma); diff --git a/mm/vma.c b/mm/vma.c @@ -666,6 +666,9 @@ static void vmg_adjust_set_range(struct vma_merge_struct *vmg) /* * Actually perform the VMA merge operation. * + * IMPORTANT: We guarantee that, should vmg->give_up_on_oom be set, we will + * not modify any VMAs or cause inconsistent state should an OOM condition + * arise. + * * Returns 0 on success, or an error value on failure. */ static int commit_merge(struct vma_merge_struct *vmg) @@ -685,6 +688,12 @@ static int commit_merge(struct vma_merge_struct *vmg) init_multi_vma_prep(&vp, vma, vmg); + /* + * If vmg->give_up_on_oom is set, we're safe, because we don't actually + * manipulate any VMAs until we succeed at preallocation. + * + * Past this point, we will not return an error. + */ if (vma_iter_prealloc(vmg->vmi, vma)) return -ENOMEM; @@ -915,7 +924,13 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( if (anon_dup) unlink_anon_vmas(anon_dup); - vmg->state = VMA_MERGE_ERROR_NOMEM; + /* + * We've cleaned up any cloned anon_vma's, no VMAs have been + * modified, no harm no foul if the user requests that we not + * report this and just give up, leaving the VMAs unmerged. + */ + if (!vmg->give_up_on_oom) + vmg->state = VMA_MERGE_ERROR_NOMEM; return NULL; } @@ -926,7 +941,15 @@ static __must_check struct vm_area_struct *vma_merge_existing_range( abort: vma_iter_set(vmg->vmi, start); vma_iter_load(vmg->vmi); - vmg->state = VMA_MERGE_ERROR_NOMEM; + + /* + * This means we have failed to clone anon_vma's correctly, but no + * actual changes to VMAs have occurred, so no harm no foul - if the + * user doesn't want this reported and instead just wants to give up on + * the merge, allow it. + */ + if (!vmg->give_up_on_oom) + vmg->state = VMA_MERGE_ERROR_NOMEM; return NULL; } @@ -1068,6 +1091,10 @@ int vma_expand(struct vma_merge_struct *vmg) /* This should already have been checked by this point.
*/ VM_WARN_ON_VMG(!can_merge_remove_vma(next), vmg); vma_start_write(next); + /* + * In this case we don't report OOM, so vmg->give_up_on_oom is + * safe. + */ ret = dup_anon_vma(middle, next, &anon_dup); if (ret) return ret; @@ -1090,9 +1117,15 @@ int vma_expand(struct vma_merge_struct *vmg) return 0; nomem: - vmg->state = VMA_MERGE_ERROR_NOMEM; if (anon_dup) unlink_anon_vmas(anon_dup); + /* + * If the user requests that we just give up on OOM, we are safe to do so + * here, as commit_merge() provides this contract to us. Nothing has been + * changed - no harm no foul, just don't report it. + */ + if (!vmg->give_up_on_oom) + vmg->state = VMA_MERGE_ERROR_NOMEM; return -ENOMEM; } @@ -1534,6 +1567,13 @@ static struct vm_area_struct *vma_modify(struct vma_merge_struct *vmg) if (vmg_nomem(vmg)) return ERR_PTR(-ENOMEM); + /* + * Split can fail for reasons other than OOM, so if the user requests + * this it's probably a mistake. + */ + VM_WARN_ON(vmg->give_up_on_oom && + (vma->vm_start != start || vma->vm_end != end)); + /* Split any preceding portion of the VMA. */ if (vma->vm_start < start) { int err = split_vma(vmg->vmi, vma, start, 1); @@ -1602,12 +1642,15 @@ struct vm_area_struct struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long new_flags, - struct vm_userfaultfd_ctx new_ctx) + struct vm_userfaultfd_ctx new_ctx, + bool give_up_on_oom) { VMG_VMA_STATE(vmg, vmi, prev, vma, start, end); vmg.flags = new_flags; vmg.uffd_ctx = new_ctx; + if (give_up_on_oom) + vmg.give_up_on_oom = true; return vma_modify(&vmg); } diff --git a/mm/vma.h b/mm/vma.h @@ -114,6 +114,12 @@ struct vma_merge_struct { */ bool just_expand :1; + /* + * If a merge is possible, but an OOM error occurs, give up and don't + * execute the merge, returning NULL. + */ + bool give_up_on_oom :1; + /* Internal flags set during merge process: */ /* @@ -255,7 +261,8 @@ __must_check struct vm_area_struct struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long new_flags, - struct vm_userfaultfd_ctx new_ctx); + struct vm_userfaultfd_ctx new_ctx, + bool give_up_on_oom); __must_check struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg); diff --git a/samples/livepatch/livepatch-callbacks-busymod.c b/samples/livepatch/livepatch-callbacks-busymod.c index 69105596e72e..fadc2a85cb35 100644 --- a/samples/livepatch/livepatch-callbacks-busymod.c +++ b/samples/livepatch/livepatch-callbacks-busymod.c @@ -56,4 +56,5 @@ static void livepatch_callbacks_mod_exit(void) module_init(livepatch_callbacks_mod_init); module_exit(livepatch_callbacks_mod_exit); +MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks, support module"); MODULE_LICENSE("GPL"); diff --git a/samples/livepatch/livepatch-callbacks-demo.c b/samples/livepatch/livepatch-callbacks-demo.c index 11c3f4357812..9e69d9caed25 100644 --- a/samples/livepatch/livepatch-callbacks-demo.c +++ b/samples/livepatch/livepatch-callbacks-demo.c @@ -192,5 +192,6 @@ static void livepatch_callbacks_demo_exit(void) module_init(livepatch_callbacks_demo_init); module_exit(livepatch_callbacks_demo_exit); +MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-callbacks-mod.c b/samples/livepatch/livepatch-callbacks-mod.c index 2a074f422a51..d1851b471ad9 100644 --- a/samples/livepatch/livepatch-callbacks-mod.c +++ b/samples/livepatch/livepatch-callbacks-mod.c @@ -38,4 +38,5 @@ static void livepatch_callbacks_mod_exit(void)
module_init(livepatch_callbacks_mod_init); module_exit(livepatch_callbacks_mod_exit); +MODULE_DESCRIPTION("Live patching demo for (un)patching callbacks, support module"); MODULE_LICENSE("GPL"); diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c index cd76d7ebe598..5263a2f31c48 100644 --- a/samples/livepatch/livepatch-sample.c +++ b/samples/livepatch/livepatch-sample.c @@ -66,5 +66,6 @@ static void livepatch_exit(void) module_init(livepatch_init); module_exit(livepatch_exit); +MODULE_DESCRIPTION("Kernel Live Patching Sample Module"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-shadow-fix1.c b/samples/livepatch/livepatch-shadow-fix1.c index f3f153895d6c..cbf68ca40097 100644 --- a/samples/livepatch/livepatch-shadow-fix1.c +++ b/samples/livepatch/livepatch-shadow-fix1.c @@ -168,5 +168,6 @@ static void livepatch_shadow_fix1_exit(void) module_init(livepatch_shadow_fix1_init); module_exit(livepatch_shadow_fix1_exit); +MODULE_DESCRIPTION("Live patching demo for shadow variables"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/samples/livepatch/livepatch-shadow-fix2.c b/samples/livepatch/livepatch-shadow-fix2.c index 361046a4f10c..b99122cb221f 100644 --- a/samples/livepatch/livepatch-shadow-fix2.c +++ b/samples/livepatch/livepatch-shadow-fix2.c @@ -128,5 +128,6 @@ static void livepatch_shadow_fix2_exit(void) module_init(livepatch_shadow_fix2_init); module_exit(livepatch_shadow_fix2_exit); +MODULE_DESCRIPTION("Live patching demo for shadow variables"); MODULE_LICENSE("GPL"); MODULE_INFO(livepatch, "Y"); diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 6d44f8c8a18f..af9d9acaf997 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -43,9 +43,6 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_DIRTY_LOG_PAGE_OFFSET 64 -#define KVM_REG_SIZE(id) \ - (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) - struct kvm_regs { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -108,6 +105,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ +#define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */ struct kvm_vcpu_init { __u32 target; @@ -418,6 +416,7 @@ enum { #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 +#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h index 9306726337fe..df36f23876e8 100644 --- a/tools/arch/arm64/include/uapi/asm/unistd.h +++ b/tools/arch/arm64/include/uapi/asm/unistd.h @@ -1,24 +1,2 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#define __ARCH_WANT_RENAMEAT -#define __ARCH_WANT_NEW_STAT -#define __ARCH_WANT_SET_GET_RLIMIT -#define __ARCH_WANT_TIME32_SYSCALLS -#define __ARCH_WANT_MEMFD_SECRET - -#include <asm-generic/unistd.h> +#include <asm/unistd_64.h> diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 9e3fa7942e7d..6c2c152d8a67 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -75,8 +75,8 @@ #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ #define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ #define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */ +/* Free ( 3*32+ 6) */ +/* Free ( 3*32+ 7) */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ #define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ #define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */ @@ -329,6 +329,7 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */ #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ @@ -377,6 +378,7 @@ #define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ #define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ +#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/ @@ -434,15 +436,18 @@ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ -#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */ -#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_SME (19*32+ 0) /* "sme" Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* "sev" Secure Encrypted Virtualization */ #define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ -#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted 
Virtualization - Secure Nested Paging */ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ -#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ -#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ +#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */ +#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */ #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ +#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ @@ -455,6 +460,11 @@ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ +#define X86_FEATURE_SRSO_USER_KERNEL_NO (20*32+30) /* CPU is not affected by SRSO across user/kernel boundaries */ +#define X86_FEATURE_SRSO_BP_SPEC_REDUCE (20*32+31) /* + * BP_CFG[BpSpecReduce] can be used to mitigate SRSO for VMs. + * (SRSO_MSR_FIX in the official doc). + */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -470,6 +480,7 @@ #define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ +#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */ /* * BUG word(s) @@ -521,4 +532,5 @@ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ #define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ #define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index dc1c1057f26e..e6134ef2263d 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -397,7 +397,8 @@ #define MSR_IA32_PASID_VALID BIT_ULL(31) /* DEBUGCTLMSR bits (others vary by model): */ -#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */ +#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT) #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ #define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) @@ -610,6 +611,7 @@ #define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_AMD_PERF_STATUS 0xc0010063 #define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD64_GUEST_TSC_FREQ 0xc0010134 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 @@ -646,6 +648,7 @@ #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ #define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b #define 
MSR_AMD64_VM_PAGE_FLUSH 0xc001011e +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 @@ -684,11 +687,12 @@ #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) #define MSR_AMD64_SNP_RESV_BIT 18 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) - -#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f - #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_AMD64_RMP_CFG 0xc0010136 +#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0 +#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT) +#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8) #define MSR_SVSM_CAA 0xc001f000 @@ -699,15 +703,17 @@ #define MSR_AMD_CPPC_REQ 0xc00102b3 #define MSR_AMD_CPPC_STATUS 0xc00102b4 -#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff) -#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff) -#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff) -#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff) +/* Masks for use with MSR_AMD_CPPC_CAP1 */ +#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24) -#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0) -#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8) -#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) -#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) +/* Masks for use with MSR_AMD_CPPC_REQ */ +#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) /* AMD Performance Counter Global Status and Control MSRs */ #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 @@ -719,6 +725,7 @@ /* Zen4 */ #define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4 #define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 /* Fam 19h MSRs */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 88585c1de416..460306b35a4b 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -559,6 +559,9 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) #define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) +#define KVM_XEN_MSR_MIN_INDEX 0x40000000u +#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; @@ -925,5 +928,6 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SEV_VM 2 #define KVM_X86_SEV_ES_VM 3 #define KVM_X86_SNP_VM 4 +#define KVM_X86_TDX_VM 5 #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 1814b413fd57..ec1321248dac 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -95,6 +95,7 @@ #define SVM_EXIT_CR14_WRITE_TRAP 0x09e #define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_IDLE_HLT 0x0a6 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 @@ -224,6 +225,7 @@ { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ + { SVM_EXIT_IDLE_HLT, "idle-halt" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { 
SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 0199d56cb479..d66b710d628f 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -3,6 +3,7 @@ #include <linux/export.h> #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> @@ -28,7 +29,7 @@ * only for the return value that is the same as the source input, * which the compiler could/should do much better anyway. */ -SYM_FUNC_START(__memset) +SYM_TYPED_FUNC_START(__memset) ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS movq %rdi,%r9 diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h new file mode 100644 index 000000000000..6b8713675765 --- /dev/null +++ b/tools/include/linux/cfi_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Clang Control Flow Integrity (CFI) type definitions. + */ +#ifndef _LINUX_CFI_TYPES_H +#define _LINUX_CFI_TYPES_H + +#ifdef __ASSEMBLY__ +#include <linux/linkage.h> + +#ifdef CONFIG_CFI_CLANG +/* + * Use the __kcfi_typeid_<function> type identifier symbol to + * annotate indirectly called assembly functions. The compiler emits + * these symbols for all address-taken function declarations in C + * code. + */ +#ifndef __CFI_TYPE +#define __CFI_TYPE(name) \ + .4byte __kcfi_typeid_##name +#endif + +#define SYM_TYPED_ENTRY(name, linkage, align...) \ + linkage(name) ASM_NL \ + align ASM_NL \ + __CFI_TYPE(name) ASM_NL \ + name: + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_TYPED_ENTRY(name, linkage, align) + +#else /* CONFIG_CFI_CLANG */ + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_START(name, linkage, align) + +#endif /* CONFIG_CFI_CLANG */ + +#ifndef SYM_TYPED_FUNC_START +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _LINUX_CFI_TYPES_H */ diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 1ea2c4c33b86..ef1c27fa3c57 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -85,6 +85,7 @@ /* compatibility flags */ #define MAP_FILE 0 +#define PKEY_UNRESTRICTED 0x0 #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 88dc393c2bca..2892a45023af 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -849,9 +849,11 @@ __SYSCALL(__NR_getxattrat, sys_getxattrat) __SYSCALL(__NR_listxattrat, sys_listxattrat) #define __NR_removexattrat 466 __SYSCALL(__NR_removexattrat, sys_removexattrat) +#define __NR_open_tree_attr 467 +__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) #undef __NR_syscalls -#define __NR_syscalls 467 +#define __NR_syscalls 468 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 5d32d53508d9..ced0fc3c3aa5 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -79,6 +79,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ #define IPPROTO_ETHERNET IPPROTO_ETHERNET + IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */ +#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG IPPROTO_RAW = 255, /* 
Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_SMC = 256, /* Shared Memory Communications */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 502ea63b5d2e..b6ae8ad8934b 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -617,10 +617,6 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) -#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE | \ - KVM_X86_DISABLE_EXITS_CSTATE) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { @@ -933,6 +929,7 @@ struct kvm_enable_cap { #define KVM_CAP_PRE_FAULT_MEMORY 236 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 +#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1070,6 +1067,10 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + #define KVM_REG_SIZE_U8 0x0000000000000000ULL #define KVM_REG_SIZE_U16 0x0010000000000000ULL #define KVM_REG_SIZE_U32 0x0020000000000000ULL diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 0524d541d4e3..5fc753c23734 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -385,6 +385,8 @@ enum perf_event_read_format { * * @sample_max_stack: Max number of frame pointers in a callchain, * should be < /proc/sys/kernel/perf_event_max_stack + * Max number of entries of branch stack + * should be < hardware limit */ struct perf_event_attr { diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 887a25286441..f78ee3670dd5 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -98,43 +98,93 @@ struct statx_timestamp { */ struct statx { /* 0x00 */ - __u32 stx_mask; /* What results were written [uncond] */ - __u32 stx_blksize; /* Preferred general I/O size [uncond] */ - __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* What results were written [uncond] */ + __u32 stx_mask; + + /* Preferred general I/O size [uncond] */ + __u32 stx_blksize; + + /* Flags conveying information about the file [uncond] */ + __u64 stx_attributes; + /* 0x10 */ - __u32 stx_nlink; /* Number of hard links */ - __u32 stx_uid; /* User ID of owner */ - __u32 stx_gid; /* Group ID of owner */ - __u16 stx_mode; /* File mode */ + /* Number of hard links */ + __u32 stx_nlink; + + /* User ID of owner */ + __u32 stx_uid; + + /* Group ID of owner */ + __u32 stx_gid; + + /* File mode */ + __u16 stx_mode; __u16 __spare0[1]; + /* 0x20 */ - __u64 stx_ino; /* Inode number */ - __u64 stx_size; /* File size */ - __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* Inode number */ + __u64 stx_ino; + + /* File size */ + __u64 stx_size; + + /* Number of 512-byte blocks allocated */ + __u64 stx_blocks; + + /* Mask to show what's supported in stx_attributes */ + __u64 stx_attributes_mask; + /* 0x40 */ - struct statx_timestamp stx_atime; /* Last access time */ - struct statx_timestamp stx_btime; /* File creation time */ - struct statx_timestamp stx_ctime; /* Last attribute change time */ - struct statx_timestamp stx_mtime; /* Last data 
modification time */ + /* Last access time */ + struct statx_timestamp stx_atime; + + /* File creation time */ + struct statx_timestamp stx_btime; + + /* Last attribute change time */ + struct statx_timestamp stx_ctime; + + /* Last data modification time */ + struct statx_timestamp stx_mtime; + /* 0x80 */ - __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_major; __u32 stx_rdev_minor; - __u32 stx_dev_major; /* ID of device containing file [uncond] */ + + /* ID of device containing file [uncond] */ + __u32 stx_dev_major; __u32 stx_dev_minor; + /* 0x90 */ __u64 stx_mnt_id; - __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ - __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ + + /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_mem_align; + + /* File offset alignment for direct I/O */ + __u32 stx_dio_offset_align; + /* 0xa0 */ - __u64 stx_subvol; /* Subvolume identifier */ - __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ - __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* Subvolume identifier */ + __u64 stx_subvol; + + /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_min; + + /* Max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; + /* 0xb0 */ - __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ - __u32 __spare1[1]; + /* Max atomic write segment count */ + __u32 stx_atomic_write_segments_max; + + /* File offset alignment for direct I/O reads */ + __u32 stx_dio_read_offset_align; + /* 0xb8 */ __u64 __spare3[9]; /* Spare space for future expansion */ + /* 0x100 */ }; @@ -164,6 +214,7 @@ struct statx { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl index 49eeb2ad8dbd..27c1d5ebcd91 100644 --- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl @@ -481,3 +481,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index c844cd5cda62..1e8c44c7b614 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -381,3 +381,4 @@ 464 n64 getxattrat sys_getxattrat 465 n64 listxattrat sys_listxattrat 466 n64 removexattrat sys_removexattrat +467 n64 open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index d8b4ab78bef0..9a084bdb8926 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -557,3 +557,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl 
b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index e9115b4d8b63..a4569b96ef06 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 464 common getxattrat sys_getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl index c8cad33bf250..52a7652fcff6 100644 --- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl @@ -470,3 +470,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl index 727f99d333b3..83e45eb6c095 100644 --- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl @@ -512,3 +512,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl index 4d0fb2fba7e2..ac007ea00979 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl @@ -396,7 +396,7 @@ 381 i386 pkey_alloc sys_pkey_alloc 382 i386 pkey_free sys_pkey_free 383 i386 statx sys_statx -384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl +384 i386 arch_prctl sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents 386 i386 rseq sys_rseq 393 i386 semget sys_semget @@ -472,3 +472,4 @@ 464 i386 getxattrat sys_getxattrat 465 i386 listxattrat sys_listxattrat 466 i386 removexattrat sys_removexattrat +467 i386 open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 5eb708bff1c7..cfb5ca41e30d 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -390,6 +390,7 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl index 37effc1b134e..f657a77314f8 100644 --- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl @@ -437,3 +437,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index a4499e5a6f9c..857f6646cc23 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -20,6 +20,7 @@ FILES=( "include/uapi/linux/stat.h" "include/linux/bits.h" "include/vdso/bits.h" + "include/linux/cfi_types.h" "include/linux/const.h" "include/vdso/const.h" "include/vdso/unaligned.h" diff --git a/tools/perf/trace/beauty/include/linux/socket.h 
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index d18cc47e89bd..c3322eb3d686 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -392,6 +392,8 @@ struct ucred {
 extern int move_addr_to_kernel(void __user *uaddr, int ulen,
 			       struct sockaddr_storage *kaddr);
 extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
+extern int put_cmsg_notrunc(struct msghdr *msg, int level, int type, int len,
+			    void *data);
 
 struct timespec64;
 struct __kernel_timespec;
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
index 6e6907e63bfc..a15ac2fa4b20 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
@@ -155,4 +155,8 @@
 #define AT_HANDLE_MNT_ID_UNIQUE	0x001	/* Return the u64 unique mount ID. */
 #define AT_HANDLE_CONNECTABLE	0x002	/* Request a connectable file handle */
 
+/* Flags for execveat2(2). */
+#define AT_EXECVE_CHECK		0x10000	/* Only perform a check if execution
+					   would be allowed. */
+
 #endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h
index 753971770733..e762e1af650c 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fs.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h
@@ -40,6 +40,15 @@
 #define BLOCK_SIZE_BITS 10
 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
 
+/* flags for integrity meta */
+#define IO_INTEGRITY_CHK_GUARD		(1U << 0) /* enforce guard check */
+#define IO_INTEGRITY_CHK_REFTAG		(1U << 1) /* enforce ref check */
+#define IO_INTEGRITY_CHK_APPTAG		(1U << 2) /* enforce app check */
+
+#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \
+				  IO_INTEGRITY_CHK_REFTAG | \
+				  IO_INTEGRITY_CHK_APPTAG)
+
 #define SEEK_SET	0	/* seek relative to beginning of file */
 #define SEEK_CUR	1	/* seek relative to current file position */
 #define SEEK_END	2	/* seek relative to end of file */
@@ -203,10 +212,8 @@ struct fsxattr {
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
 #define BLKGETDISKSEQ _IOR(0x12,128,__u64)
-/*
- * A jump here: 130-136 are reserved for zoned block devices
- * (see uapi/linux/blkzoned.h)
- */
+/* 130-136 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */
+/* 137-141 are used by blk-crypto ioctls (uapi/linux/blk-crypto.h) */
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	_IO(0x00,1)	/* bmap access */
@@ -332,9 +339,13 @@ typedef int __bitwise __kernel_rwf_t;
 /* Atomic Write */
 #define RWF_ATOMIC	((__force __kernel_rwf_t)0x00000040)
 
+/* buffered IO that drops the cache after reading or writing data */
+#define RWF_DONTCACHE	((__force __kernel_rwf_t)0x00000080)
+
 /* mask of flags supported by the kernel */
 #define RWF_SUPPORTED	(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
-			 RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC)
+			 RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
+			 RWF_DONTCACHE)
 
 #define PROCFS_IOCTL_MAGIC 'f'
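RWF_DONTCACHE, added to fs.h above, is a per-call hint for buffered I/O that drops the cached pages once the data has been read or written back. A sketch of passing it through pwritev2(2), with a fallback define in case the installed headers predate the flag (the file path is an example):

```c
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

#ifndef RWF_DONTCACHE
#define RWF_DONTCACHE 0x00000080	/* value from the hunk above */
#endif

int main(void)
{
	int fd = open("/tmp/dontcache-demo", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	char buf[4096];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };

	if (fd < 0) {
		perror("open");
		return 1;
	}
	memset(buf, 'x', sizeof(buf));

	/* Buffered write; the kernel drops the cached pages after writeback.
	 * Kernels that do not know the flag fail with EOPNOTSUPP. */
	if (pwritev2(fd, &iov, 1, 0, RWF_DONTCACHE) < 0)
		perror("pwritev2(RWF_DONTCACHE)");

	close(fd);
	return 0;
}
```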
diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h
index c07008816aca..7fa67c2031a5 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/mount.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h
@@ -179,7 +179,12 @@ struct statmount {
 	__u32 opt_array;	/* [str] Array of nul terminated fs options */
 	__u32 opt_sec_num;	/* Number of security options */
 	__u32 opt_sec_array;	/* [str] Array of nul terminated security options */
-	__u64 __spare2[46];
+	__u64 supported_mask;	/* Mask flags that this kernel supports */
+	__u32 mnt_uidmap_num;	/* Number of uid mappings */
+	__u32 mnt_uidmap;	/* [str] Array of uid mappings (as seen from callers namespace) */
+	__u32 mnt_gidmap_num;	/* Number of gid mappings */
+	__u32 mnt_gidmap;	/* [str] Array of gid mappings (as seen from callers namespace) */
+	__u64 __spare2[43];
 	char str[];		/* Variable size part containing strings */
 };
 
@@ -217,6 +222,9 @@ struct mnt_id_req {
 #define STATMOUNT_SB_SOURCE		0x00000200U	/* Want/got sb_source */
 #define STATMOUNT_OPT_ARRAY		0x00000400U	/* Want/got opt_... */
 #define STATMOUNT_OPT_SEC_ARRAY	0x00000800U	/* Want/got opt_sec... */
+#define STATMOUNT_SUPPORTED_MASK	0x00001000U	/* Want/got supported mask flags */
+#define STATMOUNT_MNT_UIDMAP		0x00002000U	/* Want/got uidmap... */
+#define STATMOUNT_MNT_GIDMAP		0x00004000U	/* Want/got gidmap... */
 
 /*
  * Special @mnt_id values that can be passed to listmount
diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
index 5c6080680cb2..15c18ef4eb11 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
@@ -353,4 +353,15 @@ struct prctl_mm_map {
  */
 #define PR_LOCK_SHADOW_STACK_STATUS      76
 
+/*
+ * Controls the mode of timer_create() for CRIU restore operations.
+ * Enabling this allows CRIU to restore timers with explicit IDs.
+ *
+ * Don't use for normal operations as the result might be undefined.
+ */
+#define PR_TIMER_CREATE_RESTORE_IDS		77
+# define PR_TIMER_CREATE_RESTORE_IDS_OFF	0
+# define PR_TIMER_CREATE_RESTORE_IDS_ON		1
+# define PR_TIMER_CREATE_RESTORE_IDS_GET	2
+
 #endif /* _LINUX_PRCTL_H */
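The prctl.h hunk documents that PR_TIMER_CREATE_RESTORE_IDS exists solely so CRIU can restore POSIX timers under their original IDs. A hedged sketch of toggling and querying the mode, assuming the _GET operation reports the current setting via the return value (values are from the hunk; the header itself warns against normal use):

```c
#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_TIMER_CREATE_RESTORE_IDS
#define PR_TIMER_CREATE_RESTORE_IDS	77
#define PR_TIMER_CREATE_RESTORE_IDS_OFF	0
#define PR_TIMER_CREATE_RESTORE_IDS_ON	1
#define PR_TIMER_CREATE_RESTORE_IDS_GET	2
#endif

int main(void)
{
	/* Switch timer_create() into restore mode, then read the mode back. */
	if (prctl(PR_TIMER_CREATE_RESTORE_IDS, PR_TIMER_CREATE_RESTORE_IDS_ON,
		  0, 0, 0) != 0) {
		perror("prctl");
		return 1;
	}

	int mode = prctl(PR_TIMER_CREATE_RESTORE_IDS,
			 PR_TIMER_CREATE_RESTORE_IDS_GET, 0, 0, 0);
	printf("restore-ids mode: %d\n", mode);
	return 0;
}
```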
diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h
index 887a25286441..f78ee3670dd5 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/stat.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h
@@ -98,43 +98,93 @@ struct statx_timestamp {
  */
 struct statx {
 	/* 0x00 */
-	__u32	stx_mask;	/* What results were written [uncond] */
-	__u32	stx_blksize;	/* Preferred general I/O size [uncond] */
-	__u64	stx_attributes;	/* Flags conveying information about the file [uncond] */
+	/* What results were written [uncond] */
+	__u32	stx_mask;
+
+	/* Preferred general I/O size [uncond] */
+	__u32	stx_blksize;
+
+	/* Flags conveying information about the file [uncond] */
+	__u64	stx_attributes;
+
 	/* 0x10 */
-	__u32	stx_nlink;	/* Number of hard links */
-	__u32	stx_uid;	/* User ID of owner */
-	__u32	stx_gid;	/* Group ID of owner */
-	__u16	stx_mode;	/* File mode */
+	/* Number of hard links */
+	__u32	stx_nlink;
+
+	/* User ID of owner */
+	__u32	stx_uid;
+
+	/* Group ID of owner */
+	__u32	stx_gid;
+
+	/* File mode */
+	__u16	stx_mode;
 	__u16	__spare0[1];
+
 	/* 0x20 */
-	__u64	stx_ino;	/* Inode number */
-	__u64	stx_size;	/* File size */
-	__u64	stx_blocks;	/* Number of 512-byte blocks allocated */
-	__u64	stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
+	/* Inode number */
+	__u64	stx_ino;
+
+	/* File size */
+	__u64	stx_size;
+
+	/* Number of 512-byte blocks allocated */
+	__u64	stx_blocks;
+
+	/* Mask to show what's supported in stx_attributes */
+	__u64	stx_attributes_mask;
+
 	/* 0x40 */
-	struct statx_timestamp	stx_atime;	/* Last access time */
-	struct statx_timestamp	stx_btime;	/* File creation time */
-	struct statx_timestamp	stx_ctime;	/* Last attribute change time */
-	struct statx_timestamp	stx_mtime;	/* Last data modification time */
+	/* Last access time */
+	struct statx_timestamp	stx_atime;
+
+	/* File creation time */
+	struct statx_timestamp	stx_btime;
+
+	/* Last attribute change time */
+	struct statx_timestamp	stx_ctime;
+
+	/* Last data modification time */
+	struct statx_timestamp	stx_mtime;
+
 	/* 0x80 */
-	__u32	stx_rdev_major;	/* Device ID of special file [if bdev/cdev] */
+	/* Device ID of special file [if bdev/cdev] */
+	__u32	stx_rdev_major;
 	__u32	stx_rdev_minor;
-	__u32	stx_dev_major;	/* ID of device containing file [uncond] */
+
+	/* ID of device containing file [uncond] */
+	__u32	stx_dev_major;
 	__u32	stx_dev_minor;
+
 	/* 0x90 */
 	__u64	stx_mnt_id;
-	__u32	stx_dio_mem_align;	/* Memory buffer alignment for direct I/O */
-	__u32	stx_dio_offset_align;	/* File offset alignment for direct I/O */
+
+	/* Memory buffer alignment for direct I/O */
+	__u32	stx_dio_mem_align;
+
+	/* File offset alignment for direct I/O */
+	__u32	stx_dio_offset_align;
+
 	/* 0xa0 */
-	__u64	stx_subvol;	/* Subvolume identifier */
-	__u32	stx_atomic_write_unit_min;	/* Min atomic write unit in bytes */
-	__u32	stx_atomic_write_unit_max;	/* Max atomic write unit in bytes */
+	/* Subvolume identifier */
+	__u64	stx_subvol;
+
+	/* Min atomic write unit in bytes */
+	__u32	stx_atomic_write_unit_min;
+
+	/* Max atomic write unit in bytes */
+	__u32	stx_atomic_write_unit_max;
+
 	/* 0xb0 */
-	__u32	stx_atomic_write_segments_max;	/* Max atomic write segment count */
-	__u32	__spare1[1];
+	/* Max atomic write segment count */
+	__u32	stx_atomic_write_segments_max;
+
+	/* File offset alignment for direct I/O reads */
+	__u32	stx_dio_read_offset_align;
+
 	/* 0xb8 */
 	__u64	__spare3[9];	/* Spare space for future expansion */
+
 	/* 0x100 */
 };
 
@@ -164,6 +214,7 @@ struct statx {
 #define STATX_MNT_ID_UNIQUE	0x00004000U	/* Want/got extended stx_mount_id */
 #define STATX_SUBVOL		0x00008000U	/* Want/got stx_subvol */
 #define STATX_WRITE_ATOMIC	0x00010000U	/* Want/got atomic_write_* fields */
+#define STATX_DIO_READ_ALIGN	0x00020000U	/* Want/got dio read alignment info */
 
 #define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */
 
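The /* 0x.. */ markers that the reflowed layout keeps are byte offsets within struct statx, so they can be checked mechanically. A small compile-time sketch derived from those comments, assuming UAPI headers new enough to carry stx_dio_read_offset_align:

```c
#include <assert.h>
#include <stddef.h>
#include <linux/stat.h>

/* Offsets follow the comments in the hunk above; the new field reuses
 * the 4 bytes of the old __spare1 slot at 0xb4. */
static_assert(offsetof(struct statx, stx_rdev_major) == 0x80, "0x80 block");
static_assert(offsetof(struct statx, stx_mnt_id) == 0x90, "0x90 block");
static_assert(offsetof(struct statx, stx_subvol) == 0xa0, "0xa0 block");
static_assert(offsetof(struct statx, stx_atomic_write_segments_max) == 0xb0,
	      "0xb0 block");
static_assert(offsetof(struct statx, stx_dio_read_offset_align) == 0xb4,
	      "new field fills the old __spare1 slot");
static_assert(offsetof(struct statx, __spare3) == 0xb8, "0xb8 block");
static_assert(sizeof(struct statx) == 0x100, "struct is exactly 256 bytes");

int main(void) { return 0; }
```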
diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h
index 4cd513215bcd..5a049eeaecce 100644
--- a/tools/perf/trace/beauty/include/uapi/sound/asound.h
+++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h
@@ -716,7 +716,7 @@ enum {
  *  Raw MIDI section - /dev/snd/midi??
  */
 
-#define SNDRV_RAWMIDI_VERSION		SNDRV_PROTOCOL_VERSION(2, 0, 4)
+#define SNDRV_RAWMIDI_VERSION		SNDRV_PROTOCOL_VERSION(2, 0, 5)
 
 enum {
 	SNDRV_RAWMIDI_STREAM_OUTPUT = 0,
@@ -728,6 +728,9 @@ enum {
 #define SNDRV_RAWMIDI_INFO_INPUT		0x00000002
 #define SNDRV_RAWMIDI_INFO_DUPLEX		0x00000004
 #define SNDRV_RAWMIDI_INFO_UMP			0x00000008
+#define SNDRV_RAWMIDI_INFO_STREAM_INACTIVE	0x00000010
+
+#define SNDRV_RAWMIDI_DEVICE_UNKNOWN	0
 
 struct snd_rawmidi_info {
 	unsigned int device;		/* RO/WR (control): device number */
@@ -740,7 +743,8 @@ struct snd_rawmidi_info {
 	unsigned char subname[32];	/* name of active or selected subdevice */
 	unsigned int subdevices_count;
 	unsigned int subdevices_avail;
-	unsigned char reserved[64];	/* reserved for future use */
+	int tied_device;		/* R: tied rawmidi device (UMP/legacy) */
+	unsigned char reserved[60];	/* reserved for future use */
 };
 
 #define SNDRV_RAWMIDI_MODE_FRAMING_MASK		(7<<0)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1974395492d7..3c030da2e477 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2566,25 +2566,6 @@ check:
 	return false;
 }
 
-static bool evsel__handle_error_quirks(struct evsel *evsel, int error)
-{
-	/*
-	 * AMD core PMU tries to forward events with precise_ip to IBS PMU
-	 * implicitly.  But IBS PMU has more restrictions so it can fail with
-	 * supported event attributes.  Let's forward it back to the core PMU
-	 * by clearing precise_ip only if it's from precise_max (:P).
-	 */
-	if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() &&
-	    evsel->core.attr.precise_ip && evsel->precise_max) {
-		evsel->core.attr.precise_ip = 0;
-		pr_debug2_peo("removing precise_ip on AMD\n");
-		display_attr(&evsel->core.attr);
-		return true;
-	}
-
-	return false;
-}
-
 static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
 		struct perf_thread_map *threads,
 		int start_cpu_map_idx, int end_cpu_map_idx)
@@ -2730,9 +2711,6 @@ try_fallback:
 	if (evsel__precise_ip_fallback(evsel))
 		goto retry_open;
 
-	if (evsel__handle_error_quirks(evsel, err))
-		goto retry_open;
-
 out_close:
 	if (err)
 		threads->err_thread = thread;
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 9fb2c1343c7f..0b037e7389a0 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -371,7 +371,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
 	 * has to be pointed by symsrc_filename
 	 */
 	if (ofs == 0) {
-		if (dso__data_get_fd(dso, machine, &fd) {
+		if (dso__data_get_fd(dso, machine, &fd)) {
 			ofs = elf_section_offset(fd, ".debug_frame");
 			dso__data_put_fd(dso);
 		}
diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl
index ebbdb3c42e9f..580b4e246aec 100644
--- a/tools/scripts/syscall.tbl
+++ b/tools/scripts/syscall.tbl
@@ -407,3 +407,4 @@
 464	common	getxattrat			sys_getxattrat
 465	common	listxattrat			sys_listxattrat
 466	common	removexattrat			sys_removexattrat
+467	common	open_tree_attr			sys_open_tree_attr
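The asound.h hunk above carves the new tied_device field out of snd_rawmidi_info's reserved area (4 + 60 == 64 bytes, so the ABI size is unchanged). A hedged sketch of reading it through the long-standing SNDRV_RAWMIDI_IOCTL_INFO ioctl, assuming updated headers and using an example device path:

```c
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <sound/asound.h>	/* installed kernel UAPI headers */

int main(void)
{
	struct snd_rawmidi_info info;
	int fd = open("/dev/snd/midiC0D0", O_RDONLY);	/* example device node */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	memset(&info, 0, sizeof(info));

	/* tied_device only carries data on kernels with rawmidi protocol 2.0.5+. */
	if (ioctl(fd, SNDRV_RAWMIDI_IOCTL_INFO, &info) == 0)
		printf("tied device: %d\n", info.tied_device);
	else
		perror("ioctl");

	close(fd);
	return 0;
}
```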
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
index efabfcbe0b49..17ed3e9917ca 100644
--- a/tools/testing/selftests/mincore/mincore_selftest.c
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -283,8 +283,7 @@ out_free:
 
 /*
  * Test mincore() behavior on a page backed by a tmpfs file.  This test
- * performs the same steps as the previous one. However, we don't expect
- * any readahead in this case.
+ * performs the same steps as the previous one.
  */
 TEST(check_tmpfs_mmap)
 {
@@ -295,7 +294,6 @@ TEST(check_tmpfs_mmap)
 	int page_size;
 	int fd;
 	int i;
-	int ra_pages = 0;
 
 	page_size = sysconf(_SC_PAGESIZE);
 	vec_size = FILE_SIZE / page_size;
@@ -338,8 +336,7 @@ TEST(check_tmpfs_mmap)
 	}
 
 	/*
-	 * Touch a page in the middle of the mapping. We expect only
-	 * that page to be fetched into memory.
+	 * Touch a page in the middle of the mapping.
 	 */
 	addr[FILE_SIZE / 2] = 1;
 	retval = mincore(addr, FILE_SIZE, vec);
@@ -348,15 +345,6 @@ TEST(check_tmpfs_mmap)
 		TH_LOG("Page not found in memory after use");
 	}
 
-	i = FILE_SIZE / 2 / page_size + 1;
-	while (i < vec_size && vec[i]) {
-		ra_pages++;
-		i++;
-	}
-	ASSERT_EQ(ra_pages, 0) {
-		TH_LOG("Read-ahead pages found in memory");
-	}
-
 	munmap(addr, FILE_SIZE);
 	close(fd);
 	free(vec);
diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
index 67df7b47087f..e1fe16bcbbe8 100755
--- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
@@ -29,7 +29,7 @@ fi
 if [[ $cgroup2 ]]; then
   cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}')
   if [[ -z "$cgroup_path" ]]; then
-    cgroup_path=/dev/cgroup/memory
+    cgroup_path=$(mktemp -d)
     mount -t cgroup2 none $cgroup_path
     do_umount=1
   fi
@@ -37,7 +37,7 @@ if [[ $cgroup2 ]]; then
 else
   cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
   if [[ -z "$cgroup_path" ]]; then
-    cgroup_path=/dev/cgroup/memory
+    cgroup_path=$(mktemp -d)
     mount -t cgroup memory,hugetlb $cgroup_path
     do_umount=1
   fi
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index f0cb14ea8608..b6cfe0a4b7df 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -293,7 +293,7 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
 		.iov_base = mem,
 		.iov_len = size,
 	};
-	ssize_t cur, total, transferred;
+	ssize_t cur, total, transferred = 0;
 	struct comm_pipes comm_pipes;
 	char *old, *new;
 	int ret, fds[2];
diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
index 11f9bbe7dc22..0b0d4ba1af27 100755
--- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
@@ -23,7 +23,7 @@ fi
 if [[ $cgroup2 ]]; then
   CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}')
   if [[ -z "$CGROUP_ROOT" ]]; then
-    CGROUP_ROOT=/dev/cgroup/memory
+    CGROUP_ROOT=$(mktemp -d)
     mount -t cgroup2 none $CGROUP_ROOT
     do_umount=1
   fi
diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c
index 66dbb362385f..0f97fb0d19e1 100644
--- a/tools/testing/shared/linux.c
+++ b/tools/testing/shared/linux.c
@@ -150,7 +150,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list)
 {
 	if (kmalloc_verbose)
-		pr_debug("Bulk free %p[0-%lu]\n", list, size - 1);
+		pr_debug("Bulk free %p[0-%zu]\n", list, size - 1);
 
 	pthread_mutex_lock(&cachep->lock);
 	for (int i = 0; i < size; i++)
@@ -168,7 +168,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
 	size_t i;
 
 	if (kmalloc_verbose)
-		pr_debug("Bulk alloc %lu\n", size);
+		pr_debug("Bulk alloc %zu\n", size);
 
 	pthread_mutex_lock(&cachep->lock);
 	if (cachep->nr_objs >= size) {
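The mincore_selftest.c hunk above drops the readahead assertions but keeps the core residency-check pattern. For reference, a minimal standalone sketch of that pattern (anonymous mapping and sizes are our stand-ins for the test's tmpfs file):

```c
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page_size = sysconf(_SC_PAGESIZE);
	size_t len = 16 * page_size;
	unsigned char *vec = calloc(len / page_size, 1);

	char *addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED || !vec) {
		perror("mmap");
		return 1;
	}

	addr[len / 2] = 1;	/* touch one page in the middle */

	/* vec gets one byte per page; bit 0 set means the page is resident. */
	if (mincore(addr, len, vec) != 0) {
		perror("mincore");
		return 1;
	}
	printf("middle page resident: %d\n", vec[len / 2 / page_size] & 1);

	munmap(addr, len);
	free(vec);
	return 0;
}
```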
diff --git a/tools/testing/shared/linux/cleanup.h b/tools/testing/shared/linux/cleanup.h
new file mode 100644
index 000000000000..ea3081426ee9
--- /dev/null
+++ b/tools/testing/shared/linux/cleanup.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "../../../../include/linux/cleanup.h"