From 9f365cb8bbd0162963d6852651d7c9e30adcb7b5 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Tue, 14 May 2024 13:47:23 -0700
Subject: scsi: mpi3mr: Use proper format specifier in mpi3mr_sas_port_add()

When building for a 32-bit platform such as ARM or i386, for which size_t
is unsigned int, there is a warning due to using an unsigned long format
specifier:

  drivers/scsi/mpi3mr/mpi3mr_transport.c:1370:11: error: format specifies type 'unsigned long' but the argument has type 'unsigned int' [-Werror,-Wformat]
   1369 |                         ioc_warn(mrioc, "skipping port %u, max allowed value is %lu\n",
        |                                                                                 ~~~
        |                                                                                 %u
   1370 |                             i, sizeof(mr_sas_port->phy_mask) * 8);
        |                                ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Use the proper format specifier for size_t, %zu, to resolve the warning for
all platforms.

Fixes: 3668651def2c ("scsi: mpi3mr: Sanitise num_phys")
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/20240514-mpi3mr-fix-wformat-v1-1-f1ad49217e5e@kernel.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpi3mr/mpi3mr_transport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/mpi3mr/mpi3mr_transport.c b/drivers/scsi/mpi3mr/mpi3mr_transport.c
index 7ca9a7c2709c..5d261c2f2d20 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_transport.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_transport.c
@@ -1366,7 +1366,7 @@ static struct mpi3mr_sas_port *mpi3mr_sas_port_add(struct mpi3mr_ioc *mrioc,
 			continue;
 
 		if (i > sizeof(mr_sas_port->phy_mask) * 8) {
-			ioc_warn(mrioc, "skipping port %u, max allowed value is %lu\n",
+			ioc_warn(mrioc, "skipping port %u, max allowed value is %zu\n",
 			    i, sizeof(mr_sas_port->phy_mask) * 8);
 			goto out_fail;
 		}
-- 
cgit v1.2.3


From 10157b1fc1a762293381e9145041253420dfc6ad Mon Sep 17 00:00:00 2001
From: Martin Wilck <martin.wilck@suse.com>
Date: Tue, 14 May 2024 16:03:44 +0200
Subject: scsi: core: alua: I/O errors for ALUA state transitions

When a host is configured with a few LUNs and I/O is running, injecting FC
faults repeatedly leads to path recovery problems.  The LUNs have 4 paths
each and 3 of them come back active after say an FC fault which makes 2 of
the paths go down, instead of all 4. This happens after several iterations
of continuous FC faults.

Reason here is that we're returning an I/O error whenever we're
encountering sense code 06/04/0a (LOGICAL UNIT NOT ACCESSIBLE, ASYMMETRIC
ACCESS STATE TRANSITION) instead of retrying.

[mwilck: The original patch was developed by Rajashekhar M A and Hannes
Reinecke. I moved the code to alua_check_sense() as suggested by Mike
Christie [1]. Evan Milne had raised the question whether pg->state should
be set to transitioning in the UA case [2]. I believe that doing this is
correct. SCSI_ACCESS_STATE_TRANSITIONING by itself doesn't cause I/O
errors. Our handler schedules an RTPG, which will only result in an I/O
error condition if the transitioning timeout expires.]

[1] https://lore.kernel.org/all/0bc96e82-fdda-4187-148d-5b34f81d4942@oracle.com/
[2] https://lore.kernel.org/all/CAGtn9r=kicnTDE2o7Gt5Y=yoidHYD7tG8XdMHEBJTBraVEoOCw@mail.gmail.com/

Co-developed-by: Rajashekhar M A <rajs@netapp.com>
Co-developed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin Wilck <martin.wilck@suse.com>
Link: https://lore.kernel.org/r/20240514140344.19538-1-mwilck@suse.com
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/device_handler/scsi_dh_alua.c | 31 +++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index a226dc1b65d7..4eb0837298d4 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -414,28 +414,40 @@ static char print_alua_state(unsigned char state)
 	}
 }
 
-static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
-					      struct scsi_sense_hdr *sense_hdr)
+static void alua_handle_state_transition(struct scsi_device *sdev)
 {
 	struct alua_dh_data *h = sdev->handler_data;
 	struct alua_port_group *pg;
 
+	rcu_read_lock();
+	pg = rcu_dereference(h->pg);
+	if (pg)
+		pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
+	rcu_read_unlock();
+	alua_check(sdev, false);
+}
+
+static enum scsi_disposition alua_check_sense(struct scsi_device *sdev,
+					      struct scsi_sense_hdr *sense_hdr)
+{
 	switch (sense_hdr->sense_key) {
 	case NOT_READY:
 		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
 			/*
 			 * LUN Not Accessible - ALUA state transition
 			 */
-			rcu_read_lock();
-			pg = rcu_dereference(h->pg);
-			if (pg)
-				pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
-			rcu_read_unlock();
-			alua_check(sdev, false);
+			alua_handle_state_transition(sdev);
 			return NEEDS_RETRY;
 		}
 		break;
 	case UNIT_ATTENTION:
+		if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
+			/*
+			 * LUN Not Accessible - ALUA state transition
+			 */
+			alua_handle_state_transition(sdev);
+			return NEEDS_RETRY;
+		}
 		if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
 			/*
 			 * Power On, Reset, or Bus Device Reset.
@@ -502,7 +514,8 @@ static int alua_tur(struct scsi_device *sdev)
 
 	retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
 				      ALUA_FAILOVER_RETRIES, &sense_hdr);
-	if (sense_hdr.sense_key == NOT_READY &&
+	if ((sense_hdr.sense_key == NOT_READY ||
+	     sense_hdr.sense_key == UNIT_ATTENTION) &&
 	    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
 		return SCSI_DH_RETRY;
 	else if (retval)
-- 
cgit v1.2.3


From 9fad9d560af5c654bb38e0b07ee54a4e9acdc5cd Mon Sep 17 00:00:00 2001
From: Justin Stitt <justinstitt@google.com>
Date: Wed, 8 May 2024 17:22:51 +0000
Subject: scsi: sr: Fix unintentional arithmetic wraparound

Running syzkaller with the newly reintroduced signed integer overflow
sanitizer produces this report:

[   65.194362] ------------[ cut here ]------------
[   65.197752] UBSAN: signed-integer-overflow in ../drivers/scsi/sr_ioctl.c:436:9
[   65.203607] -2147483648 * 177 cannot be represented in type 'int'
[   65.207911] CPU: 2 PID: 10416 Comm: syz-executor.1 Not tainted 6.8.0-rc2-00035-gb3ef86b5a957 #1
[   65.213585] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[   65.219923] Call Trace:
[   65.221556]  <TASK>
[   65.223029]  dump_stack_lvl+0x93/0xd0
[   65.225573]  handle_overflow+0x171/0x1b0
[   65.228219]  sr_select_speed+0xeb/0xf0
[   65.230786]  ? __pm_runtime_resume+0xe6/0x130
[   65.233606]  sr_block_ioctl+0x15d/0x1d0
...

Historically, the signed integer overflow sanitizer did not work in the
kernel due to its interaction with `-fwrapv` but this has since been
changed [1] in the newest version of Clang. It was re-enabled in the kernel
with Commit 557f8c582a9b ("ubsan: Reintroduce signed overflow sanitizer").

Firstly, let's change the type of "speed" to unsigned long as
sr_select_speed()'s only caller passes in an unsigned long anyways.

$ git grep '\.select_speed'
|	drivers/scsi/sr.c:      .select_speed           = sr_select_speed,
...
|	static int cdrom_ioctl_select_speed(struct cdrom_device_info *cdi,
|	                unsigned long arg)
|	{
|	        ...
|	        return cdi->ops->select_speed(cdi, arg);
|	}

Next, let's add an extra check to make sure we don't exceed 0xffff/177
(350) since 0xffff is the max speed. This has two benefits: 1) we deal
with integer overflow before it happens and 2) we properly respect the
max speed of 0xffff. There are some "magic" numbers here but I did not
want to change more than what was necessary.

Link: https://github.com/llvm/llvm-project/pull/82432 [1]
Closes: https://github.com/KSPP/linux/issues/357
Cc: linux-hardening@vger.kernel.org
Signed-off-by: Justin Stitt <justinstitt@google.com>
Link: https://lore.kernel.org/r/20240508-b4-b4-sio-sr_select_speed-v2-1-00b68f724290@google.com
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 Documentation/cdrom/cdrom-standard.rst | 4 ++--
 drivers/scsi/sr.h                      | 2 +-
 drivers/scsi/sr_ioctl.c                | 5 ++++-
 include/linux/cdrom.h                  | 2 +-
 4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst
index 7964fe134277..6c1303cff159 100644
--- a/Documentation/cdrom/cdrom-standard.rst
+++ b/Documentation/cdrom/cdrom-standard.rst
@@ -217,7 +217,7 @@ current *struct* is::
 		int (*media_changed)(struct cdrom_device_info *, int);
 		int (*tray_move)(struct cdrom_device_info *, int);
 		int (*lock_door)(struct cdrom_device_info *, int);
-		int (*select_speed)(struct cdrom_device_info *, int);
+		int (*select_speed)(struct cdrom_device_info *, unsigned long);
 		int (*get_last_session) (struct cdrom_device_info *,
 					 struct cdrom_multisession *);
 		int (*get_mcn)(struct cdrom_device_info *, struct cdrom_mcn *);
@@ -396,7 +396,7 @@ action need be taken, and the return value should be 0.
 
 ::
 
-	int select_speed(struct cdrom_device_info *cdi, int speed)
+	int select_speed(struct cdrom_device_info *cdi, unsigned long speed)
 
 Some CD-ROM drives are capable of changing their head-speed. There
 are several reasons for changing the speed of a CD-ROM drive. Badly
diff --git a/drivers/scsi/sr.h b/drivers/scsi/sr.h
index 1175f2e213b5..dc899277b3a4 100644
--- a/drivers/scsi/sr.h
+++ b/drivers/scsi/sr.h
@@ -65,7 +65,7 @@ int sr_disk_status(struct cdrom_device_info *);
 int sr_get_last_session(struct cdrom_device_info *, struct cdrom_multisession *);
 int sr_get_mcn(struct cdrom_device_info *, struct cdrom_mcn *);
 int sr_reset(struct cdrom_device_info *);
-int sr_select_speed(struct cdrom_device_info *cdi, int speed);
+int sr_select_speed(struct cdrom_device_info *cdi, unsigned long speed);
 int sr_audio_ioctl(struct cdrom_device_info *, unsigned int, void *);
 
 int sr_is_xa(Scsi_CD *);
diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c
index 5b0b35e60e61..a0d2556a27bb 100644
--- a/drivers/scsi/sr_ioctl.c
+++ b/drivers/scsi/sr_ioctl.c
@@ -425,11 +425,14 @@ int sr_reset(struct cdrom_device_info *cdi)
 	return 0;
 }
 
-int sr_select_speed(struct cdrom_device_info *cdi, int speed)
+int sr_select_speed(struct cdrom_device_info *cdi, unsigned long speed)
 {
 	Scsi_CD *cd = cdi->handle;
 	struct packet_command cgc;
 
+	/* avoid exceeding the max speed or overflowing integer bounds */
+	speed = clamp(0, speed, 0xffff / 177);
+
 	if (speed == 0)
 		speed = 0xffff;	/* set to max */
 	else
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index 98c6fd0b39b6..fdfb61ccf55a 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -77,7 +77,7 @@ struct cdrom_device_ops {
 				      unsigned int clearing, int slot);
 	int (*tray_move) (struct cdrom_device_info *, int);
 	int (*lock_door) (struct cdrom_device_info *, int);
-	int (*select_speed) (struct cdrom_device_info *, int);
+	int (*select_speed) (struct cdrom_device_info *, unsigned long);
 	int (*get_last_session) (struct cdrom_device_info *,
 				 struct cdrom_multisession *);
 	int (*get_mcn) (struct cdrom_device_info *,
-- 
cgit v1.2.3


From 51071f0831ea975fc045526dd7e17efe669dc6e1 Mon Sep 17 00:00:00 2001
From: Saurav Kashyap <skashyap@marvell.com>
Date: Wed, 15 May 2024 14:40:59 +0530
Subject: scsi: qedf: Don't process stag work during unload and recovery

Stag work can cause issues during unload and recovery, hence don't process
it.

Signed-off-by: Saurav Kashyap <skashyap@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Link: https://lore.kernel.org/r/20240515091101.18754-2-skashyap@marvell.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf_main.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index fd12439cbaab..c457caae07b3 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -3997,6 +3997,22 @@ void qedf_stag_change_work(struct work_struct *work)
 	struct qedf_ctx *qedf =
 	    container_of(work, struct qedf_ctx, stag_work.work);
 
+	if (!qedf) {
+		QEDF_ERR(&qedf->dbg_ctx, "qedf is NULL");
+		return;
+	}
+
+	if (test_bit(QEDF_IN_RECOVERY, &qedf->flags)) {
+		QEDF_ERR(&qedf->dbg_ctx,
+			 "Already is in recovery, hence not calling software context reset.\n");
+		return;
+	}
+
+	if (test_bit(QEDF_UNLOADING, &qedf->flags)) {
+		QEDF_ERR(&qedf->dbg_ctx, "Driver unloading\n");
+		return;
+	}
+
 	printk_ratelimited("[%s]:[%s:%d]:%d: Performing software context reset.",
 			dev_name(&qedf->pdev->dev), __func__, __LINE__,
 			qedf->dbg_ctx.host_no);
-- 
cgit v1.2.3


From 78e88472b60936025b83eba57cffa59d3501dc07 Mon Sep 17 00:00:00 2001
From: Saurav Kashyap <skashyap@marvell.com>
Date: Wed, 15 May 2024 14:41:00 +0530
Subject: scsi: qedf: Wait for stag work during unload

If stag work is already scheduled and unload is called, it can lead to
issues as unload cleans up the work element. Wait for stag work to get
completed before cleanup during unload.

Signed-off-by: Saurav Kashyap <skashyap@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Link: https://lore.kernel.org/r/20240515091101.18754-3-skashyap@marvell.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf.h      |  1 +
 drivers/scsi/qedf/qedf_main.c | 30 +++++++++++++++++++++++++++---
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h
index 5058e01b65a2..98afdfe63600 100644
--- a/drivers/scsi/qedf/qedf.h
+++ b/drivers/scsi/qedf/qedf.h
@@ -363,6 +363,7 @@ struct qedf_ctx {
 #define QEDF_IN_RECOVERY		5
 #define QEDF_DBG_STOP_IO		6
 #define QEDF_PROBING			8
+#define QEDF_STAG_IN_PROGRESS		9
 	unsigned long flags; /* Miscellaneous state flags */
 	int fipvlan_retries;
 	u8 num_queues;
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index c457caae07b3..eca0190d9a82 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -318,11 +318,18 @@ static struct fc_seq *qedf_elsct_send(struct fc_lport *lport, u32 did,
 	 */
 	if (resp == fc_lport_flogi_resp) {
 		qedf->flogi_cnt++;
+		qedf->flogi_pending++;
+
+		if (test_bit(QEDF_UNLOADING, &qedf->flags)) {
+			QEDF_ERR(&qedf->dbg_ctx, "Driver unloading\n");
+			qedf->flogi_pending = 0;
+		}
+
 		if (qedf->flogi_pending >= QEDF_FLOGI_RETRY_CNT) {
 			schedule_delayed_work(&qedf->stag_work, 2);
 			return NULL;
 		}
-		qedf->flogi_pending++;
+
 		return fc_elsct_send(lport, did, fp, op, qedf_flogi_resp,
 		    arg, timeout);
 	}
@@ -912,13 +919,14 @@ void qedf_ctx_soft_reset(struct fc_lport *lport)
 	struct qedf_ctx *qedf;
 	struct qed_link_output if_link;
 
+	qedf = lport_priv(lport);
+
 	if (lport->vport) {
+		clear_bit(QEDF_STAG_IN_PROGRESS, &qedf->flags);
 		printk_ratelimited("Cannot issue host reset on NPIV port.\n");
 		return;
 	}
 
-	qedf = lport_priv(lport);
-
 	qedf->flogi_pending = 0;
 	/* For host reset, essentially do a soft link up/down */
 	atomic_set(&qedf->link_state, QEDF_LINK_DOWN);
@@ -938,6 +946,7 @@ void qedf_ctx_soft_reset(struct fc_lport *lport)
 	if (!if_link.link_up) {
 		QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC,
 			  "Physical link is not up.\n");
+		clear_bit(QEDF_STAG_IN_PROGRESS, &qedf->flags);
 		return;
 	}
 	/* Flush and wait to make sure link down is processed */
@@ -950,6 +959,7 @@ void qedf_ctx_soft_reset(struct fc_lport *lport)
 		  "Queue link up work.\n");
 	queue_delayed_work(qedf->link_update_wq, &qedf->link_update,
 	    0);
+	clear_bit(QEDF_STAG_IN_PROGRESS, &qedf->flags);
 }
 
 /* Reset the host by gracefully logging out and then logging back in */
@@ -3721,6 +3731,7 @@ static void __qedf_remove(struct pci_dev *pdev, int mode)
 {
 	struct qedf_ctx *qedf;
 	int rc;
+	int cnt = 0;
 
 	if (!pdev) {
 		QEDF_ERR(NULL, "pdev is NULL.\n");
@@ -3738,6 +3749,17 @@ static void __qedf_remove(struct pci_dev *pdev, int mode)
 		return;
 	}
 
+stag_in_prog:
+	if (test_bit(QEDF_STAG_IN_PROGRESS, &qedf->flags)) {
+		QEDF_ERR(&qedf->dbg_ctx, "Stag in progress, cnt=%d.\n", cnt);
+		cnt++;
+
+		if (cnt < 5) {
+			msleep(500);
+			goto stag_in_prog;
+		}
+	}
+
 	if (mode != QEDF_MODE_RECOVERY)
 		set_bit(QEDF_UNLOADING, &qedf->flags);
 
@@ -4013,6 +4035,8 @@ void qedf_stag_change_work(struct work_struct *work)
 		return;
 	}
 
+	set_bit(QEDF_STAG_IN_PROGRESS, &qedf->flags);
+
 	printk_ratelimited("[%s]:[%s:%d]:%d: Performing software context reset.",
 			dev_name(&qedf->pdev->dev), __func__, __LINE__,
 			qedf->dbg_ctx.host_no);
-- 
cgit v1.2.3


From 6c3bb589debd763dc4b94803ddf3c13b4fcca776 Mon Sep 17 00:00:00 2001
From: Saurav Kashyap <skashyap@marvell.com>
Date: Wed, 15 May 2024 14:41:01 +0530
Subject: scsi: qedf: Set qed_slowpath_params to zero before use

Zero qed_slowpath_params before use.

Signed-off-by: Saurav Kashyap <skashyap@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Link: https://lore.kernel.org/r/20240515091101.18754-4-skashyap@marvell.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index eca0190d9a82..49adddf978cc 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -3473,6 +3473,7 @@ retry_probe:
 	}
 
 	/* Start the Slowpath-process */
+	memset(&slowpath_params, 0, sizeof(struct qed_slowpath_params));
 	slowpath_params.int_mode = QED_INT_MODE_MSIX;
 	slowpath_params.drv_major = QEDF_DRIVER_MAJOR_VER;
 	slowpath_params.drv_minor = QEDF_DRIVER_MINOR_VER;
-- 
cgit v1.2.3


From e4f5f8298cf6ddae43210d236ad65ac2c6379559 Mon Sep 17 00:00:00 2001
From: Deming Wang <wangdeming@inspur.com>
Date: Mon, 13 May 2024 07:59:56 -0400
Subject: scsi: mpt3sas: Add missing kerneldoc parameter descriptions

Add missing kerneldoc parameter descriptions to _scsih_set_debug_level().

Signed-off-by: Deming Wang <wangdeming@inspur.com>
Link: https://lore.kernel.org/r/20240513115956.1576-1-wangdeming@inspur.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 89ef43a5ef86..12d08d8ba538 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -302,8 +302,8 @@ struct _scsi_io_transfer {
 
 /**
  * _scsih_set_debug_level - global setting of ioc->logging_level.
- * @val: ?
- * @kp: ?
+ * @val: value of the parameter to be set
+ * @kp: pointer to kernel_param structure
  *
  * Note: The logging levels are defined in mpt3sas_debug.h.
  */
-- 
cgit v1.2.3


From 637c435f08ea7e77c53a2ad590b651d0de225e3b Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Thu, 16 May 2024 11:30:41 +0300
Subject: wifi: ath11k: Fix error path in ath11k_pcic_ext_irq_config

If one of the dummy allocation fails in ath11k_pcic_ext_irq_config(),
the previous allocated devices might leak due to returning without
deallocating the devices.

Instead of returning on the error path, deallocate all the previously
allocated net_devices and then return.

Fixes: bca592ead825 ("wifi: ath11k: allocate dummy net_device dynamically")
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
Link: https://msgid.link/20240508185902.70975-1-leitao@debian.org
---
 drivers/net/wireless/ath/ath11k/pcic.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/pcic.c b/drivers/net/wireless/ath/ath11k/pcic.c
index 79eb3f9c902f..debe7c5919ef 100644
--- a/drivers/net/wireless/ath/ath11k/pcic.c
+++ b/drivers/net/wireless/ath/ath11k/pcic.c
@@ -561,6 +561,7 @@ static int ath11k_pcic_ext_irq_config(struct ath11k_base *ab)
 {
 	int i, j, n, ret, num_vectors = 0;
 	u32 user_base_data = 0, base_vector = 0;
+	struct ath11k_ext_irq_grp *irq_grp;
 	unsigned long irq_flags;
 
 	ret = ath11k_pcic_get_user_msi_assignment(ab, "DP", &num_vectors,
@@ -574,14 +575,16 @@ static int ath11k_pcic_ext_irq_config(struct ath11k_base *ab)
 		irq_flags |= IRQF_NOBALANCING;
 
 	for (i = 0; i < ATH11K_EXT_IRQ_GRP_NUM_MAX; i++) {
-		struct ath11k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i];
+		irq_grp = &ab->ext_irq_grp[i];
 		u32 num_irq = 0;
 
 		irq_grp->ab = ab;
 		irq_grp->grp_id = i;
 		irq_grp->napi_ndev = alloc_netdev_dummy(0);
-		if (!irq_grp->napi_ndev)
-			return -ENOMEM;
+		if (!irq_grp->napi_ndev) {
+			ret = -ENOMEM;
+			goto fail_allocate;
+		}
 
 		netif_napi_add(irq_grp->napi_ndev, &irq_grp->napi,
 			       ath11k_pcic_ext_grp_napi_poll);
@@ -606,11 +609,8 @@ static int ath11k_pcic_ext_irq_config(struct ath11k_base *ab)
 			int irq = ath11k_pcic_get_msi_irq(ab, vector);
 
 			if (irq < 0) {
-				for (n = 0; n <= i; n++) {
-					irq_grp = &ab->ext_irq_grp[n];
-					free_netdev(irq_grp->napi_ndev);
-				}
-				return irq;
+				ret = irq;
+				goto fail_irq;
 			}
 
 			ab->irq_num[irq_idx] = irq;
@@ -635,6 +635,15 @@ static int ath11k_pcic_ext_irq_config(struct ath11k_base *ab)
 	}
 
 	return 0;
+fail_irq:
+	/* i ->napi_ndev was properly allocated. Free it also */
+	i += 1;
+fail_allocate:
+	for (n = 0; n < i; n++) {
+		irq_grp = &ab->ext_irq_grp[n];
+		free_netdev(irq_grp->napi_ndev);
+	}
+	return ret;
 }
 
 int ath11k_pcic_config_irq(struct ath11k_base *ab)
-- 
cgit v1.2.3


From dc21c6cc3d6986d938efbf95de62473982c98dec Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 15 May 2024 13:23:39 +0000
Subject: netfilter: nfnetlink_queue: acquire rcu_read_lock() in
 instance_destroy_rcu()

syzbot reported that nf_reinject() could be called without rcu_read_lock() :

WARNING: suspicious RCU usage
6.9.0-rc7-syzkaller-02060-g5c1672705a1a #0 Not tainted

net/netfilter/nfnetlink_queue.c:263 suspicious rcu_dereference_check() usage!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 1
2 locks held by syz-executor.4/13427:
  #0: ffffffff8e334f60 (rcu_callback){....}-{0:0}, at: rcu_lock_acquire include/linux/rcupdate.h:329 [inline]
  #0: ffffffff8e334f60 (rcu_callback){....}-{0:0}, at: rcu_do_batch kernel/rcu/tree.c:2190 [inline]
  #0: ffffffff8e334f60 (rcu_callback){....}-{0:0}, at: rcu_core+0xa86/0x1830 kernel/rcu/tree.c:2471
  #1: ffff88801ca92958 (&inst->lock){+.-.}-{2:2}, at: spin_lock_bh include/linux/spinlock.h:356 [inline]
  #1: ffff88801ca92958 (&inst->lock){+.-.}-{2:2}, at: nfqnl_flush net/netfilter/nfnetlink_queue.c:405 [inline]
  #1: ffff88801ca92958 (&inst->lock){+.-.}-{2:2}, at: instance_destroy_rcu+0x30/0x220 net/netfilter/nfnetlink_queue.c:172

stack backtrace:
CPU: 0 PID: 13427 Comm: syz-executor.4 Not tainted 6.9.0-rc7-syzkaller-02060-g5c1672705a1a #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/02/2024
Call Trace:
 <IRQ>
  __dump_stack lib/dump_stack.c:88 [inline]
  dump_stack_lvl+0x241/0x360 lib/dump_stack.c:114
  lockdep_rcu_suspicious+0x221/0x340 kernel/locking/lockdep.c:6712
  nf_reinject net/netfilter/nfnetlink_queue.c:323 [inline]
  nfqnl_reinject+0x6ec/0x1120 net/netfilter/nfnetlink_queue.c:397
  nfqnl_flush net/netfilter/nfnetlink_queue.c:410 [inline]
  instance_destroy_rcu+0x1ae/0x220 net/netfilter/nfnetlink_queue.c:172
  rcu_do_batch kernel/rcu/tree.c:2196 [inline]
  rcu_core+0xafd/0x1830 kernel/rcu/tree.c:2471
  handle_softirqs+0x2d6/0x990 kernel/softirq.c:554
  __do_softirq kernel/softirq.c:588 [inline]
  invoke_softirq kernel/softirq.c:428 [inline]
  __irq_exit_rcu+0xf4/0x1c0 kernel/softirq.c:637
  irq_exit_rcu+0x9/0x30 kernel/softirq.c:649
  instr_sysvec_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1043 [inline]
  sysvec_apic_timer_interrupt+0xa6/0xc0 arch/x86/kernel/apic/apic.c:1043
 </IRQ>
 <TASK>

Fixes: 9872bec773c2 ("[NETFILTER]: nfnetlink: use RCU for queue instances hash")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_queue.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 00f4bd21c59b..f1c31757e496 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -169,7 +169,9 @@ instance_destroy_rcu(struct rcu_head *head)
 	struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
 						   rcu);
 
+	rcu_read_lock();
 	nfqnl_flush(inst, NULL, 0);
+	rcu_read_unlock();
 	kfree(inst);
 	module_put(THIS_MODULE);
 }
-- 
cgit v1.2.3


From 21ae74e1bf18331ae5e279bd96304b3630828009 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Fri, 17 May 2024 10:00:28 +0300
Subject: wifi: ath10k: fix QCOM_RPROC_COMMON dependency

If ath10k_snoc is built-in, while Qualcomm remoteprocs are built as
modules, compilation fails with:

/usr/bin/aarch64-linux-gnu-ld: drivers/net/wireless/ath/ath10k/snoc.o: in function `ath10k_modem_init':
drivers/net/wireless/ath/ath10k/snoc.c:1534: undefined reference to `qcom_register_ssr_notifier'
/usr/bin/aarch64-linux-gnu-ld: drivers/net/wireless/ath/ath10k/snoc.o: in function `ath10k_modem_deinit':
drivers/net/wireless/ath/ath10k/snoc.c:1551: undefined reference to `qcom_unregister_ssr_notifier'

Add corresponding dependency to ATH10K_SNOC Kconfig entry so that it's
built as module if QCOM_RPROC_COMMON is built as module too.

Fixes: 747ff7d3d742 ("ath10k: Don't always treat modem stop events as crashes")
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
Link: https://msgid.link/20240511-ath10k-snoc-dep-v1-1-9666e3af5c27@linaro.org
---
 drivers/net/wireless/ath/ath10k/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/ath/ath10k/Kconfig b/drivers/net/wireless/ath/ath10k/Kconfig
index e6ea884cafc1..4f385f4a8cef 100644
--- a/drivers/net/wireless/ath/ath10k/Kconfig
+++ b/drivers/net/wireless/ath/ath10k/Kconfig
@@ -45,6 +45,7 @@ config ATH10K_SNOC
 	depends on ATH10K
 	depends on ARCH_QCOM || COMPILE_TEST
 	depends on QCOM_SMEM
+	depends on QCOM_RPROC_COMMON || QCOM_RPROC_COMMON=n
 	select QCOM_SCM
 	select QCOM_QMI_HELPERS
 	help
-- 
cgit v1.2.3


From c1193d9bbbd379defe9be3c6de566de684de8a6f Mon Sep 17 00:00:00 2001
From: Alexander Maltsev <keltar.gw@gmail.com>
Date: Wed, 17 Apr 2024 18:51:41 +0500
Subject: netfilter: ipset: Add list flush to cancel_gc

Flushing list in cancel_gc drops references to other lists right away,
without waiting for RCU to destroy list. Fixes race when referenced
ipsets can't be destroyed while referring list is scheduled for destroy.

Fixes: 97f7cf1cd80e ("netfilter: ipset: fix performance regression in swap operation")
Signed-off-by: Alexander Maltsev <keltar.gw@gmail.com>
Acked-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipset/ip_set_list_set.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 6c3f28bc59b3..54e2a1dd7f5f 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -549,6 +549,9 @@ list_set_cancel_gc(struct ip_set *set)
 
 	if (SET_WITH_TIMEOUT(set))
 		timer_shutdown_sync(&map->gc);
+
+	/* Flush list to drop references to other ipsets */
+	list_set_flush(set);
 }
 
 static const struct ip_set_type_variant set_variant = {
-- 
cgit v1.2.3


From aff5c01fa1284d606f8e7cbdaafeef2511bb46c1 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 9 May 2024 23:02:24 +0200
Subject: netfilter: nft_payload: restore vlan q-in-q match support

Revert f6ae9f120dad ("netfilter: nft_payload: add C-VLAN support").

f41f72d09ee1 ("netfilter: nft_payload: simplify vlan header handling")
already allows to match on inner vlan tags by subtract the vlan header
size to the payload offset which has been popped and stored in skbuff
metadata fields.

Fixes: f6ae9f120dad ("netfilter: nft_payload: add C-VLAN support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_payload.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 0a689c8e0295..a3cb5dbcb362 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -45,36 +45,27 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
 	int mac_off = skb_mac_header(skb) - skb->data;
 	u8 *vlanh, *dst_u8 = (u8 *) d;
 	struct vlan_ethhdr veth;
-	u8 vlan_hlen = 0;
-
-	if ((skb->protocol == htons(ETH_P_8021AD) ||
-	     skb->protocol == htons(ETH_P_8021Q)) &&
-	    offset >= VLAN_ETH_HLEN && offset < VLAN_ETH_HLEN + VLAN_HLEN)
-		vlan_hlen += VLAN_HLEN;
 
 	vlanh = (u8 *) &veth;
-	if (offset < VLAN_ETH_HLEN + vlan_hlen) {
+	if (offset < VLAN_ETH_HLEN) {
 		u8 ethlen = len;
 
-		if (vlan_hlen &&
-		    skb_copy_bits(skb, mac_off, &veth, VLAN_ETH_HLEN) < 0)
-			return false;
-		else if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth))
+		if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth))
 			return false;
 
-		if (offset + len > VLAN_ETH_HLEN + vlan_hlen)
-			ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen;
+		if (offset + len > VLAN_ETH_HLEN)
+			ethlen -= offset + len - VLAN_ETH_HLEN;
 
-		memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen);
+		memcpy(dst_u8, vlanh + offset, ethlen);
 
 		len -= ethlen;
 		if (len == 0)
 			return true;
 
 		dst_u8 += ethlen;
-		offset = ETH_HLEN + vlan_hlen;
+		offset = ETH_HLEN;
 	} else {
-		offset -= VLAN_HLEN + vlan_hlen;
+		offset -= VLAN_HLEN;
 	}
 
 	return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
-- 
cgit v1.2.3


From e8dc41afca161b988e6d462f4d0803d247e22250 Mon Sep 17 00:00:00 2001
From: Shengjiu Wang <shengjiu.wang@nxp.com>
Date: Sat, 11 May 2024 10:55:25 +0800
Subject: pmdomain: imx: gpcv2: Add delay after power up handshake

AudioMix BLK-CTRL on i.MX8MP encountered an accessing register issue
after power up.

[    2.181035] Kernel panic - not syncing: Asynchronous SError Interrupt
[    2.181038] CPU: 1 PID: 48 Comm: kworker/u16:2 Not tainted 6.9.0-rc5-next-20240424-00003-g21cec88845c6 #171
[    2.181047] Hardware name: NXP i.MX8MPlus EVK board (DT)
[    2.181050] Workqueue: events_unbound deferred_probe_work_func
[    2.181064] Call trace:
[...]
[    2.181142]  arm64_serror_panic+0x6c/0x78
[    2.181149]  do_serror+0x3c/0x70
[    2.181157]  el1h_64_error_handler+0x30/0x48
[    2.181164]  el1h_64_error+0x64/0x68
[    2.181171]  clk_imx8mp_audiomix_runtime_resume+0x34/0x44
[    2.181183]  __genpd_runtime_resume+0x30/0x80
[    2.181195]  genpd_runtime_resume+0x110/0x244
[    2.181205]  __rpm_callback+0x48/0x1d8
[    2.181213]  rpm_callback+0x68/0x74
[    2.181224]  rpm_resume+0x468/0x6c0
[    2.181234]  __pm_runtime_resume+0x50/0x94
[    2.181243]  pm_runtime_get_suppliers+0x60/0x8c
[    2.181258]  __driver_probe_device+0x48/0x12c
[    2.181268]  driver_probe_device+0xd8/0x15c
[    2.181278]  __device_attach_driver+0xb8/0x134
[    2.181290]  bus_for_each_drv+0x84/0xe0
[    2.181302]  __device_attach+0x9c/0x188
[    2.181312]  device_initial_probe+0x14/0x20
[    2.181323]  bus_probe_device+0xac/0xb0
[    2.181334]  deferred_probe_work_func+0x88/0xc0
[    2.181344]  process_one_work+0x150/0x290
[    2.181357]  worker_thread+0x2f8/0x408
[    2.181370]  kthread+0x110/0x114
[    2.181381]  ret_from_fork+0x10/0x20
[    2.181391] SMP: stopping secondary CPUs

According to comments in power up handshake:

	/* request the ADB400 to power up */
	if (domain->bits.hskreq) {
		regmap_update_bits(domain->regmap, domain->regs->hsk,
				   domain->bits.hskreq, domain->bits.hskreq);

		/*
		 * ret = regmap_read_poll_timeout(domain->regmap, domain->regs->hsk, reg_val,
		 *				  (reg_val & domain->bits.hskack), 0,
		 *				  USEC_PER_MSEC);
		 * Technically we need the commented code to wait handshake. But that needs
		 * the BLK-CTL module BUS clk-en bit being set.
		 *
		 * There is a separate BLK-CTL module and we will have such a driver for it,
		 * that driver will set the BUS clk-en bit and handshake will be triggered
		 * automatically there. Just add a delay and suppose the handshake finish
		 * after that.
		 */
	}

The BLK-CTL module needs to add delay to wait for a handshake request finished.
For some BLK-CTL module (eg. AudioMix on i.MX8MP) doesn't have BUS clk-en
bit, it is better to add delay in this driver, as the BLK-CTL module doesn't
need to care about how it is powered up.

regmap_read_bypassed() is to make sure the above write IO transaction already
reaches target before udelay().

Fixes: 1496dd413b2e ("clk: imx: imx8mp: Add pm_runtime support for power saving")
Reported-by: Francesco Dolcini <francesco@dolcini.it>
Closes: https://lore.kernel.org/all/66293535.170a0220.21fe.a2e7@mx.google.com/
Suggested-by: Frank Li <frank.li@nxp.com>
Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
Tested-by: Adam Ford <aford173@gmail.com>
Tested-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Link: https://lore.kernel.org/r/1715396125-3724-1-git-send-email-shengjiu.wang@nxp.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/pmdomain/imx/gpcv2.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/pmdomain/imx/gpcv2.c b/drivers/pmdomain/imx/gpcv2.c
index 4b828d74a606..856eaac0ec14 100644
--- a/drivers/pmdomain/imx/gpcv2.c
+++ b/drivers/pmdomain/imx/gpcv2.c
@@ -393,6 +393,17 @@ static int imx_pgc_power_up(struct generic_pm_domain *genpd)
 		 * automatically there. Just add a delay and suppose the handshake finish
 		 * after that.
 		 */
+
+		/*
+		 * For some BLK-CTL module (eg. AudioMix on i.MX8MP) doesn't have BUS
+		 * clk-en bit, it is better to add delay here, as the BLK-CTL module
+		 * doesn't need to care about how it is powered up.
+		 *
+		 * regmap_read_bypassed() is to make sure the above write IO transaction
+		 * already reaches target before udelay()
+		 */
+		regmap_read_bypassed(domain->regmap, domain->regs->hsk, &reg_val);
+		udelay(5);
 	}
 
 	/* Disable reset clocks for all devices in the domain */
-- 
cgit v1.2.3


From 25460d6f39024cc3b8241b14c7ccf0d6f11a736a Mon Sep 17 00:00:00 2001
From: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
Date: Mon, 8 Apr 2024 07:10:39 -0700
Subject: net/9p: fix uninit-value in p9_client_rpc()

Syzbot with the help of KMSAN reported the following error:

BUG: KMSAN: uninit-value in trace_9p_client_res include/trace/events/9p.h:146 [inline]
BUG: KMSAN: uninit-value in p9_client_rpc+0x1314/0x1340 net/9p/client.c:754
 trace_9p_client_res include/trace/events/9p.h:146 [inline]
 p9_client_rpc+0x1314/0x1340 net/9p/client.c:754
 p9_client_create+0x1551/0x1ff0 net/9p/client.c:1031
 v9fs_session_init+0x1b9/0x28e0 fs/9p/v9fs.c:410
 v9fs_mount+0xe2/0x12b0 fs/9p/vfs_super.c:122
 legacy_get_tree+0x114/0x290 fs/fs_context.c:662
 vfs_get_tree+0xa7/0x570 fs/super.c:1797
 do_new_mount+0x71f/0x15e0 fs/namespace.c:3352
 path_mount+0x742/0x1f20 fs/namespace.c:3679
 do_mount fs/namespace.c:3692 [inline]
 __do_sys_mount fs/namespace.c:3898 [inline]
 __se_sys_mount+0x725/0x810 fs/namespace.c:3875
 __x64_sys_mount+0xe4/0x150 fs/namespace.c:3875
 do_syscall_64+0xd5/0x1f0
 entry_SYSCALL_64_after_hwframe+0x6d/0x75

Uninit was created at:
 __alloc_pages+0x9d6/0xe70 mm/page_alloc.c:4598
 __alloc_pages_node include/linux/gfp.h:238 [inline]
 alloc_pages_node include/linux/gfp.h:261 [inline]
 alloc_slab_page mm/slub.c:2175 [inline]
 allocate_slab mm/slub.c:2338 [inline]
 new_slab+0x2de/0x1400 mm/slub.c:2391
 ___slab_alloc+0x1184/0x33d0 mm/slub.c:3525
 __slab_alloc mm/slub.c:3610 [inline]
 __slab_alloc_node mm/slub.c:3663 [inline]
 slab_alloc_node mm/slub.c:3835 [inline]
 kmem_cache_alloc+0x6d3/0xbe0 mm/slub.c:3852
 p9_tag_alloc net/9p/client.c:278 [inline]
 p9_client_prepare_req+0x20a/0x1770 net/9p/client.c:641
 p9_client_rpc+0x27e/0x1340 net/9p/client.c:688
 p9_client_create+0x1551/0x1ff0 net/9p/client.c:1031
 v9fs_session_init+0x1b9/0x28e0 fs/9p/v9fs.c:410
 v9fs_mount+0xe2/0x12b0 fs/9p/vfs_super.c:122
 legacy_get_tree+0x114/0x290 fs/fs_context.c:662
 vfs_get_tree+0xa7/0x570 fs/super.c:1797
 do_new_mount+0x71f/0x15e0 fs/namespace.c:3352
 path_mount+0x742/0x1f20 fs/namespace.c:3679
 do_mount fs/namespace.c:3692 [inline]
 __do_sys_mount fs/namespace.c:3898 [inline]
 __se_sys_mount+0x725/0x810 fs/namespace.c:3875
 __x64_sys_mount+0xe4/0x150 fs/namespace.c:3875
 do_syscall_64+0xd5/0x1f0
 entry_SYSCALL_64_after_hwframe+0x6d/0x75

If p9_check_errors() fails early in p9_client_rpc(), req->rc.tag
will not be properly initialized. However, trace_9p_client_res()
ends up trying to print it out anyway before p9_client_rpc()
finishes.

Fix this issue by assigning default values to p9_fcall fields
such as 'tag' and (just in case KMSAN unearths something new) 'id'
during the tag allocation stage.

Reported-and-tested-by: syzbot+ff14db38f56329ef68df@syzkaller.appspotmail.com
Fixes: 348b59012e5c ("net/9p: Convert net/9p protocol dumps to tracepoints")
Signed-off-by: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com>
Cc: stable@vger.kernel.org
Message-ID: <20240408141039.30428-1-n.zhandarovich@fintech.ru>
Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
---
 net/9p/client.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/9p/client.c b/net/9p/client.c
index f7e90b4769bb..b05f73c291b4 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -235,6 +235,8 @@ static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc,
 	if (!fc->sdata)
 		return -ENOMEM;
 	fc->capacity = alloc_msize;
+	fc->id = 0;
+	fc->tag = P9_NOTAG;
 	return 0;
 }
 
-- 
cgit v1.2.3


From d3a043733f25d743f3aa617c7f82dbcb5ee2211a Mon Sep 17 00:00:00 2001
From: Nilay Shroff <nilay@linux.ibm.com>
Date: Thu, 16 May 2024 17:43:51 +0530
Subject: nvme-multipath: find NUMA path only for online numa-node

In current native multipath design when a shared namespace is created,
we loop through each possible numa-node, calculate the NUMA distance of
that node from each nvme controller and then cache the optimal IO path
for future reference while sending IO. The issue with this design is that
we may refer to the NUMA distance table for an offline node which may not
be populated at the time and so we may inadvertently end up finding and
caching a non-optimal path for IO. Then latter when the corresponding
numa-node becomes online and hence the NUMA distance table entry for that
node is created, ideally we should re-calculate the multipath node distance
for the newly added node however that doesn't happen unless we rescan/reset
the controller. So essentially, we may keep using non-optimal IO path for a
node which is made online after namespace is created.
This patch helps fix this issue ensuring that when a shared namespace is
created, we calculate the multipath node distance for each online numa-node
instead of each possible numa-node. Then latter when a node becomes online
and we receive any IO on that newly added node, we would calculate the
multipath node distance for newly added node but this time NUMA distance
table would have been already populated for newly added node. Hence we
would be able to correctly calculate the multipath node distance and choose
the optimal path for the IO.

Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/multipath.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index d16e976ae1a4..9c1e135b8df3 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -595,7 +595,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
 		int node, srcu_idx;
 
 		srcu_idx = srcu_read_lock(&head->srcu);
-		for_each_node(node)
+		for_each_online_node(node)
 			__nvme_find_path(head, node);
 		srcu_read_unlock(&head->srcu, srcu_idx);
 	}
-- 
cgit v1.2.3


From 8d00547ea8754afdc4a550af2fb7af2e3ba93cf8 Mon Sep 17 00:00:00 2001
From: Xu Kuohai <xukuohai@huaweicloud.com>
Date: Thu, 16 May 2024 10:09:28 +0800
Subject: MAINTAINERS: Add myself as reviewer of ARM64 BPF JIT

I am working on ARM64 BPF JIT for a while, hence add myself
as reviewer.

Signed-off-by: Xu Kuohai <xukuohai@huaweicloud.com>
Acked-by: Hengqi Chen <hengqi.chen@gmail.com>
Link: https://lore.kernel.org/r/20240516020928.156125-1-xukuohai@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3fdc3b09c171..5e279e9ff63f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3816,6 +3816,7 @@ BPF JIT for ARM64
 M:	Daniel Borkmann <daniel@iogearbox.net>
 M:	Alexei Starovoitov <ast@kernel.org>
 M:	Puranjay Mohan <puranjay@kernel.org>
+R:	Xu Kuohai <xukuohai@huaweicloud.com>
 L:	bpf@vger.kernel.org
 S:	Supported
 F:	arch/arm64/net/
-- 
cgit v1.2.3


From 5134acb15d9ef27aa2b90aad46d4e89fcef79fdc Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Wed, 22 May 2024 10:32:43 -0700
Subject: efi/libstub: zboot.lds: Discard .discard sections

When building ARCH=loongarch defconfig + CONFIG_UNWINDER_ORC=y using
LLVM, there is a warning from ld.lld when linking the EFI zboot image
due to the use of unreachable() in number() in vsprintf.c:

  ld.lld: warning: drivers/firmware/efi/libstub/lib.a(vsprintf.stub.o):(.discard.unreachable+0x0): has non-ABS relocation R_LARCH_32_PCREL against symbol ''

If the compiler cannot eliminate the default case for any reason, the
.discard.unreachable section will remain in the final binary but the
entire point of any section prefixed with .discard is that it is only
used at compile time, so it can be discarded via /DISCARD/ in a linker
script. The asm-generic vmlinux.lds.h includes .discard and .discard.*
in the COMMON_DISCARDS macro but that is not used for zboot.lds, as it
is not a kernel image linker script.

Add .discard and .discard.* to /DISCARD/ in zboot.lds, so that any
sections meant to be discarded at link time are not included in the
final zboot image. This issue is not specific to LoongArch, it is just
the first architecture to select CONFIG_OBJTOOL, which defines
annotate_unreachable() as an asm statement to add the
.discard.unreachable section, and use the EFI stub.

Closes: https://github.com/ClangBuiltLinux/linux/issues/2023
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Acked-by: Huacai Chen <chenhuacai@loongson.cn>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/libstub/zboot.lds | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/firmware/efi/libstub/zboot.lds b/drivers/firmware/efi/libstub/zboot.lds
index ac8c0ef85158..af2c82f7bd90 100644
--- a/drivers/firmware/efi/libstub/zboot.lds
+++ b/drivers/firmware/efi/libstub/zboot.lds
@@ -41,6 +41,7 @@ SECTIONS
 	}
 
 	/DISCARD/ : {
+		*(.discard .discard.*)
 		*(.modinfo .init.modinfo)
 	}
 }
-- 
cgit v1.2.3


From 7c23b186ab892088f76a3ad9dbff1685ffe2e832 Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Date: Sun, 19 May 2024 13:33:53 -0300
Subject: efi: pstore: Return proper errors on UEFI failures

Right now efi-pstore either returns 0 (success) or -EIO; but we
do have a function to convert UEFI errors in different standard
error codes, helping to narrow down potential issues more accurately.

So, let's use this helper here.

Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/efi-pstore.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 5b9dc26e6bcb..552c78f5f059 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -136,7 +136,7 @@ static int efi_pstore_read_func(struct pstore_record *record,
 				     &size, record->buf);
 	if (status != EFI_SUCCESS) {
 		kfree(record->buf);
-		return -EIO;
+		return efi_status_to_err(status);
 	}
 
 	/*
@@ -189,7 +189,7 @@ static ssize_t efi_pstore_read(struct pstore_record *record)
 			return 0;
 
 		if (status != EFI_SUCCESS)
-			return -EIO;
+			return efi_status_to_err(status);
 
 		/* skip variables that don't concern us */
 		if (efi_guidcmp(guid, LINUX_EFI_CRASH_GUID))
@@ -227,7 +227,7 @@ static int efi_pstore_write(struct pstore_record *record)
 					    record->size, record->psi->buf,
 					    true);
 	efivar_unlock();
-	return status == EFI_SUCCESS ? 0 : -EIO;
+	return efi_status_to_err(status);
 };
 
 static int efi_pstore_erase(struct pstore_record *record)
@@ -238,7 +238,7 @@ static int efi_pstore_erase(struct pstore_record *record)
 				     PSTORE_EFI_ATTRIBUTES, 0, NULL);
 
 	if (status != EFI_SUCCESS && status != EFI_NOT_FOUND)
-		return -EIO;
+		return efi_status_to_err(status);
 	return 0;
 }
 
-- 
cgit v1.2.3


From c898afdc15645efb555acb6d85b484eb40a45409 Mon Sep 17 00:00:00 2001
From: Dominique Martinet <asmadeus@codewreck.org>
Date: Tue, 21 May 2024 21:13:36 +0900
Subject: 9p: add missing locking around taking dentry fid list

Fix a use-after-free on dentry's d_fsdata fid list when a thread
looks up a fid through dentry while another thread unlinks it:

UAF thread:
refcount_t: addition on 0; use-after-free.
 p9_fid_get linux/./include/net/9p/client.h:262
 v9fs_fid_find+0x236/0x280 linux/fs/9p/fid.c:129
 v9fs_fid_lookup_with_uid linux/fs/9p/fid.c:181
 v9fs_fid_lookup+0xbf/0xc20 linux/fs/9p/fid.c:314
 v9fs_vfs_getattr_dotl+0xf9/0x360 linux/fs/9p/vfs_inode_dotl.c:400
 vfs_statx+0xdd/0x4d0 linux/fs/stat.c:248

Freed by:
 p9_fid_destroy (inlined)
 p9_client_clunk+0xb0/0xe0 linux/net/9p/client.c:1456
 p9_fid_put linux/./include/net/9p/client.h:278
 v9fs_dentry_release+0xb5/0x140 linux/fs/9p/vfs_dentry.c:55
 v9fs_remove+0x38f/0x620 linux/fs/9p/vfs_inode.c:518
 vfs_unlink+0x29a/0x810 linux/fs/namei.c:4335

The problem is that d_fsdata was not accessed under d_lock, because
d_release() normally is only called once the dentry is otherwise no
longer accessible but since we also call it explicitly in v9fs_remove
that lock is required:
move the hlist out of the dentry under lock then unref its fids once
they are no longer accessible.

Fixes: 154372e67d40 ("fs/9p: fix create-unlink-getattr idiom")
Cc: stable@vger.kernel.org
Reported-by: Meysam Firouzi
Reported-by: Amirmohammad Eftekhar
Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com>
Message-ID: <20240521122947.1080227-1-asmadeus@codewreck.org>
Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
---
 fs/9p/vfs_dentry.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index f16f73581634..01338d4c2d9e 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -48,12 +48,17 @@ static int v9fs_cached_dentry_delete(const struct dentry *dentry)
 static void v9fs_dentry_release(struct dentry *dentry)
 {
 	struct hlist_node *p, *n;
+	struct hlist_head head;
 
 	p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n",
 		 dentry, dentry);
-	hlist_for_each_safe(p, n, (struct hlist_head *)&dentry->d_fsdata)
+
+	spin_lock(&dentry->d_lock);
+	hlist_move_list((struct hlist_head *)&dentry->d_fsdata, &head);
+	spin_unlock(&dentry->d_lock);
+
+	hlist_for_each_safe(p, n, &head)
 		p9_fid_put(hlist_entry(p, struct p9_fid, dlist));
-	dentry->d_fsdata = NULL;
 }
 
 static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
-- 
cgit v1.2.3


From 9f788ba457b45b0ce422943fcec9fa35c4587764 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 22 May 2024 20:09:49 +0300
Subject: spi: Don't mark message DMA mapped when no transfer in it is

There is no need to set the DMA mapped flag of the message if it has
no mapped transfers. Moreover, it may give the code a chance to take
the wrong paths, i.e. to exercise DMA related APIs on unmapped data.
Make __spi_map_msg() to bail earlier on the above mentioned cases.

Fixes: 99adef310f68 ("spi: Provide core support for DMA mapping transfers")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://msgid.link/r/20240522171018.3362521-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index a8966caed841..d40ce0fdb1a8 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1243,6 +1243,7 @@ static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg)
 	else
 		rx_dev = ctlr->dev.parent;
 
+	ret = -ENOMSG;
 	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
 		/* The sync is done before each transfer. */
 		unsigned long attrs = DMA_ATTR_SKIP_CPU_SYNC;
@@ -1272,6 +1273,9 @@ static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg)
 			}
 		}
 	}
+	/* No transfer has been mapped, bail out with success */
+	if (ret)
+		return 0;
 
 	ctlr->cur_rx_dma_dev = rx_dev;
 	ctlr->cur_tx_dma_dev = tx_dev;
-- 
cgit v1.2.3


From da560097c05612f8d360f86528f6213629b9c395 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 22 May 2024 20:09:50 +0300
Subject: spi: Check if transfer is mapped before calling DMA sync APIs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The resent update to remove the orig_nents checks revealed
that not all DMA sync backends can cope with the unallocated
SG list, while supplying orig_nents == 0 (the commit 861370f49ce4
("iommu/dma: force bouncing if the size is not cacheline-aligned"),
for example, makes that happen for the IOMMU case). It means
we have to check if the buffers are DMA mapped before trying
to sync them. Re-introduce that check in a form of calling
->can_dma() in the same way as it's done in the DMA mapping loop
for the SPI transfers.

Reported-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Reported-by: Neil Armstrong <neil.armstrong@linaro.org>
Closes: https://lore.kernel.org/r/8ae675b5-fcf9-4c9b-b06a-4462f70e1322@linaro.org
Closes: https://lore.kernel.org/all/d3679496-2e4e-4a7c-97ed-f193bd53af1d@notapiano
Fixes: 8cc3bad9d9d6 ("spi: Remove unneded check for orig_nents")
Suggested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://msgid.link/r/20240522171018.3362521-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index d40ce0fdb1a8..b18a4c871e21 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1311,7 +1311,7 @@ static int __spi_unmap_msg(struct spi_controller *ctlr, struct spi_message *msg)
 	return 0;
 }
 
-static void spi_dma_sync_for_device(struct spi_controller *ctlr,
+static void spi_dma_sync_for_device(struct spi_controller *ctlr, struct spi_message *msg,
 				    struct spi_transfer *xfer)
 {
 	struct device *rx_dev = ctlr->cur_rx_dma_dev;
@@ -1320,11 +1320,14 @@ static void spi_dma_sync_for_device(struct spi_controller *ctlr,
 	if (!ctlr->cur_msg_mapped)
 		return;
 
+	if (!ctlr->can_dma(ctlr, msg->spi, xfer))
+		return;
+
 	dma_sync_sgtable_for_device(tx_dev, &xfer->tx_sg, DMA_TO_DEVICE);
 	dma_sync_sgtable_for_device(rx_dev, &xfer->rx_sg, DMA_FROM_DEVICE);
 }
 
-static void spi_dma_sync_for_cpu(struct spi_controller *ctlr,
+static void spi_dma_sync_for_cpu(struct spi_controller *ctlr, struct spi_message *msg,
 				 struct spi_transfer *xfer)
 {
 	struct device *rx_dev = ctlr->cur_rx_dma_dev;
@@ -1333,6 +1336,9 @@ static void spi_dma_sync_for_cpu(struct spi_controller *ctlr,
 	if (!ctlr->cur_msg_mapped)
 		return;
 
+	if (!ctlr->can_dma(ctlr, msg->spi, xfer))
+		return;
+
 	dma_sync_sgtable_for_cpu(rx_dev, &xfer->rx_sg, DMA_FROM_DEVICE);
 	dma_sync_sgtable_for_cpu(tx_dev, &xfer->tx_sg, DMA_TO_DEVICE);
 }
@@ -1350,11 +1356,13 @@ static inline int __spi_unmap_msg(struct spi_controller *ctlr,
 }
 
 static void spi_dma_sync_for_device(struct spi_controller *ctrl,
+				    struct spi_message *msg,
 				    struct spi_transfer *xfer)
 {
 }
 
 static void spi_dma_sync_for_cpu(struct spi_controller *ctrl,
+				 struct spi_message *msg,
 				 struct spi_transfer *xfer)
 {
 }
@@ -1626,10 +1634,10 @@ static int spi_transfer_one_message(struct spi_controller *ctlr,
 			reinit_completion(&ctlr->xfer_completion);
 
 fallback_pio:
-			spi_dma_sync_for_device(ctlr, xfer);
+			spi_dma_sync_for_device(ctlr, msg, xfer);
 			ret = ctlr->transfer_one(ctlr, msg->spi, xfer);
 			if (ret < 0) {
-				spi_dma_sync_for_cpu(ctlr, xfer);
+				spi_dma_sync_for_cpu(ctlr, msg, xfer);
 
 				if (ctlr->cur_msg_mapped &&
 				   (xfer->error & SPI_TRANS_FAIL_NO_START)) {
@@ -1654,7 +1662,7 @@ fallback_pio:
 					msg->status = ret;
 			}
 
-			spi_dma_sync_for_cpu(ctlr, xfer);
+			spi_dma_sync_for_cpu(ctlr, msg, xfer);
 		} else {
 			if (xfer->len)
 				dev_err(&msg->spi->dev,
-- 
cgit v1.2.3


From a827ad9b3c2fc243e058595533f91ce41a312527 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Thu, 23 May 2024 12:33:25 +0200
Subject: spi: stm32: Revert change that enabled controller before asserting CS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On stm32mp157 enabling the controller before asserting CS makes the
hardware trigger spurious interrupts in a tight loop and the transfers
fail. Revert the commit that swapped the order of enable and CS. This
reintroduces the problem that swapping was supposed to fix, which
however is less grave.

Reported-by: Leonard Göhrs <l.goehrs@pengutronix.de>
Link: https://lore.kernel.org/all/39033ed7-3e57-4339-80b4-fc8919e26aa7@pengutronix.de/
Fixes: 52b62e7a5d4f ("spi: stm32: enable controller before asserting CS")
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://msgid.link/r/20240523103326.792907-2-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-stm32.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 4a68abcdcc35..e4e7ddb7524a 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -1016,8 +1016,10 @@ end_irq:
 static irqreturn_t stm32fx_spi_irq_thread(int irq, void *dev_id)
 {
 	struct spi_controller *ctrl = dev_id;
+	struct stm32_spi *spi = spi_controller_get_devdata(ctrl);
 
 	spi_finalize_current_transfer(ctrl);
+	stm32fx_spi_disable(spi);
 
 	return IRQ_HANDLED;
 }
@@ -1185,8 +1187,6 @@ static int stm32_spi_prepare_msg(struct spi_controller *ctrl,
 			 ~clrb) | setb,
 			spi->base + spi->cfg->regs->cpol.reg);
 
-	stm32_spi_enable(spi);
-
 	spin_unlock_irqrestore(&spi->lock, flags);
 
 	return 0;
@@ -1204,6 +1204,7 @@ static void stm32fx_spi_dma_tx_cb(void *data)
 
 	if (spi->cur_comm == SPI_SIMPLEX_TX || spi->cur_comm == SPI_3WIRE_TX) {
 		spi_finalize_current_transfer(spi->ctrl);
+		stm32fx_spi_disable(spi);
 	}
 }
 
@@ -1218,6 +1219,7 @@ static void stm32_spi_dma_rx_cb(void *data)
 	struct stm32_spi *spi = data;
 
 	spi_finalize_current_transfer(spi->ctrl);
+	spi->cfg->disable(spi);
 }
 
 /**
@@ -1305,6 +1307,8 @@ static int stm32fx_spi_transfer_one_irq(struct stm32_spi *spi)
 
 	stm32_spi_set_bits(spi, STM32FX_SPI_CR2, cr2);
 
+	stm32_spi_enable(spi);
+
 	/* starting data transfer when buffer is loaded */
 	if (spi->tx_buf)
 		spi->cfg->write_tx(spi);
@@ -1341,6 +1345,8 @@ static int stm32h7_spi_transfer_one_irq(struct stm32_spi *spi)
 
 	spin_lock_irqsave(&spi->lock, flags);
 
+	stm32_spi_enable(spi);
+
 	/* Be sure to have data in fifo before starting data transfer */
 	if (spi->tx_buf)
 		stm32h7_spi_write_txfifo(spi);
@@ -1372,6 +1378,8 @@ static void stm32fx_spi_transfer_one_dma_start(struct stm32_spi *spi)
 		 */
 		stm32_spi_set_bits(spi, STM32FX_SPI_CR2, STM32FX_SPI_CR2_ERRIE);
 	}
+
+	stm32_spi_enable(spi);
 }
 
 /**
@@ -1405,6 +1413,8 @@ static void stm32h7_spi_transfer_one_dma_start(struct stm32_spi *spi)
 
 	stm32_spi_set_bits(spi, STM32H7_SPI_IER, ier);
 
+	stm32_spi_enable(spi);
+
 	if (STM32_SPI_HOST_MODE(spi))
 		stm32_spi_set_bits(spi, STM32H7_SPI_CR1, STM32H7_SPI_CR1_CSTART);
 }
-- 
cgit v1.2.3


From e6722ea6b9ed731f7392277d76ca912dfffca7ee Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 6 Jan 2024 13:48:25 +0100
Subject: i2c: synquacer: Remove a clk reference from struct synquacer_i2c

'pclk' is only used locally in the probe. Remove it from the
'synquacer_i2c' structure.

Also remove a useless debug message.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
---
 drivers/i2c/busses/i2c-synquacer.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/i2c/busses/i2c-synquacer.c b/drivers/i2c/busses/i2c-synquacer.c
index 31ecb2c7e978..4eccbcd0fbfc 100644
--- a/drivers/i2c/busses/i2c-synquacer.c
+++ b/drivers/i2c/busses/i2c-synquacer.c
@@ -138,7 +138,6 @@ struct synquacer_i2c {
 	int			irq;
 	struct device		*dev;
 	void __iomem		*base;
-	struct clk		*pclk;
 	u32			pclkrate;
 	u32			speed_khz;
 	u32			timeout_ms;
@@ -535,6 +534,7 @@ static const struct i2c_adapter synquacer_i2c_ops = {
 static int synquacer_i2c_probe(struct platform_device *pdev)
 {
 	struct synquacer_i2c *i2c;
+	struct clk *pclk;
 	u32 bus_speed;
 	int ret;
 
@@ -550,13 +550,12 @@ static int synquacer_i2c_probe(struct platform_device *pdev)
 	device_property_read_u32(&pdev->dev, "socionext,pclk-rate",
 				 &i2c->pclkrate);
 
-	i2c->pclk = devm_clk_get_enabled(&pdev->dev, "pclk");
-	if (IS_ERR(i2c->pclk))
-		return dev_err_probe(&pdev->dev, PTR_ERR(i2c->pclk),
+	pclk = devm_clk_get_enabled(&pdev->dev, "pclk");
+	if (IS_ERR(pclk))
+		return dev_err_probe(&pdev->dev, PTR_ERR(pclk),
 				     "failed to get and enable clock\n");
 
-	dev_dbg(&pdev->dev, "clock source %p\n", i2c->pclk);
-	i2c->pclkrate = clk_get_rate(i2c->pclk);
+	i2c->pclkrate = clk_get_rate(pclk);
 
 	if (i2c->pclkrate < SYNQUACER_I2C_MIN_CLK_RATE ||
 	    i2c->pclkrate > SYNQUACER_I2C_MAX_CLK_RATE)
-- 
cgit v1.2.3


From e61bcf42d290e73025bab38e0e55a5586c2d8ad5 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 15 Apr 2024 22:50:27 +0200
Subject: i2c: Remove I2C_CLASS_SPD

Remove this class after all users have been gone.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Andi Shyti <andi.shyti@kernel.org>
---
 include/linux/i2c.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 5e6cd43a6dbd..9709537370ee 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -852,7 +852,6 @@ static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap)
 
 /* i2c adapter classes (bitmask) */
 #define I2C_CLASS_HWMON		(1<<0)	/* lm_sensors, ... */
-#define I2C_CLASS_SPD		(1<<7)	/* Memory modules */
 /* Warn users that the adapter doesn't support classes anymore */
 #define I2C_CLASS_DEPRECATED	(1<<8)
 
-- 
cgit v1.2.3


From 2360497238261f17d4a3f6cbc02d6dbd8951c23c Mon Sep 17 00:00:00 2001
From: Zhang Lixu <lixu.zhang@intel.com>
Date: Thu, 23 May 2024 09:14:01 +0800
Subject: HID: intel-ish-hid: Fix build error for COMPILE_TEST

kernel test robot reported build error due to a pointer type mismatch:

  .../ishtp/loader.c:172:8: error: incompatible pointer types passing
  '__le64 *' (aka 'unsigned long long *') to parameter of type
  'dma_addr_t *' (aka 'unsigned int *')

The issue arises because the driver, which is primarily intended for
x86-64, is also built for i386 when COMPILE_TEST is enabled.

Resolve type mismatch by using a temporary dma_addr_t variable to hold
the DMA address. Populate this temporary variable in dma_alloc_coherent()
function, and then convert and store the address in the
fragment->fragment_tbl[i].ddr_adrs field in the correct format.
Similarly, convert the ddr_adrs field back to dma_addr_t when freeing
the DMA buffer with dma_free_coherent().

Fixes: 579a267e4617 ("HID: intel-ish-hid: Implement loading firmware from host feature")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202405201313.SAStVPrT-lkp@intel.com/
Signed-off-by: Zhang Lixu <lixu.zhang@intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
---
 drivers/hid/intel-ish-hid/ishtp/loader.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp/loader.c b/drivers/hid/intel-ish-hid/ishtp/loader.c
index 993f8b390e57..2785b04a2f5a 100644
--- a/drivers/hid/intel-ish-hid/ishtp/loader.c
+++ b/drivers/hid/intel-ish-hid/ishtp/loader.c
@@ -138,12 +138,13 @@ static void release_dma_bufs(struct ishtp_device *dev,
 			     struct loader_xfer_dma_fragment *fragment,
 			     void **dma_bufs, u32 fragment_size)
 {
+	dma_addr_t dma_addr;
 	int i;
 
 	for (i = 0; i < FRAGMENT_MAX_NUM; i++) {
 		if (dma_bufs[i]) {
-			dma_free_coherent(dev->devc, fragment_size, dma_bufs[i],
-					  fragment->fragment_tbl[i].ddr_adrs);
+			dma_addr = le64_to_cpu(fragment->fragment_tbl[i].ddr_adrs);
+			dma_free_coherent(dev->devc, fragment_size, dma_bufs[i], dma_addr);
 			dma_bufs[i] = NULL;
 		}
 	}
@@ -164,15 +165,16 @@ static int prepare_dma_bufs(struct ishtp_device *dev,
 			    struct loader_xfer_dma_fragment *fragment,
 			    void **dma_bufs, u32 fragment_size)
 {
+	dma_addr_t dma_addr;
 	u32 offset = 0;
 	int i;
 
 	for (i = 0; i < fragment->fragment_cnt && offset < ish_fw->size; i++) {
-		dma_bufs[i] = dma_alloc_coherent(dev->devc, fragment_size,
-						 &fragment->fragment_tbl[i].ddr_adrs, GFP_KERNEL);
+		dma_bufs[i] = dma_alloc_coherent(dev->devc, fragment_size, &dma_addr, GFP_KERNEL);
 		if (!dma_bufs[i])
 			return -ENOMEM;
 
+		fragment->fragment_tbl[i].ddr_adrs = cpu_to_le64(dma_addr);
 		fragment->fragment_tbl[i].length = clamp(ish_fw->size - offset, 0, fragment_size);
 		fragment->fragment_tbl[i].fw_off = offset;
 		memcpy(dma_bufs[i], ish_fw->data + offset, fragment->fragment_tbl[i].length);
-- 
cgit v1.2.3


From 0a3f9f7fc59feb8a91a2793b8b60977895c72365 Mon Sep 17 00:00:00 2001
From: Chen Ni <nichen@iscas.ac.cn>
Date: Wed, 15 May 2024 11:30:51 +0800
Subject: HID: nvidia-shield: Add missing check for input_ff_create_memless

Add check for the return value of input_ff_create_memless() and return
the error if it fails in order to catch the error.

Fixes: 09308562d4af ("HID: nvidia-shield: Initial driver implementation with Thunderstrike support")
Signed-off-by: Chen Ni <nichen@iscas.ac.cn>
Reviewed-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
---
 drivers/hid/hid-nvidia-shield.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-nvidia-shield.c b/drivers/hid/hid-nvidia-shield.c
index 58b15750dbb0..ff9078ad1961 100644
--- a/drivers/hid/hid-nvidia-shield.c
+++ b/drivers/hid/hid-nvidia-shield.c
@@ -283,7 +283,9 @@ static struct input_dev *shield_haptics_create(
 		return haptics;
 
 	input_set_capability(haptics, EV_FF, FF_RUMBLE);
-	input_ff_create_memless(haptics, NULL, play_effect);
+	ret = input_ff_create_memless(haptics, NULL, play_effect);
+	if (ret)
+		goto err;
 
 	ret = input_register_device(haptics);
 	if (ret)
-- 
cgit v1.2.3


From ed281c6ab6eb8a914f06c74dfeaebde15b34a3f4 Mon Sep 17 00:00:00 2001
From: Carl Huang <quic_cjhuang@quicinc.com>
Date: Tue, 21 May 2024 11:08:10 +0300
Subject: wifi: ath11k: fix WCN6750 firmware crash caused by 17 num_vdevs

WCN6750 firmware crashes because of num_vdevs changed from 4 to 17
in ath11k_init_wmi_config_qca6390() as the ab->hw_params.num_vdevs
is 17. This is caused by commit f019f4dff2e4 ("wifi: ath11k: support
2 station interfaces") which assigns ab->hw_params.num_vdevs directly
to config->num_vdevs in ath11k_init_wmi_config_qca6390(), therefore
WCN6750 firmware crashes as it can't support such a big num_vdevs.

Fix it by assign 3 to num_vdevs in hw_params for WCN6750 as 3 is
sufficient too.

Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3
Tested-on: WCN6750 hw1.0 AHB WLAN.MSL.1.0.1-01371-QCAMSLSWPLZ-1

Fixes: f019f4dff2e4 ("wifi: ath11k: support 2 station interfaces")
Reported-by: Luca Weiss <luca.weiss@fairphone.com>
Tested-by: Luca Weiss <luca.weiss@fairphone.com>
Closes: https://lore.kernel.org/r/D15TIIDIIESY.D1EKKJLZINMA@fairphone.com/
Signed-off-by: Carl Huang <quic_cjhuang@quicinc.com>
Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
Link: https://msgid.link/20240520030757.2209395-1-quic_cjhuang@quicinc.com
---
 drivers/net/wireless/ath/ath11k/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index 3cc817a3b4a4..b82e8fb28541 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -604,7 +604,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
 		.coldboot_cal_ftm = true,
 		.cbcal_restart_fw = false,
 		.fw_mem_mode = 0,
-		.num_vdevs = 16 + 1,
+		.num_vdevs = 3,
 		.num_peers = 512,
 		.supports_suspend = false,
 		.hal_desc_sz = sizeof(struct hal_rx_desc_qcn9074),
-- 
cgit v1.2.3


From 6e16782d6b4a724f9c9dcd49471219643593b60c Mon Sep 17 00:00:00 2001
From: Baochen Qiang <quic_bqiang@quicinc.com>
Date: Tue, 21 May 2024 11:08:11 +0300
Subject: wifi: ath11k: move power type check to ASSOC stage when connecting to
 6 GHz AP

With commit bc8a0fac8677 ("wifi: mac80211: don't set bss_conf in parsing")
ath11k fails to connect to 6 GHz AP.

This is because currently ath11k checks AP's power type in
ath11k_mac_op_assign_vif_chanctx() which would be called in AUTH stage.
However with above commit power type is not available until ASSOC stage.
As a result power type check fails and therefore connection fails.

Fix this by moving power type check to ASSOC stage, also move regulatory
rules update there because it depends on power type.

Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.30

Fixes: bc8a0fac8677 ("wifi: mac80211: don't set bss_conf in parsing")
Signed-off-by: Baochen Qiang <quic_bqiang@quicinc.com>
Acked-by: Jeff Johnson <quic_jjohnson@quicinc.com>
Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
Link: https://msgid.link/20240424064019.4847-1-quic_bqiang@quicinc.com
---
 drivers/net/wireless/ath/ath11k/mac.c | 38 +++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 4f62e38ba48b..9b96dbb21d83 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -7988,8 +7988,6 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw,
 	struct ath11k_base *ab = ar->ab;
 	struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
 	int ret;
-	struct cur_regulatory_info *reg_info;
-	enum ieee80211_ap_reg_power power_type;
 
 	mutex_lock(&ar->conf_mutex);
 
@@ -8000,17 +7998,6 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw,
 	if (ath11k_wmi_supports_6ghz_cc_ext(ar) &&
 	    ctx->def.chan->band == NL80211_BAND_6GHZ &&
 	    arvif->vdev_type == WMI_VDEV_TYPE_STA) {
-		reg_info = &ab->reg_info_store[ar->pdev_idx];
-		power_type = vif->bss_conf.power_type;
-
-		ath11k_dbg(ab, ATH11K_DBG_MAC, "chanctx power type %d\n", power_type);
-
-		if (power_type == IEEE80211_REG_UNSET_AP) {
-			ret = -EINVAL;
-			goto out;
-		}
-
-		ath11k_reg_handle_chan_list(ab, reg_info, power_type);
 		arvif->chanctx = *ctx;
 		ath11k_mac_parse_tx_pwr_env(ar, vif, ctx);
 	}
@@ -9626,6 +9613,8 @@ static int ath11k_mac_op_sta_state(struct ieee80211_hw *hw,
 	struct ath11k *ar = hw->priv;
 	struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
 	struct ath11k_sta *arsta = ath11k_sta_to_arsta(sta);
+	enum ieee80211_ap_reg_power power_type;
+	struct cur_regulatory_info *reg_info;
 	struct ath11k_peer *peer;
 	int ret = 0;
 
@@ -9705,6 +9694,29 @@ static int ath11k_mac_op_sta_state(struct ieee80211_hw *hw,
 				ath11k_warn(ar->ab, "Unable to authorize peer %pM vdev %d: %d\n",
 					    sta->addr, arvif->vdev_id, ret);
 		}
+
+		if (!ret &&
+		    ath11k_wmi_supports_6ghz_cc_ext(ar) &&
+		    arvif->vdev_type == WMI_VDEV_TYPE_STA &&
+		    arvif->chanctx.def.chan &&
+		    arvif->chanctx.def.chan->band == NL80211_BAND_6GHZ) {
+			reg_info = &ar->ab->reg_info_store[ar->pdev_idx];
+			power_type = vif->bss_conf.power_type;
+
+			if (power_type == IEEE80211_REG_UNSET_AP) {
+				ath11k_warn(ar->ab, "invalid power type %d\n",
+					    power_type);
+				ret = -EINVAL;
+			} else {
+				ret = ath11k_reg_handle_chan_list(ar->ab,
+								  reg_info,
+								  power_type);
+				if (ret)
+					ath11k_warn(ar->ab,
+						    "failed to handle chan list with power type %d\n",
+						    power_type);
+			}
+		}
 	} else if (old_state == IEEE80211_STA_AUTHORIZED &&
 		   new_state == IEEE80211_STA_ASSOC) {
 		spin_lock_bh(&ar->ab->base_lock);
-- 
cgit v1.2.3


From bb9025f4432f8c158322cf2c04c2b492f23eb511 Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin@ispras.ru>
Date: Sat, 4 May 2024 14:47:01 +0300
Subject: dma-mapping: benchmark: fix up kthread-related error handling

kthread creation failure is invalidly handled inside do_map_benchmark().
The put_task_struct() calls on the error path are supposed to balance the
get_task_struct() calls which only happen after all the kthreads are
successfully created. Rollback using kthread_stop() for already created
kthreads in case of such failure.

In normal situation call kthread_stop_put() to gracefully stop kthreads
and put their task refcounts. This should be done for all started
kthreads.

Found by Linux Verification Center (linuxtesting.org).

Fixes: 65789daa8087 ("dma-mapping: add benchmark support for streaming DMA APIs")
Suggested-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 kernel/dma/map_benchmark.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index 02205ab53b7e..2478957cf9f8 100644
--- a/kernel/dma/map_benchmark.c
+++ b/kernel/dma/map_benchmark.c
@@ -118,6 +118,8 @@ static int do_map_benchmark(struct map_benchmark_data *map)
 		if (IS_ERR(tsk[i])) {
 			pr_err("create dma_map thread failed\n");
 			ret = PTR_ERR(tsk[i]);
+			while (--i >= 0)
+				kthread_stop(tsk[i]);
 			goto out;
 		}
 
@@ -139,13 +141,17 @@ static int do_map_benchmark(struct map_benchmark_data *map)
 
 	msleep_interruptible(map->bparam.seconds * 1000);
 
-	/* wait for the completion of benchmark threads */
+	/* wait for the completion of all started benchmark threads */
 	for (i = 0; i < threads; i++) {
-		ret = kthread_stop(tsk[i]);
-		if (ret)
-			goto out;
+		int kthread_ret = kthread_stop_put(tsk[i]);
+
+		if (kthread_ret)
+			ret = kthread_ret;
 	}
 
+	if (ret)
+		goto out;
+
 	loops = atomic64_read(&map->loops);
 	if (likely(loops > 0)) {
 		u64 map_variance, unmap_variance;
@@ -170,8 +176,6 @@ static int do_map_benchmark(struct map_benchmark_data *map)
 	}
 
 out:
-	for (i = 0; i < threads; i++)
-		put_task_struct(tsk[i]);
 	put_device(map->dev);
 	kfree(tsk);
 	return ret;
-- 
cgit v1.2.3


From f7c9ccaadffd13066353332c13d7e9bf73b8f92d Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin@ispras.ru>
Date: Sat, 4 May 2024 14:47:02 +0300
Subject: dma-mapping: benchmark: avoid needless copy_to_user if benchmark
 fails

If do_map_benchmark() has failed, there is nothing useful to copy back
to userspace.

Suggested-by: Barry Song <21cnbao@gmail.com>
Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 kernel/dma/map_benchmark.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index 2478957cf9f8..a6edb1ef98c8 100644
--- a/kernel/dma/map_benchmark.c
+++ b/kernel/dma/map_benchmark.c
@@ -256,6 +256,9 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
 		 * dma_mask changed by benchmark
 		 */
 		dma_set_mask(map->dev, old_dma_mask);
+
+		if (ret)
+			return ret;
 		break;
 	default:
 		return -EINVAL;
-- 
cgit v1.2.3


From 1ff05e723f7ca30644b8ec3fb093f16312e408ad Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin@ispras.ru>
Date: Sat, 4 May 2024 14:47:03 +0300
Subject: dma-mapping: benchmark: fix node id validation

While validating node ids in map_benchmark_ioctl(), node_possible() may
be provided with invalid argument outside of [0,MAX_NUMNODES-1] range
leading to:

BUG: KASAN: wild-memory-access in map_benchmark_ioctl (kernel/dma/map_benchmark.c:214)
Read of size 8 at addr 1fffffff8ccb6398 by task dma_map_benchma/971
CPU: 7 PID: 971 Comm: dma_map_benchma Not tainted 6.9.0-rc6 #37
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
Call Trace:
 <TASK>
dump_stack_lvl (lib/dump_stack.c:117)
kasan_report (mm/kasan/report.c:603)
kasan_check_range (mm/kasan/generic.c:189)
variable_test_bit (arch/x86/include/asm/bitops.h:227) [inline]
arch_test_bit (arch/x86/include/asm/bitops.h:239) [inline]
_test_bit at (include/asm-generic/bitops/instrumented-non-atomic.h:142) [inline]
node_state (include/linux/nodemask.h:423) [inline]
map_benchmark_ioctl (kernel/dma/map_benchmark.c:214)
full_proxy_unlocked_ioctl (fs/debugfs/file.c:333)
__x64_sys_ioctl (fs/ioctl.c:890)
do_syscall_64 (arch/x86/entry/common.c:83)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)

Compare node ids with sane bounds first. NUMA_NO_NODE is considered a
special valid case meaning that benchmarking kthreads won't be bound to a
cpuset of a given node.

Found by Linux Verification Center (linuxtesting.org).

Fixes: 65789daa8087 ("dma-mapping: add benchmark support for streaming DMA APIs")
Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 kernel/dma/map_benchmark.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index a6edb1ef98c8..9f6c15f3f168 100644
--- a/kernel/dma/map_benchmark.c
+++ b/kernel/dma/map_benchmark.c
@@ -212,7 +212,8 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
 		}
 
 		if (map->bparam.node != NUMA_NO_NODE &&
-		    !node_possible(map->bparam.node)) {
+		    (map->bparam.node < 0 || map->bparam.node >= MAX_NUMNODES ||
+		     !node_possible(map->bparam.node))) {
 			pr_err("invalid numa node\n");
 			return -EINVAL;
 		}
-- 
cgit v1.2.3


From e64746e74f717961250a155e14c156616fcd981f Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin@ispras.ru>
Date: Sat, 4 May 2024 14:47:04 +0300
Subject: dma-mapping: benchmark: handle NUMA_NO_NODE correctly

cpumask_of_node() can be called for NUMA_NO_NODE inside do_map_benchmark()
resulting in the following sanitizer report:

UBSAN: array-index-out-of-bounds in ./arch/x86/include/asm/topology.h:72:28
index -1 is out of range for type 'cpumask [64][1]'
CPU: 1 PID: 990 Comm: dma_map_benchma Not tainted 6.9.0-rc6 #29
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
Call Trace:
 <TASK>
dump_stack_lvl (lib/dump_stack.c:117)
ubsan_epilogue (lib/ubsan.c:232)
__ubsan_handle_out_of_bounds (lib/ubsan.c:429)
cpumask_of_node (arch/x86/include/asm/topology.h:72) [inline]
do_map_benchmark (kernel/dma/map_benchmark.c:104)
map_benchmark_ioctl (kernel/dma/map_benchmark.c:246)
full_proxy_unlocked_ioctl (fs/debugfs/file.c:333)
__x64_sys_ioctl (fs/ioctl.c:890)
do_syscall_64 (arch/x86/entry/common.c:83)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)

Use cpumask_of_node() in place when binding a kernel thread to a cpuset
of a particular node.

Note that the provided node id is checked inside map_benchmark_ioctl().
It's just a NUMA_NO_NODE case which is not handled properly later.

Found by Linux Verification Center (linuxtesting.org).

Fixes: 65789daa8087 ("dma-mapping: add benchmark support for streaming DMA APIs")
Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
Acked-by: Barry Song <baohua@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 kernel/dma/map_benchmark.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index 9f6c15f3f168..4950e0b622b1 100644
--- a/kernel/dma/map_benchmark.c
+++ b/kernel/dma/map_benchmark.c
@@ -101,7 +101,6 @@ static int do_map_benchmark(struct map_benchmark_data *map)
 	struct task_struct **tsk;
 	int threads = map->bparam.threads;
 	int node = map->bparam.node;
-	const cpumask_t *cpu_mask = cpumask_of_node(node);
 	u64 loops;
 	int ret = 0;
 	int i;
@@ -124,7 +123,7 @@ static int do_map_benchmark(struct map_benchmark_data *map)
 		}
 
 		if (node != NUMA_NO_NODE)
-			kthread_bind_mask(tsk[i], cpu_mask);
+			kthread_bind_mask(tsk[i], cpumask_of_node(node));
 	}
 
 	/* clear the old value in the previous benchmark */
-- 
cgit v1.2.3


From 803482f472ccc9576c0e606143725d1d8c61019d Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <isaku.yamahata@intel.com>
Date: Fri, 17 May 2024 17:04:22 -0700
Subject: KVM: x86/mmu: Use SHADOW_NONPRESENT_VALUE for atomic zap in TDP MMU

Use SHADOW_NONPRESENT_VALUE when zapping TDP MMU SPTEs with mmu_lock held
for read, tdp_mmu_zap_spte_atomic() was simply missed during the initial
development.

Fixes: 7f01cab84928 ("KVM: x86/mmu: Allow non-zero value for non-present SPTE and removed SPTE")
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
[sean: write changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Message-ID: <20240518000430.1118488-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 1259dd63defc..36539c1b36cd 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -626,7 +626,7 @@ static inline int tdp_mmu_zap_spte_atomic(struct kvm *kvm,
 	 * SPTEs.
 	 */
 	handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
-			    0, iter->level, true);
+			    SHADOW_NONPRESENT_VALUE, iter->level, true);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 40e8a6901a2c983e3e541a363865e02cf591d458 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 17 May 2024 17:04:28 -0700
Subject: KVM: VMX: Don't kill the VM on an unexpected #VE

Don't terminate the VM on an unexpected #VE, as it's extremely unlikely
the #VE is fatal to the guest, and even less likely that it presents a
danger to the host.  Simply resume the guest on "failure", as the #VE info
page's BUSY field will prevent converting any more EPT Violations to #VEs
for the vCPU (at least, that's what the BUSY field is supposed to do).

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20240518000430.1118488-8-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 51b2cd13250a..069fbbc1e04a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5217,8 +5217,8 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 	if (is_invalid_opcode(intr_info))
 		return handle_ud(vcpu);
 
-	if (KVM_BUG_ON(is_ve_fault(intr_info), vcpu->kvm))
-		return -EIO;
+	if (WARN_ON_ONCE(is_ve_fault(intr_info)))
+		return 1;
 
 	error_code = 0;
 	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
-- 
cgit v1.2.3


From d1b32ecdc8ad0346ac551866d9f6831995fd70de Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 17 May 2024 17:04:23 -0700
Subject: KVM: nVMX: Initialize #VE info page for vmcs02 when proving #VE
 support

Point vmcs02.VE_INFORMATION_ADDRESS at the vCPU's #VE info page when
initializing vmcs02, otherwise KVM will run L2 with EPT Violation #VE
enabled and a VE info address pointing at pfn 0.

Fixes: 8131cf5b4fd8 ("KVM: VMX: Introduce test mode related to EPT violation VE")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20240518000430.1118488-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index d5b832126e34..6798fadaa335 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2242,6 +2242,9 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
 		vmcs_write64(EPT_POINTER,
 			     construct_eptp(&vmx->vcpu, 0, PT64_ROOT_4LEVEL));
 
+	if (vmx->ve_info)
+		vmcs_write64(VE_INFORMATION_ADDRESS, __pa(vmx->ve_info));
+
 	/* All VMFUNCs are currently emulated through L0 vmexits.  */
 	if (cpu_has_vmx_vmfunc())
 		vmcs_write64(VM_FUNCTION_CONTROL, 0);
-- 
cgit v1.2.3


From 9031b42139b9d45ef806c9a7fee166c1b6443c3c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 17 May 2024 17:04:24 -0700
Subject: KVM: nVMX: Always handle #VEs in L0 (never forward #VEs from L2 to
 L1)

Always handle #VEs, e.g. due to prove EPT Violation #VE failures, in L0,
as KVM does not expose any #VE capabilities to L1, i.e. any and all #VEs
are KVM's responsibility.

Fixes: 8131cf5b4fd8 ("KVM: VMX: Introduce test mode related to EPT violation VE")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20240518000430.1118488-4-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 6798fadaa335..643935a0f70a 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -6233,6 +6233,8 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
 		else if (is_alignment_check(intr_info) &&
 			 !vmx_guest_inject_ac(vcpu))
 			return true;
+		else if (is_ve_fault(intr_info))
+			return true;
 		return false;
 	case EXIT_REASON_EXTERNAL_INTERRUPT:
 		return true;
-- 
cgit v1.2.3


From 837d557aba6b816985141ddbeb7649444ed26d3b Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 17 May 2024 17:04:25 -0700
Subject: KVM: x86/mmu: Add sanity checks that KVM doesn't create EPT #VE SPTEs

Assert that KVM doesn't set a SPTE to a value that could trigger an EPT
Violation #VE on a non-MMIO SPTE, e.g. to help detect bugs even without
KVM_INTEL_PROVE_VE enabled, and to help debug actual #VE failures.

Note, this will run afoul of TDX support, which needs to reflect emulated
MMIO accesses into the guest as #VEs (which was the whole point of adding
EPT Violation #VE support in KVM).  The obvious fix for that is to exempt
MMIO SPTEs, but that's annoyingly difficult now that is_mmio_spte() relies
on a per-VM value.  However, resolving that conundrum is a future problem,
whereas getting KVM_INTEL_PROVE_VE healthy is a current problem.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20240518000430.1118488-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c      | 3 +++
 arch/x86/kvm/mmu/spte.h     | 9 +++++++++
 arch/x86/kvm/mmu/tdp_iter.h | 2 ++
 3 files changed, 14 insertions(+)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 662f62dfb2aa..1ae1c1328268 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -336,16 +336,19 @@ static int is_cpuid_PSE36(void)
 #ifdef CONFIG_X86_64
 static void __set_spte(u64 *sptep, u64 spte)
 {
+	KVM_MMU_WARN_ON(is_ept_ve_possible(spte));
 	WRITE_ONCE(*sptep, spte);
 }
 
 static void __update_clear_spte_fast(u64 *sptep, u64 spte)
 {
+	KVM_MMU_WARN_ON(is_ept_ve_possible(spte));
 	WRITE_ONCE(*sptep, spte);
 }
 
 static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
 {
+	KVM_MMU_WARN_ON(is_ept_ve_possible(spte));
 	return xchg(sptep, spte);
 }
 
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 5dd5405fa07a..52fa004a1fbc 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -3,6 +3,8 @@
 #ifndef KVM_X86_MMU_SPTE_H
 #define KVM_X86_MMU_SPTE_H
 
+#include <asm/vmx.h>
+
 #include "mmu.h"
 #include "mmu_internal.h"
 
@@ -276,6 +278,13 @@ static inline bool is_shadow_present_pte(u64 pte)
 	return !!(pte & SPTE_MMU_PRESENT_MASK);
 }
 
+static inline bool is_ept_ve_possible(u64 spte)
+{
+	return (shadow_present_mask & VMX_EPT_SUPPRESS_VE_BIT) &&
+	       !(spte & VMX_EPT_SUPPRESS_VE_BIT) &&
+	       (spte & VMX_EPT_RWX_MASK) != VMX_EPT_MISCONFIG_WX_VALUE;
+}
+
 /*
  * Returns true if A/D bits are supported in hardware and are enabled by KVM.
  * When enabled, KVM uses A/D bits for all non-nested MMUs.  Because L1 can
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index fae559559a80..2880fd392e0c 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -21,11 +21,13 @@ static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
 
 static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
 {
+	KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));
 	return xchg(rcu_dereference(sptep), new_spte);
 }
 
 static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
 {
+	KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));
 	WRITE_ONCE(*rcu_dereference(sptep), new_spte);
 }
 
-- 
cgit v1.2.3


From 743f1773366461cb44de297b1caf0a4292eb8fda Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 17 May 2024 17:04:26 -0700
Subject: KVM: VMX: Dump VMCS on unexpected #VE

Dump the VMCS on an unexpected #VE, otherwise it's practically impossible
to figure out why the #VE occurred.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20240518000430.1118488-6-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 069fbbc1e04a..a02b2720023a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5217,8 +5217,10 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 	if (is_invalid_opcode(intr_info))
 		return handle_ud(vcpu);
 
-	if (WARN_ON_ONCE(is_ve_fault(intr_info)))
+	if (WARN_ON_ONCE(is_ve_fault(intr_info))) {
+		dump_vmcs(vcpu);
 		return 1;
+	}
 
 	error_code = 0;
 	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
-- 
cgit v1.2.3


From bca99c0356524a620126b789edbaf25934415467 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 17 May 2024 17:04:27 -0700
Subject: KVM: x86/mmu: Print SPTEs on unexpected #VE

Print the SPTEs that correspond to the faulting GPA on an unexpected EPT
Violation #VE to help the user debug failures, e.g. to pinpoint which SPTE
didn't have SUPPRESS_VE set.

Opportunistically assert that the underlying exit reason was indeed an EPT
Violation, as the CPU has *really* gone off the rails if a #VE occurs due
to a completely unexpected exit reason.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20240518000430.1118488-7-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu/mmu.c          | 40 ++++++++++++++++++++++++++++++++--------
 arch/x86/kvm/vmx/vmx.c          |  5 +++++
 3 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ece45b3f6f20..f8ca74e7678f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2154,6 +2154,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
 		       void *insn, int insn_len);
+void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
 void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			     u64 addr, unsigned long roots);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 1ae1c1328268..b7b4426a7221 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4104,23 +4104,31 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level
 	return leaf;
 }
 
-/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
-static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
+static int get_sptes_lockless(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+			      int *root_level)
 {
-	u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
-	struct rsvd_bits_validate *rsvd_check;
-	int root, leaf, level;
-	bool reserved = false;
+	int leaf;
 
 	walk_shadow_page_lockless_begin(vcpu);
 
 	if (is_tdp_mmu_active(vcpu))
-		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
+		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, root_level);
 	else
-		leaf = get_walk(vcpu, addr, sptes, &root);
+		leaf = get_walk(vcpu, addr, sptes, root_level);
 
 	walk_shadow_page_lockless_end(vcpu);
+	return leaf;
+}
 
+/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
+static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
+{
+	u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
+	struct rsvd_bits_validate *rsvd_check;
+	int root, leaf, level;
+	bool reserved = false;
+
+	leaf = get_sptes_lockless(vcpu, addr, sptes, &root);
 	if (unlikely(leaf < 0)) {
 		*sptep = 0ull;
 		return reserved;
@@ -5924,6 +5932,22 @@ emulate:
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 
+void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg)
+{
+	u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
+	int root_level, leaf, level;
+
+	leaf = get_sptes_lockless(vcpu, gpa, sptes, &root_level);
+	if (unlikely(leaf < 0))
+		return;
+
+	pr_err("%s %llx", msg, gpa);
+	for (level = root_level; level >= leaf; level--)
+		pr_cont(", spte[%d] = 0x%llx", level, sptes[level]);
+	pr_cont("\n");
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_print_sptes);
+
 static void __kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 				      u64 addr, hpa_t root_hpa)
 {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a02b2720023a..58832aae2248 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5218,7 +5218,12 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 		return handle_ud(vcpu);
 
 	if (WARN_ON_ONCE(is_ve_fault(intr_info))) {
+		struct vmx_ve_information *ve_info = vmx->ve_info;
+
+		WARN_ONCE(ve_info->exit_reason != EXIT_REASON_EPT_VIOLATION,
+			  "Unexpected #VE on VM-Exit reason 0x%x", ve_info->exit_reason);
 		dump_vmcs(vcpu);
+		kvm_mmu_print_sptes(vcpu, ve_info->guest_physical_address, "#VE");
 		return 1;
 	}
 
-- 
cgit v1.2.3


From a5dc0c9b557573315633bc78bacf8f548352f95b Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 17 May 2024 17:04:29 -0700
Subject: KVM: VMX: Enumerate EPT Violation #VE support in /proc/cpuinfo

Don't suppress printing EPT_VIOLATION_VE in /proc/cpuinfo, knowing whether
or not KVM_INTEL_PROVE_VE actually does anything is extremely valuable.
A privileged user can get at the information by reading the raw MSR, but
the whole point of the VMX flags is to avoid needing to glean information
from raw MSR reads.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20240518000430.1118488-9-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/vmxfeatures.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h
index 266daf5b5b84..695f36664889 100644
--- a/arch/x86/include/asm/vmxfeatures.h
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -77,7 +77,7 @@
 #define VMX_FEATURE_ENCLS_EXITING	( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */
 #define VMX_FEATURE_RDSEED_EXITING	( 2*32+ 16) /* "" VM-Exit on RDSEED */
 #define VMX_FEATURE_PAGE_MOD_LOGGING	( 2*32+ 17) /* "pml" Log dirty pages into buffer */
-#define VMX_FEATURE_EPT_VIOLATION_VE	( 2*32+ 18) /* "" Conditionally reflect EPT violations as #VE exceptions */
+#define VMX_FEATURE_EPT_VIOLATION_VE	( 2*32+ 18) /* Conditionally reflect EPT violations as #VE exceptions */
 #define VMX_FEATURE_PT_CONCEAL_VMX	( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */
 #define VMX_FEATURE_XSAVES		( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */
 #define VMX_FEATURE_MODE_BASED_EPT_EXEC	( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */
-- 
cgit v1.2.3


From 6af6142e3a62efd6074905e4a94d64956a3f4b7c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 17 May 2024 17:04:30 -0700
Subject: KVM: x86: Disable KVM_INTEL_PROVE_VE by default

Disable KVM's "prove #VE" support by default, as it provides no functional
value, and even its sanity checking benefits are relatively limited.  I.e.
it should be fully opt-in even on debug kernels, especially since EPT
Violation #VE suppression appears to be buggy on some CPUs.

Opportunistically add a line in the help text to make it abundantly clear
that KVM_INTEL_PROVE_VE should never be enabled in a production
environment.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20240518000430.1118488-10-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/Kconfig | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index d64fb2b3eb69..7f47233ddf46 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -97,15 +97,17 @@ config KVM_INTEL
 
 config KVM_INTEL_PROVE_VE
         bool "Check that guests do not receive #VE exceptions"
-        default KVM_PROVE_MMU || DEBUG_KERNEL
-        depends on KVM_INTEL
+        depends on KVM_INTEL && EXPERT
         help
-
           Checks that KVM's page table management code will not incorrectly
           let guests receive a virtualization exception.  Virtualization
           exceptions will be trapped by the hypervisor rather than injected
           in the guest.
 
+          Note: some CPUs appear to generate spurious EPT Violations #VEs
+          that trigger KVM's WARN, in particular with eptad=0 and/or nested
+          virtualization.
+
           If unsure, say N.
 
 config X86_SGX_KVM
-- 
cgit v1.2.3


From 76d5363c20eeeb937b56c0ac6c61e697bd1bf154 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 17 May 2024 11:03:41 -0700
Subject: KVM: x86: Force KVM_WERROR if the global WERROR is enabled

Force KVM_WERROR if the global WERROR is enabled to avoid pestering the
user about a Kconfig that will ultimately be ignored.  Force KVM_WERROR
instead of making it mutually exclusive with WERROR to avoid generating a
.config builds KVM with -Werror, but has KVM_WERROR=n.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20240517180341.974251-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 7f47233ddf46..fec95a770270 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -44,6 +44,7 @@ config KVM
 	select KVM_VFIO
 	select HAVE_KVM_PM_NOTIFIER if PM
 	select KVM_GENERIC_HARDWARE_ENABLING
+	select KVM_WERROR if WERROR
 	help
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
@@ -66,7 +67,7 @@ config KVM_WERROR
 	# FRAME_WARN, i.e. KVM_WERROR=y with KASAN=y requires special tuning.
 	# Building KVM with -Werror and KASAN is still doable via enabling
 	# the kernel-wide WERROR=y.
-	depends on KVM && EXPERT && !KASAN
+	depends on KVM && ((EXPERT && !KASAN) || WERROR)
 	help
 	  Add -Werror to the build flags for KVM.
 
-- 
cgit v1.2.3


From b4bd556467477420ee3a91fbcba73c579669edc6 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 21 May 2024 19:14:35 -0700
Subject: KVM: SVM: WARN on vNMI + NMI window iff NMIs are outright masked

When requesting an NMI window, WARN on vNMI support being enabled if and
only if NMIs are actually masked, i.e. if the vCPU is already handling an
NMI.  KVM's ABI for NMIs that arrive simultanesouly (from KVM's point of
view) is to inject one NMI and pend the other.  When using vNMI, KVM pends
the second NMI simply by setting V_NMI_PENDING, and lets the CPU do the
rest (hardware automatically sets V_NMI_BLOCKING when an NMI is injected).

However, if KVM can't immediately inject an NMI, e.g. because the vCPU is
in an STI shadow or is running with GIF=0, then KVM will request an NMI
window and trigger the WARN (but still function correctly).

Whether or not the GIF=0 case makes sense is debatable, as the intent of
KVM's behavior is to provide functionality that is as close to real
hardware as possible.  E.g. if two NMIs are sent in quick succession, the
probability of both NMIs arriving in an STI shadow is infinitesimally low
on real hardware, but significantly larger in a virtual environment, e.g.
if the vCPU is preempted in the STI shadow.  For GIF=0, the argument isn't
as clear cut, because the window where two NMIs can collide is much larger
in bare metal (though still small).

That said, KVM should not have divergent behavior for the GIF=0 case based
on whether or not vNMI support is enabled.  And KVM has allowed
simultaneous NMIs with GIF=0 for over a decade, since commit 7460fb4a3400
("KVM: Fix simultaneous NMIs").  I.e. KVM's GIF=0 handling shouldn't be
modified without a *really* good reason to do so, and if KVM's behavior
were to be modified, it should be done irrespective of vNMI support.

Fixes: fa4c027a7956 ("KVM: x86: Add support for SVM's Virtual NMI")
Cc: stable@vger.kernel.org
Cc: Santosh Shukla <Santosh.Shukla@amd.com>
Cc: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20240522021435.1684366-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c8dc25886c16..1d3c8be39479 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3846,16 +3846,27 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	/*
-	 * KVM should never request an NMI window when vNMI is enabled, as KVM
-	 * allows at most one to-be-injected NMI and one pending NMI, i.e. if
-	 * two NMIs arrive simultaneously, KVM will inject one and set
-	 * V_NMI_PENDING for the other.  WARN, but continue with the standard
-	 * single-step approach to try and salvage the pending NMI.
+	 * If NMIs are outright masked, i.e. the vCPU is already handling an
+	 * NMI, and KVM has not yet intercepted an IRET, then there is nothing
+	 * more to do at this time as KVM has already enabled IRET intercepts.
+	 * If KVM has already intercepted IRET, then single-step over the IRET,
+	 * as NMIs aren't architecturally unmasked until the IRET completes.
+	 *
+	 * If vNMI is enabled, KVM should never request an NMI window if NMIs
+	 * are masked, as KVM allows at most one to-be-injected NMI and one
+	 * pending NMI.  If two NMIs arrive simultaneously, KVM will inject one
+	 * NMI and set V_NMI_PENDING for the other, but if and only if NMIs are
+	 * unmasked.  KVM _will_ request an NMI window in some situations, e.g.
+	 * if the vCPU is in an STI shadow or if GIF=0, KVM can't immediately
+	 * inject the NMI.  In those situations, KVM needs to single-step over
+	 * the STI shadow or intercept STGI.
 	 */
-	WARN_ON_ONCE(is_vnmi_enabled(svm));
+	if (svm_get_nmi_mask(vcpu)) {
+		WARN_ON_ONCE(is_vnmi_enabled(svm));
 
-	if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
-		return; /* IRET will cause a vm exit */
+		if (!svm->awaiting_iret_completion)
+			return; /* IRET will cause a vm exit */
+	}
 
 	/*
 	 * SEV-ES guests are responsible for signaling when a vCPU is ready to
-- 
cgit v1.2.3


From 2fe7b422460d14b33027d8770f7be8d26bcb2639 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Tue, 21 May 2024 09:50:47 -0700
Subject: nvme: fix multipath batched completion accounting

Batched completions were missing the io stats accounting and bio trace
events. Move the common code to a helper and call it from the batched
and non-batched functions.

Fixes: d4d957b53d91ee ("nvme-multipath: support io stats on the mpath device")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/core.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 954f850f113a..79cdd34dfa18 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -414,6 +414,14 @@ static inline void nvme_end_req_zoned(struct request *req)
 	}
 }
 
+static inline void __nvme_end_req(struct request *req)
+{
+	nvme_end_req_zoned(req);
+	nvme_trace_bio_complete(req);
+	if (req->cmd_flags & REQ_NVME_MPATH)
+		nvme_mpath_end_request(req);
+}
+
 static inline void nvme_end_req(struct request *req)
 {
 	blk_status_t status = nvme_error_status(nvme_req(req)->status);
@@ -424,10 +432,7 @@ static inline void nvme_end_req(struct request *req)
 		else
 			nvme_log_error(req);
 	}
-	nvme_end_req_zoned(req);
-	nvme_trace_bio_complete(req);
-	if (req->cmd_flags & REQ_NVME_MPATH)
-		nvme_mpath_end_request(req);
+	__nvme_end_req(req);
 	blk_mq_end_request(req, status);
 }
 
@@ -476,7 +481,7 @@ void nvme_complete_batch_req(struct request *req)
 {
 	trace_nvme_complete_rq(req);
 	nvme_cleanup_cmd(req);
-	nvme_end_req_zoned(req);
+	__nvme_end_req(req);
 }
 EXPORT_SYMBOL_GPL(nvme_complete_batch_req);
 
-- 
cgit v1.2.3


From a2e4c5f5f68dbd206f132bc709b98dea64afc3b8 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Tue, 21 May 2024 11:02:28 -0700
Subject: nvme-multipath: fix io accounting on failover

There are io stats accounting that needs to be handled, so don't call
blk_mq_end_request() directly. Use the existing nvme_end_req() helper
that already handles everything.

Fixes: d4d957b53d91ee ("nvme-multipath: support io stats on the mpath device")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/core.c      | 2 +-
 drivers/nvme/host/multipath.c | 3 ++-
 drivers/nvme/host/nvme.h      | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 79cdd34dfa18..7706df237349 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -422,7 +422,7 @@ static inline void __nvme_end_req(struct request *req)
 		nvme_mpath_end_request(req);
 }
 
-static inline void nvme_end_req(struct request *req)
+void nvme_end_req(struct request *req)
 {
 	blk_status_t status = nvme_error_status(nvme_req(req)->status);
 
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 9c1e135b8df3..1bee176fd850 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -118,7 +118,8 @@ void nvme_failover_req(struct request *req)
 	blk_steal_bios(&ns->head->requeue_list, req);
 	spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
 
-	blk_mq_end_request(req, 0);
+	nvme_req(req)->status = 0;
+	nvme_end_req(req);
 	kblockd_schedule_work(&ns->head->requeue_work);
 }
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index cacc56f4bbf4..fc31bd340a63 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -767,6 +767,7 @@ static inline bool nvme_state_terminal(struct nvme_ctrl *ctrl)
 	}
 }
 
+void nvme_end_req(struct request *req);
 void nvme_complete_rq(struct request *req);
 void nvme_complete_batch_req(struct request *req);
 
-- 
cgit v1.2.3


From f97914e35fd98b2b18fb8a092e0a0799f73afdfe Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 21 May 2024 23:20:28 +0300
Subject: nvmet: fix ns enable/disable possible hang

When disabling an nvmet namespace, there is a period where the
subsys->lock is released, as the ns disable waits for backend IO to
complete, and the ns percpu ref to be properly killed. The original
intent was to avoid taking the subsystem lock for a prolong period as
other processes may need to acquire it (for example new incoming
connections).

However, it opens up a window where another process may come in and
enable the ns, (re)intiailizing the ns percpu_ref, causing the disable
sequence to hang.

Solve this by taking the global nvmet_config_sem over the entire configfs
enable/disable sequence.

Fixes: a07b4970f464 ("nvmet: add a generic NVMe target")
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/target/configfs.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 7c43a0ad6877..bd87dfd173a4 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -676,10 +676,18 @@ static ssize_t nvmet_ns_enable_store(struct config_item *item,
 	if (kstrtobool(page, &enable))
 		return -EINVAL;
 
+	/*
+	 * take a global nvmet_config_sem because the disable routine has a
+	 * window where it releases the subsys-lock, giving a chance to
+	 * a parallel enable to concurrently execute causing the disable to
+	 * have a misaccounting of the ns percpu_ref.
+	 */
+	down_write(&nvmet_config_sem);
 	if (enable)
 		ret = nvmet_ns_enable(ns);
 	else
 		nvmet_ns_disable(ns);
+	up_write(&nvmet_config_sem);
 
 	return ret ? ret : count;
 }
-- 
cgit v1.2.3


From ec58991054e899c9d86f7e3c8a96cb602d4b5938 Mon Sep 17 00:00:00 2001
From: Hawking Zhang <Hawking.Zhang@amd.com>
Date: Tue, 21 May 2024 15:03:02 +0800
Subject: drm/amdgpu: correct hbm field in boot status

hbm filed takes bit 13 and bit 14 in boot status.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index c8980d5f6540..7021c4a66fb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -46,7 +46,7 @@ struct amdgpu_iv_entry;
 #define AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(x)		AMDGPU_GET_REG_FIELD(x, 7, 7)
 #define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x)			AMDGPU_GET_REG_FIELD(x, 10, 8)
 #define AMDGPU_RAS_GPU_ERR_AID_ID(x)			AMDGPU_GET_REG_FIELD(x, 12, 11)
-#define AMDGPU_RAS_GPU_ERR_HBM_ID(x)			AMDGPU_GET_REG_FIELD(x, 13, 13)
+#define AMDGPU_RAS_GPU_ERR_HBM_ID(x)			AMDGPU_GET_REG_FIELD(x, 14, 13)
 #define AMDGPU_RAS_GPU_ERR_BOOT_STATUS(x)		AMDGPU_GET_REG_FIELD(x, 31, 31)
 
 #define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT	1000
-- 
cgit v1.2.3


From 8195979d2dd995d60c2663adf54c69c1bf4eadd1 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 8 May 2024 16:45:35 -0500
Subject: drm/amd/display: Enable colorspace property for MST connectors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MST colorspace property support was disabled due to a series of warnings
that came up when the device was plugged in since the properties weren't
made at device creation. Create the properties in advance instead.

Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Fixes: 69a959610229 ("drm/amd/display: Temporary Disable MST DP Colorspace Property").
Reported-and-tested-by: Tyler Schneider <tyler.schneider@amd.com>
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3353
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 0b03e659fdf3..8b0e997ebdae 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -613,6 +613,9 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 		&connector->base,
 		dev->mode_config.tile_property,
 		0);
+	connector->colorspace_property = master->base.colorspace_property;
+	if (connector->colorspace_property)
+		drm_connector_attach_colorspace_property(connector);
 
 	drm_connector_set_path_property(connector, pathprop);
 
-- 
cgit v1.2.3


From 699646734ab51bf5b1cd4a7a30c20074f6e74f6e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 20 May 2024 22:30:17 -0700
Subject: uprobes: prevent mutex_lock() under rcu_read_lock()

Recent changes made uprobe_cpu_buffer preparation lazy, and moved it
deeper into __uprobe_trace_func(). This is problematic because
__uprobe_trace_func() is called inside rcu_read_lock()/rcu_read_unlock()
block, which then calls prepare_uprobe_buffer() -> uprobe_buffer_get() ->
mutex_lock(&ucb->mutex), leading to a splat about using mutex under
non-sleepable RCU:

  BUG: sleeping function called from invalid context at kernel/locking/mutex.c:585
   in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 98231, name: stress-ng-sigq
   preempt_count: 0, expected: 0
   RCU nest depth: 1, expected: 0
   ...
   Call Trace:
    <TASK>
    dump_stack_lvl+0x3d/0xe0
    __might_resched+0x24c/0x270
    ? prepare_uprobe_buffer+0xd5/0x1d0
    __mutex_lock+0x41/0x820
    ? ___perf_sw_event+0x206/0x290
    ? __perf_event_task_sched_in+0x54/0x660
    ? __perf_event_task_sched_in+0x54/0x660
    prepare_uprobe_buffer+0xd5/0x1d0
    __uprobe_trace_func+0x4a/0x140
    uprobe_dispatcher+0x135/0x280
    ? uprobe_dispatcher+0x94/0x280
    uprobe_notify_resume+0x650/0xec0
    ? atomic_notifier_call_chain+0x21/0x110
    ? atomic_notifier_call_chain+0xf8/0x110
    irqentry_exit_to_user_mode+0xe2/0x1e0
    asm_exc_int3+0x35/0x40
   RIP: 0033:0x7f7e1d4da390
   Code: 33 04 00 0f 1f 80 00 00 00 00 f3 0f 1e fa b9 01 00 00 00 e9 b2 fc ff ff 66 90 f3 0f 1e fa 31 c9 e9 a5 fc ff ff 0f 1f 44 00 00 <cc> 0f 1e fa b8 27 00 00 00 0f 05 c3 0f 1f 40 00 f3 0f 1e fa b8 6e
   RSP: 002b:00007ffd2abc3608 EFLAGS: 00000246
   RAX: 0000000000000000 RBX: 0000000076d325f1 RCX: 0000000000000000
   RDX: 0000000076d325f1 RSI: 000000000000000a RDI: 00007ffd2abc3690
   RBP: 000000000000000a R08: 00017fb700000000 R09: 00017fb700000000
   R10: 00017fb700000000 R11: 0000000000000246 R12: 0000000000017ff2
   R13: 00007ffd2abc3610 R14: 0000000000000000 R15: 00007ffd2abc3780
    </TASK>

Luckily, it's easy to fix by moving prepare_uprobe_buffer() to be called
slightly earlier: into uprobe_trace_func() and uretprobe_trace_func(), outside
of RCU locked section. This still keeps this buffer preparation lazy and helps
avoid the overhead when it's not needed. E.g., if there is only BPF uprobe
handler installed on a given uprobe, buffer won't be initialized.

Note, the other user of prepare_uprobe_buffer(), __uprobe_perf_func(), is not
affected, as it doesn't prepare buffer under RCU read lock.

Link: https://lore.kernel.org/all/20240521053017.3708530-1-andrii@kernel.org/

Fixes: 1b8f85defbc8 ("uprobes: prepare uprobe args buffer lazily")
Reported-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 kernel/trace/trace_uprobe.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 8541fa1494ae..c98e3b3386ba 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -970,19 +970,17 @@ static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu,
 
 static void __uprobe_trace_func(struct trace_uprobe *tu,
 				unsigned long func, struct pt_regs *regs,
-				struct uprobe_cpu_buffer **ucbp,
+				struct uprobe_cpu_buffer *ucb,
 				struct trace_event_file *trace_file)
 {
 	struct uprobe_trace_entry_head *entry;
 	struct trace_event_buffer fbuffer;
-	struct uprobe_cpu_buffer *ucb;
 	void *data;
 	int size, esize;
 	struct trace_event_call *call = trace_probe_event_call(&tu->tp);
 
 	WARN_ON(call != trace_file->event_call);
 
-	ucb = prepare_uprobe_buffer(tu, regs, ucbp);
 	if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE))
 		return;
 
@@ -1014,13 +1012,16 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
 			     struct uprobe_cpu_buffer **ucbp)
 {
 	struct event_file_link *link;
+	struct uprobe_cpu_buffer *ucb;
 
 	if (is_ret_probe(tu))
 		return 0;
 
+	ucb = prepare_uprobe_buffer(tu, regs, ucbp);
+
 	rcu_read_lock();
 	trace_probe_for_each_link_rcu(link, &tu->tp)
-		__uprobe_trace_func(tu, 0, regs, ucbp, link->file);
+		__uprobe_trace_func(tu, 0, regs, ucb, link->file);
 	rcu_read_unlock();
 
 	return 0;
@@ -1031,10 +1032,13 @@ static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
 				 struct uprobe_cpu_buffer **ucbp)
 {
 	struct event_file_link *link;
+	struct uprobe_cpu_buffer *ucb;
+
+	ucb = prepare_uprobe_buffer(tu, regs, ucbp);
 
 	rcu_read_lock();
 	trace_probe_for_each_link_rcu(link, &tu->tp)
-		__uprobe_trace_func(tu, func, regs, ucbp, link->file);
+		__uprobe_trace_func(tu, func, regs, ucb, link->file);
 	rcu_read_unlock();
 }
 
-- 
cgit v1.2.3


From d09c05aa35909adb7d29f92f0cd79fdcd1338ef0 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Mon, 20 May 2024 22:30:40 -0400
Subject: scsi: core: Handle devices which return an unusually large VPD page
 count

Peter Schneider reported that a system would no longer boot after
updating to 6.8.4.  Peter bisected the issue and identified commit
b5fc07a5fb56 ("scsi: core: Consult supported VPD page list prior to
fetching page") as being the culprit.

Turns out the enclosure device in Peter's system reports a byteswapped
page length for VPD page 0. It reports "02 00" as page length instead
of "00 02". This causes us to attempt to access 516 bytes (page length
+ header) of information despite only 2 pages being present.

Limit the page search scope to the size of our VPD buffer to guard
against devices returning a larger page count than requested.

Link: https://lore.kernel.org/r/20240521023040.2703884-1-martin.petersen@oracle.com
Fixes: b5fc07a5fb56 ("scsi: core: Consult supported VPD page list prior to fetching page")
Cc: stable@vger.kernel.org
Reported-by: Peter Schneider <pschneider1968@googlemail.com>
Closes: https://lore.kernel.org/all/eec6ebbf-061b-4a7b-96dc-ea748aa4d035@googlemail.com/
Tested-by: Peter Schneider <pschneider1968@googlemail.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 3e0c0381277a..f0464db3f9de 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -350,6 +350,13 @@ static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page)
 		if (result < SCSI_VPD_HEADER_SIZE)
 			return 0;
 
+		if (result > sizeof(vpd)) {
+			dev_warn_once(&sdev->sdev_gendev,
+				      "%s: long VPD page 0 length: %d bytes\n",
+				      __func__, result);
+			result = sizeof(vpd);
+		}
+
 		result -= SCSI_VPD_HEADER_SIZE;
 		if (!memchr(&vpd[SCSI_VPD_HEADER_SIZE], page, result))
 			return 0;
-- 
cgit v1.2.3


From 06e785aeb9ea8a43d0a3967c1ba6e69d758e82d4 Mon Sep 17 00:00:00 2001
From: Matt Jan <zoo868e@gmail.com>
Date: Tue, 14 May 2024 12:10:46 +0800
Subject: connector: Fix invalid conversion in cn_proc.h

The implicit conversion from unsigned int to enum
proc_cn_event is invalid, so explicitly cast it
for compilation in a C++ compiler.
/usr/include/linux/cn_proc.h: In function 'proc_cn_event valid_event(proc_cn_event)':
/usr/include/linux/cn_proc.h:72:17: error: invalid conversion from 'unsigned int' to 'proc_cn_event' [-fpermissive]
   72 |         ev_type &= PROC_EVENT_ALL;
      |                 ^
      |                 |
      |                 unsigned int

Signed-off-by: Matt Jan <zoo868e@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/cn_proc.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/uapi/linux/cn_proc.h b/include/uapi/linux/cn_proc.h
index f2afb7cc4926..18e3745b86cd 100644
--- a/include/uapi/linux/cn_proc.h
+++ b/include/uapi/linux/cn_proc.h
@@ -69,8 +69,7 @@ struct proc_input {
 
 static inline enum proc_cn_event valid_event(enum proc_cn_event ev_type)
 {
-	ev_type &= PROC_EVENT_ALL;
-	return ev_type;
+	return (enum proc_cn_event)(ev_type & PROC_EVENT_ALL);
 }
 
 /*
-- 
cgit v1.2.3


From 128d54fbcb14b8717ecf596d3dbded327b9980b3 Mon Sep 17 00:00:00 2001
From: Mathieu Othacehe <othacehe@gnu.org>
Date: Tue, 21 May 2024 08:54:06 +0200
Subject: net: phy: micrel: set soft_reset callback to genphy_soft_reset for
 KSZ8061

Following a similar reinstate for the KSZ8081 and KSZ9031.

Older kernels would use the genphy_soft_reset if the PHY did not implement
a .soft_reset.

The KSZ8061 errata described here:
https://ww1.microchip.com/downloads/en/DeviceDoc/KSZ8061-Errata-DS80000688B.pdf
and worked around with 232ba3a51c ("net: phy: Micrel KSZ8061: link failure after cable connect")
is back again without this soft reset.

Fixes: 6e2d85ec0559 ("net: phy: Stop with excessive soft reset")
Tested-by: Karim Ben Houcine <karim.benhoucine@landisgyr.com>
Signed-off-by: Mathieu Othacehe <othacehe@gnu.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 13e30ea7eec5..1d769322b059 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -5327,6 +5327,7 @@ static struct phy_driver ksphy_driver[] = {
 	/* PHY_BASIC_FEATURES */
 	.probe		= kszphy_probe,
 	.config_init	= ksz8061_config_init,
+	.soft_reset	= genphy_soft_reset,
 	.config_intr	= kszphy_config_intr,
 	.handle_interrupt = kszphy_handle_interrupt,
 	.suspend	= kszphy_suspend,
-- 
cgit v1.2.3


From 9b038d004ce95551cb35381c49fe896c5bc11ffe Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 21 May 2024 14:37:43 +0100
Subject: netfs: Fix io_uring based write-through

This can be triggered by mounting a cifs filesystem with a cache=strict
mount option and then, using the fsx program from xfstests, doing:

        ltp/fsx -A -d -N 1000 -S 11463 -P /tmp /cifs-mount/foo \
          --replay-ops=gen112-fsxops

Where gen112-fsxops holds:

        fallocate 0x6be7 0x8fc5 0x377d3
        copy_range 0x9c71 0x77e8 0x2edaf 0x377d3
        write 0x2776d 0x8f65 0x377d3

The problem is that netfs_io_request::len is being used for two purposes
and ends up getting set to the amount of data we transferred, not the
amount of data the caller asked to be transferred (for various reasons,
such as mmap'd writes, we might end up rounding out the data written to the
server to include the entire folio at each end).

Fix this by keeping the amount we were asked to write in ->len and using
->submitted to track what we issued ops for.  Then, when we come to calling
->ki_complete(), ->len is the right size.

This also required netfs_cleanup_dio_write() to change since we're no
longer advancing wreq->len.  Use wreq->transferred instead as we might have
done a short read.

With this, the generic/112 xfstest passes if cifs is forced to put all
non-DIO opens into write-through mode.

Fixes: 288ace2f57c9 ("netfs: New writeback implementation")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/295086.1716298663@warthog.procyon.org.uk
cc: Jeff Layton <jlayton@kernel.org>
cc: Steve French <stfrench@microsoft.com>
cc: Enzo Matsumiya <ematsumiya@suse.de>
cc: netfs@lists.linux.dev
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: linux-cifs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/netfs/direct_write.c  | 2 +-
 fs/netfs/write_collect.c | 7 ++++---
 fs/netfs/write_issue.c   | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index 608ba6416919..28163516bf03 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -12,7 +12,7 @@
 static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
 {
 	struct inode *inode = wreq->inode;
-	unsigned long long end = wreq->start + wreq->len;
+	unsigned long long end = wreq->start + wreq->transferred;
 
 	if (!wreq->error &&
 	    i_size_read(inode) < end) {
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 60112e4b2c5e..426cf87aaf2e 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -510,7 +510,7 @@ reassess_streams:
 	 * stream has a gap that can be jumped.
 	 */
 	if (notes & SOME_EMPTY) {
-		unsigned long long jump_to = wreq->start + wreq->len;
+		unsigned long long jump_to = wreq->start + READ_ONCE(wreq->submitted);
 
 		for (s = 0; s < NR_IO_STREAMS; s++) {
 			stream = &wreq->io_streams[s];
@@ -690,10 +690,11 @@ void netfs_write_collection_worker(struct work_struct *work)
 	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
 
 	if (wreq->iocb) {
-		wreq->iocb->ki_pos += wreq->transferred;
+		size_t written = min(wreq->transferred, wreq->len);
+		wreq->iocb->ki_pos += written;
 		if (wreq->iocb->ki_complete)
 			wreq->iocb->ki_complete(
-				wreq->iocb, wreq->error ? wreq->error : wreq->transferred);
+				wreq->iocb, wreq->error ? wreq->error : written);
 		wreq->iocb = VFS_PTR_POISON;
 	}
 
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index e190043bc0da..86dad7e4202b 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -254,7 +254,7 @@ static void netfs_issue_write(struct netfs_io_request *wreq,
 	stream->construct = NULL;
 
 	if (subreq->start + subreq->len > wreq->start + wreq->submitted)
-		wreq->len = wreq->submitted = subreq->start + subreq->len - wreq->start;
+		WRITE_ONCE(wreq->submitted, subreq->start + subreq->len - wreq->start);
 	netfs_do_issue_write(stream, subreq);
 }
 
-- 
cgit v1.2.3


From 2c6b531020f0590db3b6b4950a41c692e9aa4f4a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 21 May 2024 14:36:27 +0100
Subject: netfs: Fix AIO error handling when doing write-through

If an error occurs whilst we're doing an AIO write in write-through mode,
we may end up calling ->ki_complete() *and* returning an error from
->write_iter().  This can result in either a UAF (the ->ki_complete() func
pointer may get overwritten, for example) or a refcount underflow in
io_submit() as ->ki_complete is called twice.

Fix this by making netfs_end_writethrough() - and thus
netfs_perform_write() - unconditionally return -EIOCBQUEUED if we're doing
an AIO write and wait for completion if we're not.

Fixes: 288ace2f57c9 ("netfs: New writeback implementation")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/295052.1716298587@warthog.procyon.org.uk
cc: Jeff Layton <jlayton@kernel.org>
cc: Enzo Matsumiya <ematsumiya@suse.de>
cc: netfs@lists.linux.dev
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: linux-cifs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/netfs/write_issue.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 86dad7e4202b..3aa86e268f40 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -636,7 +636,12 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
 
 	mutex_unlock(&ictx->wb_lock);
 
-	ret = wreq->error;
+	if (wreq->iocb) {
+		ret = -EIOCBQUEUED;
+	} else {
+		wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
+		ret = wreq->error;
+	}
 	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
 	return ret;
 }
-- 
cgit v1.2.3


From 79c137454815ba5554caa8eeb4ad5c94e96e45ce Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2@nxp.com>
Date: Tue, 21 May 2024 19:49:38 +0800
Subject: filemap: add helper mapping_max_folio_size()

Add mapping_max_folio_size() to get the maximum folio size for this
pagecache mapping.

Fixes: 5d8edfb900d5 ("iomap: Copy larger chunks from userspace")
Cc: stable@vger.kernel.org
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Link: https://lore.kernel.org/r/20240521114939.2541461-1-xu.yang_2@nxp.com
Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 include/linux/pagemap.h | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 3d69589c00a4..ee633712bba0 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -346,6 +346,19 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
 	m->gfp_mask = mask;
 }
 
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size.  I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER	8
+#endif
+
 /**
  * mapping_set_large_folios() - Indicate the file supports large folios.
  * @mapping: The file.
@@ -372,6 +385,14 @@ static inline bool mapping_large_folio_support(struct address_space *mapping)
 		test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
 }
 
+/* Return the maximum folio size for this pagecache mapping, in bytes. */
+static inline size_t mapping_max_folio_size(struct address_space *mapping)
+{
+	if (mapping_large_folio_support(mapping))
+		return PAGE_SIZE << MAX_PAGECACHE_ORDER;
+	return PAGE_SIZE;
+}
+
 static inline int filemap_nr_thps(struct address_space *mapping)
 {
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
@@ -530,19 +551,6 @@ static inline void *detach_page_private(struct page *page)
 	return folio_detach_private(page_folio(page));
 }
 
-/*
- * There are some parts of the kernel which assume that PMD entries
- * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
- * limit the maximum allocation order to PMD size.  I'm not aware of any
- * assumptions about maximum order if THP are disabled, but 8 seems like
- * a good order (that's 1MB if you're using 4kB pages)
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
-#else
-#define MAX_PAGECACHE_ORDER	8
-#endif
-
 #ifdef CONFIG_NUMA
 struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
 #else
-- 
cgit v1.2.3


From 4e527d5841e24623181edc7fd6f6598ffa810e10 Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2@nxp.com>
Date: Tue, 21 May 2024 19:49:39 +0800
Subject: iomap: fault in smaller chunks for non-large folio mappings

Since commit (5d8edfb900d5 "iomap: Copy larger chunks from userspace"),
iomap will try to copy in larger chunks than PAGE_SIZE. However, if the
mapping doesn't support large folio, only one page of maximum 4KB will
be created and 4KB data will be writen to pagecache each time. Then,
next 4KB will be handled in next iteration. This will cause potential
write performance problem.

If chunk is 2MB, total 512 pages need to be handled finally. During this
period, fault_in_iov_iter_readable() is called to check iov_iter readable
validity. Since only 4KB will be handled each time, below address space
will be checked over and over again:

start         	end
-
buf,    	buf+2MB
buf+4KB, 	buf+2MB
buf+8KB, 	buf+2MB
...
buf+2044KB 	buf+2MB

Obviously the checking size is wrong since only 4KB will be handled each
time. So this will get a correct chunk to let iomap work well in non-large
folio case.

With this change, the write speed will be stable. Tested on ARM64 device.

Before:

 - dd if=/dev/zero of=/dev/sda bs=400K  count=10485  (334 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=800K  count=5242   (278 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=1600K count=2621   (204 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=2200K count=1906   (170 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=3000K count=1398   (150 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=4500K count=932    (139 MB/s)

After:

 - dd if=/dev/zero of=/dev/sda bs=400K  count=10485  (339 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=800K  count=5242   (330 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=1600K count=2621   (332 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=2200K count=1906   (333 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=3000K count=1398   (333 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=4500K count=932    (333 MB/s)

Fixes: 5d8edfb900d5 ("iomap: Copy larger chunks from userspace")
Cc: stable@vger.kernel.org
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Link: https://lore.kernel.org/r/20240521114939.2541461-2-xu.yang_2@nxp.com
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/iomap/buffered-io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 41c8f0c68ef5..c5802a459334 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -898,11 +898,11 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
 static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 {
 	loff_t length = iomap_length(iter);
-	size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
 	loff_t pos = iter->pos;
 	ssize_t total_written = 0;
 	long status = 0;
 	struct address_space *mapping = iter->inode->i_mapping;
+	size_t chunk = mapping_max_folio_size(mapping);
 	unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0;
 
 	do {
-- 
cgit v1.2.3


From f826bc9d6fc2f0e089fb8d104415d72e4d2e204c Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin@ispras.ru>
Date: Mon, 20 May 2024 12:08:18 +0300
Subject: signalfd: fix error return code

If anon_inode_getfile() fails, return appropriate error code. This looks
like a single typo: the similar code changes in timerfd and userfaultfd
are okay.

Found by Linux Verification Center (linuxtesting.org).

Fixes: fbe38120eb1d ("signalfd: convert to ->read_iter()")
Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
Link: https://lore.kernel.org/r/20240520090819.76342-1-pchelkin@ispras.ru
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/signalfd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/signalfd.c b/fs/signalfd.c
index 4a5614442dbf..65fe5eed0be4 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -282,7 +282,7 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
 		if (IS_ERR(file)) {
 			put_unused_fd(ufd);
 			kfree(ctx);
-			return ufd;
+			return PTR_ERR(file);
 		}
 		file->f_mode |= FMODE_NOWAIT;
 
-- 
cgit v1.2.3


From 65bea9953715b19371164a8bec4f74fdd22c9e5a Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin@ispras.ru>
Date: Mon, 20 May 2024 12:08:19 +0300
Subject: signalfd: drop an obsolete comment

Commit fbe38120eb1d ("signalfd: convert to ->read_iter()") removed the
call to anon_inode_getfd() by splitting fd setup into two parts. Drop the
comment referencing the internal details of that function.

Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
Link: https://lore.kernel.org/r/20240520090819.76342-2-pchelkin@ispras.ru
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/signalfd.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/signalfd.c b/fs/signalfd.c
index 65fe5eed0be4..ec7b2da2477a 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -286,10 +286,6 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
 		}
 		file->f_mode |= FMODE_NOWAIT;
 
-		/*
-		 * When we call this, the initialization must be complete, since
-		 * anon_inode_getfd() will install the fd.
-		 */
 		fd_install(ufd, file);
 	} else {
 		struct fd f = fdget(ufd);
-- 
cgit v1.2.3


From c596bea1452ddf172ec9b588e4597228e9a1f4d5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 21 May 2024 16:49:46 +0100
Subject: netfs: Fix setting of BDP_ASYNC from iocb flags

Fix netfs_perform_write() to set BDP_ASYNC if IOCB_NOWAIT is set rather
than if IOCB_SYNC is not set.  It reflects asynchronicity in the sense of
not waiting rather than synchronicity in the sense of not returning until
the op is complete.

Without this, generic/590 fails on cifs in strict caching mode with a
complaint that one of the writes fails with EAGAIN.  The test can be
distilled down to:

        mount -t cifs /my/share /mnt -ostuff
        xfs_io -i -c 'falloc 0 8191M -c fsync -f /mnt/file
        xfs_io -i -c 'pwrite -b 1M -W 0 8191M' /mnt/file

Fixes: c38f4e96e605 ("netfs: Provide func to copy data to pagecache for buffered write")
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/316306.1716306586@warthog.procyon.org.uk
Reviewed-by: Jens Axboe <axboe@kernel.dk>
cc: Jeff Layton <jlayton@kernel.org>
cc: Enzo Matsumiya <ematsumiya@suse.de>
cc: Jens Axboe <axboe@kernel.dk>
cc: Matthew Wilcox <willy@infradead.org>
cc: netfs@lists.linux.dev
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: linux-cifs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/netfs/buffered_write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 1121601536d1..07bc1fd43530 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -181,7 +181,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
 	struct folio *folio, *writethrough = NULL;
 	enum netfs_how_to_modify howto;
 	enum netfs_folio_trace trace;
-	unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC;
+	unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
 	ssize_t written = 0, ret, ret2;
 	loff_t i_size, pos = iocb->ki_pos, from, to;
 	size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
-- 
cgit v1.2.3


From 712182b67e831912f90259102ae334089e7bccd1 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Tue, 21 May 2024 21:00:44 +0200
Subject: swap: yield device immediately

Otherwise we can cause spurious EBUSY issues when trying to mount the
rootfs later on.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=218845
Reported-by: Petri Kaukasoina <petri.kaukasoina@tuni.fi>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 kernel/power/swap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 5bc04bfe2db1..c6f24d17866d 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -1600,7 +1600,7 @@ int swsusp_check(bool exclusive)
 
 put:
 		if (error)
-			fput(hib_resume_bdev_file);
+			bdev_fput(hib_resume_bdev_file);
 		else
 			pr_debug("Image signature found, resuming\n");
 	} else {
-- 
cgit v1.2.3


From 51ef9305b8f40946d65c40368ffb4c14636d369a Mon Sep 17 00:00:00 2001
From: Maher Sanalla <msanalla@nvidia.com>
Date: Wed, 22 May 2024 22:26:52 +0300
Subject: net/mlx5: Lag, do bond only if slaves agree on roce state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, the driver does not enforce that lag bond slaves must have
matching roce capabilities. Yet, in mlx5_do_bond(), the driver attempts
to enable roce on all vports of the bond slaves, causing the following
syndrome when one slave has no roce fw support:

mlx5_cmd_out_err:809:(pid 25427): MODIFY_NIC_VPORT_CONTEXT(0×755) op_mod(0×0)
failed, status bad parameter(0×3), syndrome (0xc1f678), err(-22)

Thus, create HW lag only if bond's slaves agree on roce state,
either all slaves have roce support resulting in a roce lag bond,
or none do, resulting in a raw eth bond.

Fixes: 7907f23adc18 ("net/mlx5: Implement RoCE LAG feature")
Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index f7f0476a4a58..d0871c46b8c5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -719,6 +719,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
 	struct mlx5_core_dev *dev;
 	u8 mode;
 #endif
+	bool roce_support;
 	int i;
 
 	for (i = 0; i < ldev->ports; i++)
@@ -743,6 +744,11 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
 		if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
 			return false;
 #endif
+	roce_support = mlx5_get_roce_state(ldev->pf[MLX5_LAG_P1].dev);
+	for (i = 1; i < ldev->ports; i++)
+		if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support)
+			return false;
+
 	return true;
 }
 
@@ -910,8 +916,10 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 		} else if (roce_lag) {
 			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
 			mlx5_rescan_drivers_locked(dev0);
-			for (i = 1; i < ldev->ports; i++)
-				mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
+			for (i = 1; i < ldev->ports; i++) {
+				if (mlx5_get_roce_state(ldev->pf[i].dev))
+					mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
+			}
 		} else if (shared_fdb) {
 			int i;
 
-- 
cgit v1.2.3


From fca3b4791850b7e2181f0b3195b66d53df83151b Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Wed, 22 May 2024 22:26:53 +0300
Subject: net/mlx5: Do not query MPIR on embedded CPU function

A proper query to MPIR needs to set the correct value in the depth field.
On embedded CPU this value is not necessarily zero. As there is no real
use case for multi-PF netdev on the embedded CPU of the smart NIC, block
this option.

This fixes the following failure:
ACCESS_REG(0x805) op_mod(0x1) failed, status bad system state(0x4), syndrome (0x685f19), err(-5)

Fixes: 678eb448055a ("net/mlx5: SD, Implement basic query and instantiation")
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index dd5d186dc614..f6deb5a3f820 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -100,10 +100,6 @@ static bool ft_create_alias_supported(struct mlx5_core_dev *dev)
 
 static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses)
 {
-	/* Feature is currently implemented for PFs only */
-	if (!mlx5_core_is_pf(dev))
-		return false;
-
 	/* Honor the SW implementation limit */
 	if (host_buses > MLX5_SD_MAX_GROUP_SZ)
 		return false;
@@ -162,6 +158,14 @@ static int sd_init(struct mlx5_core_dev *dev)
 	bool sdm;
 	int err;
 
+	/* Feature is currently implemented for PFs only */
+	if (!mlx5_core_is_pf(dev))
+		return 0;
+
+	/* Block on embedded CPU PFs */
+	if (mlx5_core_is_ecpf(dev))
+		return 0;
+
 	if (!MLX5_CAP_MCAM_REG(dev, mpir))
 		return 0;
 
-- 
cgit v1.2.3


From 1b9f86c6d53245dab087f1b2c05727b5982142ff Mon Sep 17 00:00:00 2001
From: Gal Pressman <gal@nvidia.com>
Date: Wed, 22 May 2024 22:26:54 +0300
Subject: net/mlx5: Fix MTMP register capability offset in MCAM register

The MTMP register (0x900a) capability offset is off-by-one, move it to
the right place.

Fixes: 1f507e80c700 ("net/mlx5: Expose NIC temperature via hardware monitoring kernel API")
Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mlx5/mlx5_ifc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f468763478ae..5df52e15f7d6 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -10308,9 +10308,9 @@ struct mlx5_ifc_mcam_access_reg_bits {
 	u8         mfrl[0x1];
 	u8         regs_39_to_32[0x8];
 
-	u8         regs_31_to_10[0x16];
+	u8         regs_31_to_11[0x15];
 	u8         mtmp[0x1];
-	u8         regs_8_to_0[0x9];
+	u8         regs_9_to_0[0xa];
 };
 
 struct mlx5_ifc_mcam_access_reg_bits1 {
-- 
cgit v1.2.3


From 16d66a4fa81da07bc4ed19f4e53b87263c2f8d38 Mon Sep 17 00:00:00 2001
From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Date: Wed, 22 May 2024 22:26:55 +0300
Subject: net/mlx5: Use mlx5_ipsec_rx_status_destroy to correctly delete status
 rules

rx_create no longer allocates a modify_hdr instance that needs to be
cleaned up. The mlx5_modify_header_dealloc call will lead to a NULL pointer
dereference. A leak in the rules also previously occurred since there are
now two rules populated related to status.

  BUG: kernel NULL pointer dereference, address: 0000000000000000
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 109907067 P4D 109907067 PUD 116890067 PMD 0
  Oops: 0000 [#1] SMP
  CPU: 1 PID: 484 Comm: ip Not tainted 6.9.0-rc2-rrameshbabu+ #254
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Arch Linux 1.16.3-1-1 04/01/2014
  RIP: 0010:mlx5_modify_header_dealloc+0xd/0x70
  <snip>
  Call Trace:
   <TASK>
   ? show_regs+0x60/0x70
   ? __die+0x24/0x70
   ? page_fault_oops+0x15f/0x430
   ? free_to_partial_list.constprop.0+0x79/0x150
   ? do_user_addr_fault+0x2c9/0x5c0
   ? exc_page_fault+0x63/0x110
   ? asm_exc_page_fault+0x27/0x30
   ? mlx5_modify_header_dealloc+0xd/0x70
   rx_create+0x374/0x590
   rx_add_rule+0x3ad/0x500
   ? rx_add_rule+0x3ad/0x500
   ? mlx5_cmd_exec+0x2c/0x40
   ? mlx5_create_ipsec_obj+0xd6/0x200
   mlx5e_accel_ipsec_fs_add_rule+0x31/0xf0
   mlx5e_xfrm_add_state+0x426/0xc00
  <snip>

Fixes: 94af50c0a9bb ("net/mlx5e: Unify esw and normal IPsec status table creation/destruction")
Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index 41a2543a52cd..e51b03d4c717 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -750,8 +750,7 @@ err_fs:
 err_fs_ft:
 	if (rx->allow_tunnel_mode)
 		mlx5_eswitch_unblock_encap(mdev);
-	mlx5_del_flow_rules(rx->status.rule);
-	mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr);
+	mlx5_ipsec_rx_status_destroy(ipsec, rx);
 err_add:
 	mlx5_destroy_flow_table(rx->ft.status);
 err_fs_ft_status:
-- 
cgit v1.2.3


From 9a52f6d44f4521773b4699b4ed34b8e21d5a175c Mon Sep 17 00:00:00 2001
From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Date: Wed, 22 May 2024 22:26:56 +0300
Subject: net/mlx5e: Fix IPsec tunnel mode offload feature check

Remove faulty check disabling checksum offload and GSO for offload of
simple IPsec tunnel L4 traffic. Comment previously describing the deleted
code incorrectly claimed the check prevented double tunnel (or three layers
of ip headers).

Fixes: f1267798c980 ("net/mlx5: Fix checksum issue of VXLAN and IPsec crypto offload")
Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h   | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index 82064614846f..359050f0b54d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -97,18 +97,11 @@ mlx5e_ipsec_feature_check(struct sk_buff *skb, netdev_features_t features)
 		if (!x || !x->xso.offload_handle)
 			goto out_disable;
 
-		if (xo->inner_ipproto) {
-			/* Cannot support tunnel packet over IPsec tunnel mode
-			 * because we cannot offload three IP header csum
-			 */
-			if (x->props.mode == XFRM_MODE_TUNNEL)
-				goto out_disable;
-
-			/* Only support UDP or TCP L4 checksum */
-			if (xo->inner_ipproto != IPPROTO_UDP &&
-			    xo->inner_ipproto != IPPROTO_TCP)
-				goto out_disable;
-		}
+		/* Only support UDP or TCP L4 checksum */
+		if (xo->inner_ipproto &&
+		    xo->inner_ipproto != IPPROTO_UDP &&
+		    xo->inner_ipproto != IPPROTO_TCP)
+			goto out_disable;
 
 		return features;
 
-- 
cgit v1.2.3


From f55cd31287e5f77f226c91d2f7756bafa0d583ed Mon Sep 17 00:00:00 2001
From: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Date: Wed, 22 May 2024 22:26:57 +0300
Subject: net/mlx5e: Do not use ptp structure for tx ts stats when not
 initialized

The ptp channel instance is only initialized when ptp traffic is first
processed by the driver. This means that there is a window in between when
port timestamping is enabled and ptp traffic is sent where the ptp channel
instance is not initialized. Accessing statistics during this window will
lead to an access violation (NULL + member offset). Check the validity of
the instance before attempting to query statistics.

  BUG: unable to handle page fault for address: 0000000000003524
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 109dfc067 P4D 109dfc067 PUD 1064ef067 PMD 0
  Oops: 0000 [#1] SMP
  CPU: 0 PID: 420 Comm: ethtool Not tainted 6.9.0-rc2-rrameshbabu+ #245
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Arch Linux 1.16.3-1-1 04/01/204
  RIP: 0010:mlx5e_stats_ts_get+0x4c/0x130
  <snip>
  Call Trace:
   <TASK>
   ? show_regs+0x60/0x70
   ? __die+0x24/0x70
   ? page_fault_oops+0x15f/0x430
   ? do_user_addr_fault+0x2c9/0x5c0
   ? exc_page_fault+0x63/0x110
   ? asm_exc_page_fault+0x27/0x30
   ? mlx5e_stats_ts_get+0x4c/0x130
   ? mlx5e_stats_ts_get+0x20/0x130
   mlx5e_get_ts_stats+0x15/0x20
  <snip>

Fixes: 3579032c08c1 ("net/mlx5e: Implement ethtool hardware timestamping statistics")
Signed-off-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index e211c41cec06..e1ed214e8651 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -1186,6 +1186,9 @@ void mlx5e_stats_ts_get(struct mlx5e_priv *priv,
 		ts_stats->err = 0;
 		ts_stats->lost = 0;
 
+		if (!ptp)
+			goto out;
+
 		/* Aggregate stats across all TCs */
 		for (i = 0; i < ptp->num_tc; i++) {
 			struct mlx5e_ptp_cq_stats *stats =
@@ -1214,6 +1217,7 @@ void mlx5e_stats_ts_get(struct mlx5e_priv *priv,
 		}
 	}
 
+out:
 	mutex_unlock(&priv->state_lock);
 }
 
-- 
cgit v1.2.3


From 5c74195d5dd977e97556e6fa76909b831c241230 Mon Sep 17 00:00:00 2001
From: Carolina Jubran <cjubran@nvidia.com>
Date: Wed, 22 May 2024 22:26:58 +0300
Subject: net/mlx5e: Use rx_missed_errors instead of rx_dropped for reporting
 buffer exhaustion

Previously, the driver incorrectly used rx_dropped to report device
buffer exhaustion.

According to the documentation, rx_dropped should not be used to count
packets dropped due to buffer exhaustion, which is the purpose of
rx_missed_errors.

Use rx_missed_errors as intended for counting packets dropped due to
buffer exhaustion.

Fixes: 269e6b3af3bf ("net/mlx5e: Report additional error statistics in get stats ndo")
Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b758bc72ac36..c53c99dde558 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3886,7 +3886,7 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 		mlx5e_fold_sw_stats64(priv, stats);
 	}
 
-	stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
+	stats->rx_missed_errors = priv->stats.qcnt.rx_out_of_buffer;
 
 	stats->rx_length_errors =
 		PPORT_802_3_GET(pstats, a_in_range_length_errors) +
-- 
cgit v1.2.3


From 83fea49f2711fc90c0d115b0ed04046b45155b65 Mon Sep 17 00:00:00 2001
From: Gal Pressman <gal@nvidia.com>
Date: Wed, 22 May 2024 22:26:59 +0300
Subject: net/mlx5e: Fix UDP GSO for encapsulated packets

When the skb is encapsulated, adjust the inner UDP header instead of the
outer one, and account for UDP header (instead of TCP) in the inline
header size calculation.

Fixes: 689adf0d4892 ("net/mlx5e: Add UDP GSO support")
Reported-by: Jason Baron <jbaron@akamai.com>
Closes: https://lore.kernel.org/netdev/c42961cb-50b9-4a9a-bd43-87fe48d88d29@akamai.com/
Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Boris Pismenny <borisp@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h | 8 +++++++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c             | 6 +++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index caa34b9c161e..33e32584b07f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -102,8 +102,14 @@ static inline void
 mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb)
 {
 	int payload_len = skb_shinfo(skb)->gso_size + sizeof(struct udphdr);
+	struct udphdr *udphdr;
 
-	udp_hdr(skb)->len = htons(payload_len);
+	if (skb->encapsulation)
+		udphdr = (struct udphdr *)skb_inner_transport_header(skb);
+	else
+		udphdr = udp_hdr(skb);
+
+	udphdr->len = htons(payload_len);
 }
 
 struct mlx5e_accel_tx_state {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 099bf1078889..b09e9abd39f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -153,7 +153,11 @@ mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop)
 
 	*hopbyhop = 0;
 	if (skb->encapsulation) {
-		ihs = skb_inner_tcp_all_headers(skb);
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+			ihs = skb_inner_transport_offset(skb) +
+			      sizeof(struct udphdr);
+		else
+			ihs = skb_inner_tcp_all_headers(skb);
 		stats->tso_inner_packets++;
 		stats->tso_inner_bytes += skb->len - ihs;
 	} else {
-- 
cgit v1.2.3


From 44382b3ed6b2787710c8ade06c0e97f5970a47c8 Mon Sep 17 00:00:00 2001
From: Friedrich Vock <friedrich.vock@gmx.de>
Date: Tue, 14 May 2024 09:09:31 +0200
Subject: bpf: Fix potential integer overflow in resolve_btfids

err is a 32-bit integer, but elf_update returns an off_t, which is 64-bit
at least on 64-bit platforms. If symbols_patch is called on a binary between
2-4GB in size, the result will be negative when cast to a 32-bit integer,
which the code assumes means an error occurred. This can wrongly trigger
build failures when building very large kernel images.

Fixes: fbbb68de80a4 ("bpf: Add resolve_btfids tool to resolve BTF IDs in ELF object")
Signed-off-by: Friedrich Vock <friedrich.vock@gmx.de>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20240514070931.199694-1-friedrich.vock@gmx.de
---
 tools/bpf/resolve_btfids/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index d9520cb826b3..af393c7dee1f 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -728,7 +728,7 @@ static int sets_patch(struct object *obj)
 
 static int symbols_patch(struct object *obj)
 {
-	int err;
+	off_t err;
 
 	if (__symbols_patch(obj, &obj->structs)  ||
 	    __symbols_patch(obj, &obj->unions)   ||
-- 
cgit v1.2.3


From 64e3d02b43b17390f0fa9af6708a4eafdf20ba01 Mon Sep 17 00:00:00 2001
From: Kanchan Joshi <joshi.k@samsung.com>
Date: Fri, 24 May 2024 16:04:48 +0530
Subject: nvme: remove sgs and sws

sgs/sws are unused, so remove these from nvme_ns_head structure.

Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/nvme.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index fc31bd340a63..c43a30753d87 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -471,8 +471,6 @@ struct nvme_ns_head {
 	u8			pi_type;
 	u8			pi_offset;
 	u8			guard_type;
-	u16			sgs;
-	u32			sws;
 #ifdef CONFIG_BLK_DEV_ZONED
 	u64			zsze;
 #endif
-- 
cgit v1.2.3


From 1bd293fcf3af84674e82ed022c049491f3768840 Mon Sep 17 00:00:00 2001
From: Kundan Kumar <kundan.kumar@samsung.com>
Date: Thu, 23 May 2024 17:01:49 +0530
Subject: nvme: adjust multiples of NVME_CTRL_PAGE_SIZE in offset

bio_vec start offset may be relatively large particularly when large
folio gets added to the bio. A bigger offset will result in avoiding the
single-segment mapping optimization and end up using expensive
mempool_alloc further.

Rather than using absolute value, adjust bv_offset by
NVME_CTRL_PAGE_SIZE while checking if segment can be fitted into one/two
PRP entries.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Kundan Kumar <kundan.kumar@samsung.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/pci.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 710043086dff..102a9fb0c65f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -778,7 +778,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		struct bio_vec bv = req_bvec(req);
 
 		if (!is_pci_p2pdma_page(bv.bv_page)) {
-			if (bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
+			if ((bv.bv_offset & (NVME_CTRL_PAGE_SIZE - 1)) +
+			     bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
 				return nvme_setup_prp_simple(dev, req,
 							     &cmnd->rw, &bv);
 
-- 
cgit v1.2.3


From 29be9100aca2915fab54b5693309bc42956542e5 Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.dionne@auristor.com>
Date: Fri, 24 May 2024 17:17:55 +0100
Subject: afs: Don't cross .backup mountpoint from backup volume

Don't cross a mountpoint that explicitly specifies a backup volume
(target is <vol>.backup) when starting from a backup volume.

It it not uncommon to mount a volume's backup directly in the volume
itself.  This can cause tools that are not paying attention to get
into a loop mounting the volume onto itself as they attempt to
traverse the tree, leading to a variety of problems.

This doesn't prevent the general case of loops in a sequence of
mountpoints, but addresses a common special case in the same way
as other afs clients.

Reported-by: Jan Henrik Sylvester <jan.henrik.sylvester@uni-hamburg.de>
Link: http://lists.infradead.org/pipermail/linux-afs/2024-May/008454.html
Reported-by: Markus Suvanto <markus.suvanto@gmail.com>
Link: http://lists.infradead.org/pipermail/linux-afs/2024-February/008074.html
Signed-off-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/768760.1716567475@warthog.procyon.org.uk
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
cc: linux-afs@lists.infradead.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/afs/mntpt.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 97f50e9fd9eb..297487ee8323 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -140,6 +140,11 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
 		put_page(page);
 		if (ret < 0)
 			return ret;
+
+		/* Don't cross a backup volume mountpoint from a backup volume */
+		if (src_as->volume && src_as->volume->type == AFSVL_BACKVOL &&
+		    ctx->type == AFSVL_BACKVOL)
+			return -ENODEV;
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 46ba0e49b64232adac35a2bc892f1710c5b0fb7f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 21 May 2024 09:33:57 -0700
Subject: bpf: fix multi-uprobe PID filtering logic

Current implementation of PID filtering logic for multi-uprobes in
uprobe_prog_run() is filtering down to exact *thread*, while the intent
for PID filtering it to filter by *process* instead. The check in
uprobe_prog_run() also differs from the analogous one in
uprobe_multi_link_filter() for some reason. The latter is correct,
checking task->mm, not the task itself.

Fix the check in uprobe_prog_run() to perform the same task->mm check.

While doing this, we also update get_pid_task() use to use PIDTYPE_TGID
type of lookup, given the intent is to get a representative task of an
entire process. This doesn't change behavior, but seems more logical. It
would hold task group leader task now, not any random thread task.

Last but not least, given multi-uprobe support is half-broken due to
this PID filtering logic (depending on whether PID filtering is
important or not), we need to make it easy for user space consumers
(including libbpf) to easily detect whether PID filtering logic was
already fixed.

We do it here by adding an early check on passed pid parameter. If it's
negative (and so has no chance of being a valid PID), we return -EINVAL.
Previous behavior would eventually return -ESRCH ("No process found"),
given there can't be any process with negative PID. This subtle change
won't make any practical change in behavior, but will allow applications
to detect PID filtering fixes easily. Libbpf fixes take advantage of
this in the next patch.

Cc: stable@vger.kernel.org
Acked-by: Jiri Olsa <jolsa@kernel.org>
Fixes: b733eeade420 ("bpf: Add pid filter support for uprobe_multi link")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20240521163401.3005045-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/trace/bpf_trace.c                                   | 8 ++++----
 tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f5154c051d2c..1baaeb9ca205 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -3295,7 +3295,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
 	struct bpf_run_ctx *old_run_ctx;
 	int err = 0;
 
-	if (link->task && current != link->task)
+	if (link->task && current->mm != link->task->mm)
 		return 0;
 
 	if (sleepable)
@@ -3396,8 +3396,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
 	upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
 	uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
 	cnt = attr->link_create.uprobe_multi.cnt;
+	pid = attr->link_create.uprobe_multi.pid;
 
-	if (!upath || !uoffsets || !cnt)
+	if (!upath || !uoffsets || !cnt || pid < 0)
 		return -EINVAL;
 	if (cnt > MAX_UPROBE_MULTI_CNT)
 		return -E2BIG;
@@ -3421,10 +3422,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
 		goto error_path_put;
 	}
 
-	pid = attr->link_create.uprobe_multi.pid;
 	if (pid) {
 		rcu_read_lock();
-		task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
+		task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
 		rcu_read_unlock();
 		if (!task) {
 			err = -ESRCH;
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
index 8269cdee33ae..38fda42fd70f 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
@@ -397,7 +397,7 @@ static void test_attach_api_fails(void)
 	link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
 	if (!ASSERT_ERR(link_fd, "link_fd"))
 		goto cleanup;
-	ASSERT_EQ(link_fd, -ESRCH, "pid_is_wrong");
+	ASSERT_EQ(link_fd, -EINVAL, "pid_is_wrong");
 
 cleanup:
 	if (link_fd >= 0)
-- 
cgit v1.2.3


From 4a8f635a60540888dab3804992e86410360339c8 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 21 May 2024 09:33:58 -0700
Subject: bpf: remove unnecessary rcu_read_{lock,unlock}() in multi-uprobe
 attach logic

get_pid_task() internally already calls rcu_read_lock() and
rcu_read_unlock(), so there is no point to do this one extra time.

This is a drive-by improvement and has no correctness implications.

Acked-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20240521163401.3005045-3-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/trace/bpf_trace.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 1baaeb9ca205..6249dac61701 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -3423,9 +3423,7 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
 	}
 
 	if (pid) {
-		rcu_read_lock();
 		task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
-		rcu_read_unlock();
 		if (!task) {
 			err = -ESRCH;
 			goto error_path_put;
-- 
cgit v1.2.3


From 04d939a2ab229a3821f04fc81f7c027842f501f1 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 21 May 2024 09:33:59 -0700
Subject: libbpf: detect broken PID filtering logic for multi-uprobe

Libbpf is automatically (and transparently to user) detecting
multi-uprobe support in the kernel, and, if supported, uses
multi-uprobes to improve USDT attachment speed.

USDTs can be attached system-wide or for the specific process by PID. In
the latter case, we rely on correct kernel logic of not triggering USDT
for unrelated processes.

As such, on older kernels that do support multi-uprobes, but still have
broken PID filtering logic, we need to fall back to singular uprobes.

Unfortunately, whether user is using PID filtering or not is known at
the attachment time, which happens after relevant BPF programs were
loaded into the kernel. Also unfortunately, we need to make a call
whether to use multi-uprobes or singular uprobe for SEC("usdt") programs
during BPF object load time, at which point we have no information about
possible PID filtering.

The distinction between single and multi-uprobes is small, but important
for the kernel. Multi-uprobes get BPF_TRACE_UPROBE_MULTI attach type,
and kernel internally substitiute different implementation of some of
BPF helpers (e.g., bpf_get_attach_cookie()) depending on whether uprobe
is multi or singular. So, multi-uprobes and singular uprobes cannot be
intermixed.

All the above implies that we have to make an early and conservative
call about the use of multi-uprobes. And so this patch modifies libbpf's
existing feature detector for multi-uprobe support to also check correct
PID filtering. If PID filtering is not yet fixed, we fall back to
singular uprobes for USDTs.

This extension to feature detection is simple thanks to kernel's -EINVAL
addition for pid < 0.

Acked-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20240521163401.3005045-4-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/features.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
index a336786a22a3..3df0125ed5fa 100644
--- a/tools/lib/bpf/features.c
+++ b/tools/lib/bpf/features.c
@@ -392,11 +392,40 @@ static int probe_uprobe_multi_link(int token_fd)
 	link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
 	err = -errno; /* close() can clobber errno */
 
+	if (link_fd >= 0 || err != -EBADF) {
+		close(link_fd);
+		close(prog_fd);
+		return 0;
+	}
+
+	/* Initial multi-uprobe support in kernel didn't handle PID filtering
+	 * correctly (it was doing thread filtering, not process filtering).
+	 * So now we'll detect if PID filtering logic was fixed, and, if not,
+	 * we'll pretend multi-uprobes are not supported, if not.
+	 * Multi-uprobes are used in USDT attachment logic, and we need to be
+	 * conservative here, because multi-uprobe selection happens early at
+	 * load time, while the use of PID filtering is known late at
+	 * attachment time, at which point it's too late to undo multi-uprobe
+	 * selection.
+	 *
+	 * Creating uprobe with pid == -1 for (invalid) '/' binary will fail
+	 * early with -EINVAL on kernels with fixed PID filtering logic;
+	 * otherwise -ESRCH would be returned if passed correct binary path
+	 * (but we'll just get -BADF, of course).
+	 */
+	link_opts.uprobe_multi.pid = -1; /* invalid PID */
+	link_opts.uprobe_multi.path = "/"; /* invalid path */
+	link_opts.uprobe_multi.offsets = &offset;
+	link_opts.uprobe_multi.cnt = 1;
+
+	link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts);
+	err = -errno; /* close() can clobber errno */
+
 	if (link_fd >= 0)
 		close(link_fd);
 	close(prog_fd);
 
-	return link_fd < 0 && err == -EBADF;
+	return link_fd < 0 && err == -EINVAL;
 }
 
 static int probe_kern_bpf_cookie(int token_fd)
-- 
cgit v1.2.3


From 70342420a1cf1173bdec456e5fa574a804e422db Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 21 May 2024 09:34:00 -0700
Subject: selftests/bpf: extend multi-uprobe tests with child thread case

Extend existing multi-uprobe tests to test that PID filtering works
correctly. We already have child *process* tests, but we need also child
*thread* tests. This patch adds spawn_thread() helper to start child
thread, wait for it to be ready, and then instruct it to trigger desired
uprobes.

Additionally, we extend BPF-side code to track thread ID, not just
process ID. Also we detect whether extraneous triggerings with
unexpected process IDs happened, and validate that none of that happened
in practice.

These changes prove that fixed PID filtering logic for multi-uprobe
works as expected. These tests fail on old kernels.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20240521163401.3005045-5-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/uprobe_multi_test.c   | 107 +++++++++++++++++++--
 tools/testing/selftests/bpf/progs/uprobe_multi.c   |  17 +++-
 2 files changed, 115 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
index 38fda42fd70f..677232d31432 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include <unistd.h>
+#include <pthread.h>
 #include <test_progs.h>
 #include "uprobe_multi.skel.h"
 #include "uprobe_multi_bench.skel.h"
@@ -27,7 +28,10 @@ noinline void uprobe_multi_func_3(void)
 
 struct child {
 	int go[2];
+	int c2p[2]; /* child -> parent channel */
 	int pid;
+	int tid;
+	pthread_t thread;
 };
 
 static void release_child(struct child *child)
@@ -38,6 +42,10 @@ static void release_child(struct child *child)
 		return;
 	close(child->go[1]);
 	close(child->go[0]);
+	if (child->thread)
+		pthread_join(child->thread, NULL);
+	close(child->c2p[0]);
+	close(child->c2p[1]);
 	if (child->pid > 0)
 		waitpid(child->pid, &child_status, 0);
 }
@@ -63,7 +71,7 @@ static struct child *spawn_child(void)
 	if (pipe(child.go))
 		return NULL;
 
-	child.pid = fork();
+	child.pid = child.tid = fork();
 	if (child.pid < 0) {
 		release_child(&child);
 		errno = EINVAL;
@@ -89,6 +97,66 @@ static struct child *spawn_child(void)
 	return &child;
 }
 
+static void *child_thread(void *ctx)
+{
+	struct child *child = ctx;
+	int c = 0, err;
+
+	child->tid = syscall(SYS_gettid);
+
+	/* let parent know we are ready */
+	err = write(child->c2p[1], &c, 1);
+	if (err != 1)
+		pthread_exit(&err);
+
+	/* wait for parent's kick */
+	err = read(child->go[0], &c, 1);
+	if (err != 1)
+		pthread_exit(&err);
+
+	uprobe_multi_func_1();
+	uprobe_multi_func_2();
+	uprobe_multi_func_3();
+
+	err = 0;
+	pthread_exit(&err);
+}
+
+static struct child *spawn_thread(void)
+{
+	static struct child child;
+	int c, err;
+
+	/* pipe to notify child to execute the trigger functions */
+	if (pipe(child.go))
+		return NULL;
+	/* pipe to notify parent that child thread is ready */
+	if (pipe(child.c2p)) {
+		close(child.go[0]);
+		close(child.go[1]);
+		return NULL;
+	}
+
+	child.pid = getpid();
+
+	err = pthread_create(&child.thread, NULL, child_thread, &child);
+	if (err) {
+		err = -errno;
+		close(child.go[0]);
+		close(child.go[1]);
+		close(child.c2p[0]);
+		close(child.c2p[1]);
+		errno = -err;
+		return NULL;
+	}
+
+	err = read(child.c2p[0], &c, 1);
+	if (!ASSERT_EQ(err, 1, "child_thread_ready"))
+		return NULL;
+
+	return &child;
+}
+
 static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child)
 {
 	skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1;
@@ -103,15 +171,22 @@ static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child
 	 * passed at the probe attach.
 	 */
 	skel->bss->pid = child ? 0 : getpid();
+	skel->bss->expect_pid = child ? child->pid : 0;
+
+	/* trigger all probes, if we are testing child *process*, just to make
+	 * sure that PID filtering doesn't let through activations from wrong
+	 * PIDs; when we test child *thread*, we don't want to do this to
+	 * avoid double counting number of triggering events
+	 */
+	if (!child || !child->thread) {
+		uprobe_multi_func_1();
+		uprobe_multi_func_2();
+		uprobe_multi_func_3();
+	}
 
 	if (child)
 		kick_child(child);
 
-	/* trigger all probes */
-	uprobe_multi_func_1();
-	uprobe_multi_func_2();
-	uprobe_multi_func_3();
-
 	/*
 	 * There are 2 entry and 2 exit probe called for each uprobe_multi_func_[123]
 	 * function and each slepable probe (6) increments uprobe_multi_sleep_result.
@@ -126,8 +201,12 @@ static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child
 
 	ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 6, "uprobe_multi_sleep_result");
 
-	if (child)
+	ASSERT_FALSE(skel->bss->bad_pid_seen, "bad_pid_seen");
+
+	if (child) {
 		ASSERT_EQ(skel->bss->child_pid, child->pid, "uprobe_multi_child_pid");
+		ASSERT_EQ(skel->bss->child_tid, child->tid, "uprobe_multi_child_tid");
+	}
 }
 
 static void test_skel_api(void)
@@ -210,6 +289,13 @@ test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi
 		return;
 
 	__test_attach_api(binary, pattern, opts, child);
+
+	/* pid filter (thread) */
+	child = spawn_thread();
+	if (!ASSERT_OK_PTR(child, "spawn_thread"))
+		return;
+
+	__test_attach_api(binary, pattern, opts, child);
 }
 
 static void test_attach_api_pattern(void)
@@ -495,6 +581,13 @@ static void test_link_api(void)
 		return;
 
 	__test_link_api(child);
+
+	/* pid filter (thread) */
+	child = spawn_thread();
+	if (!ASSERT_OK_PTR(child, "spawn_thread"))
+		return;
+
+	__test_link_api(child);
 }
 
 static void test_bench_attach_uprobe(void)
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi.c b/tools/testing/selftests/bpf/progs/uprobe_multi.c
index 419d9aa28fce..86a7ff5d3726 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_multi.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi.c
@@ -22,6 +22,10 @@ __u64 uprobe_multi_sleep_result = 0;
 
 int pid = 0;
 int child_pid = 0;
+int child_tid = 0;
+
+int expect_pid = 0;
+bool bad_pid_seen = false;
 
 bool test_cookie = false;
 void *user_ptr = 0;
@@ -36,11 +40,19 @@ static __always_inline bool verify_sleepable_user_copy(void)
 
 static void uprobe_multi_check(void *ctx, bool is_return, bool is_sleep)
 {
-	child_pid = bpf_get_current_pid_tgid() >> 32;
+	__u64 cur_pid_tgid = bpf_get_current_pid_tgid();
+	__u32 cur_pid;
 
-	if (pid && child_pid != pid)
+	cur_pid = cur_pid_tgid >> 32;
+	if (pid && cur_pid != pid)
 		return;
 
+	if (expect_pid && cur_pid != expect_pid)
+		bad_pid_seen = true;
+
+	child_pid = cur_pid_tgid >> 32;
+	child_tid = (__u32)cur_pid_tgid;
+
 	__u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
 	__u64 addr = bpf_get_func_ip(ctx);
 
@@ -97,5 +109,6 @@ int uretprobe_sleep(struct pt_regs *ctx)
 SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*")
 int uprobe_extra(struct pt_regs *ctx)
 {
+	/* we need this one just to mix PID-filtered and global uprobes */
 	return 0;
 }
-- 
cgit v1.2.3


From 198034a87dfeb64d5a8359a5089022c6b923646e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 21 May 2024 09:34:01 -0700
Subject: selftests/bpf: extend multi-uprobe tests with USDTs

Validate libbpf's USDT-over-multi-uprobe logic by adding USDTs to
existing multi-uprobe tests. This checks correct libbpf fallback to
singular uprobes (when run on older kernels with buggy PID filtering).
We reuse already established child process and child thread testing
infrastructure, so additions are minimal. These test fail on either
older kernels or older version of libbpf that doesn't detect PID
filtering problems.

Acked-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20240521163401.3005045-6-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/uprobe_multi_test.c   | 25 ++++++++++++++++
 tools/testing/selftests/bpf/progs/uprobe_multi.c   | 33 ++++++++++++++++++++--
 2 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
index 677232d31432..bf6ca8e3eb13 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
@@ -8,6 +8,7 @@
 #include "uprobe_multi_usdt.skel.h"
 #include "bpf/libbpf_internal.h"
 #include "testing_helpers.h"
+#include "../sdt.h"
 
 static char test_data[] = "test_data";
 
@@ -26,6 +27,11 @@ noinline void uprobe_multi_func_3(void)
 	asm volatile ("");
 }
 
+noinline void usdt_trigger(void)
+{
+	STAP_PROBE(test, pid_filter_usdt);
+}
+
 struct child {
 	int go[2];
 	int c2p[2]; /* child -> parent channel */
@@ -90,6 +96,7 @@ static struct child *spawn_child(void)
 		uprobe_multi_func_1();
 		uprobe_multi_func_2();
 		uprobe_multi_func_3();
+		usdt_trigger();
 
 		exit(errno);
 	}
@@ -117,6 +124,7 @@ static void *child_thread(void *ctx)
 	uprobe_multi_func_1();
 	uprobe_multi_func_2();
 	uprobe_multi_func_3();
+	usdt_trigger();
 
 	err = 0;
 	pthread_exit(&err);
@@ -182,6 +190,7 @@ static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child
 		uprobe_multi_func_1();
 		uprobe_multi_func_2();
 		uprobe_multi_func_3();
+		usdt_trigger();
 	}
 
 	if (child)
@@ -269,8 +278,24 @@ __test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_mul
 	if (!ASSERT_OK_PTR(skel->links.uprobe_extra, "bpf_program__attach_uprobe_multi"))
 		goto cleanup;
 
+	/* Attach (uprobe-backed) USDTs */
+	skel->links.usdt_pid = bpf_program__attach_usdt(skel->progs.usdt_pid, pid, binary,
+							"test", "pid_filter_usdt", NULL);
+	if (!ASSERT_OK_PTR(skel->links.usdt_pid, "attach_usdt_pid"))
+		goto cleanup;
+
+	skel->links.usdt_extra = bpf_program__attach_usdt(skel->progs.usdt_extra, -1, binary,
+							  "test", "pid_filter_usdt", NULL);
+	if (!ASSERT_OK_PTR(skel->links.usdt_extra, "attach_usdt_extra"))
+		goto cleanup;
+
 	uprobe_multi_test_run(skel, child);
 
+	ASSERT_FALSE(skel->bss->bad_pid_seen_usdt, "bad_pid_seen_usdt");
+	if (child) {
+		ASSERT_EQ(skel->bss->child_pid_usdt, child->pid, "usdt_multi_child_pid");
+		ASSERT_EQ(skel->bss->child_tid_usdt, child->tid, "usdt_multi_child_tid");
+	}
 cleanup:
 	uprobe_multi__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi.c b/tools/testing/selftests/bpf/progs/uprobe_multi.c
index 86a7ff5d3726..44190efcdba2 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_multi.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi.c
@@ -1,8 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include <stdbool.h>
+#include <bpf/usdt.bpf.h>
 
 char _license[] SEC("license") = "GPL";
 
@@ -23,9 +23,12 @@ __u64 uprobe_multi_sleep_result = 0;
 int pid = 0;
 int child_pid = 0;
 int child_tid = 0;
+int child_pid_usdt = 0;
+int child_tid_usdt = 0;
 
 int expect_pid = 0;
 bool bad_pid_seen = false;
+bool bad_pid_seen_usdt = false;
 
 bool test_cookie = false;
 void *user_ptr = 0;
@@ -112,3 +115,29 @@ int uprobe_extra(struct pt_regs *ctx)
 	/* we need this one just to mix PID-filtered and global uprobes */
 	return 0;
 }
+
+SEC("usdt")
+int usdt_pid(struct pt_regs *ctx)
+{
+	__u64 cur_pid_tgid = bpf_get_current_pid_tgid();
+	__u32 cur_pid;
+
+	cur_pid = cur_pid_tgid >> 32;
+	if (pid && cur_pid != pid)
+		return 0;
+
+	if (expect_pid && cur_pid != expect_pid)
+		bad_pid_seen_usdt = true;
+
+	child_pid_usdt = cur_pid_tgid >> 32;
+	child_tid_usdt = (__u32)cur_pid_tgid;
+
+	return 0;
+}
+
+SEC("usdt")
+int usdt_extra(struct pt_regs *ctx)
+{
+	/* we need this one just to mix PID-filtered and global USDT probes */
+	return 0;
+}
-- 
cgit v1.2.3


From dd6a403795f0c7b5c566f86f2ee6b687278d3c1c Mon Sep 17 00:00:00 2001
From: Shahab Vahedi <shahab@synopsys.com>
Date: Sat, 25 May 2024 05:56:28 +0200
Subject: ARC, bpf: Fix issues reported by the static analyzers

Also updated couple of comments along the way.

One of the issues reported was indeed a bug in the code:

  memset(ctx, 0, sizeof(ctx))      // original line
  memset(ctx, 0, sizeof(*ctx))     // fixed line

That was a nice catch.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202405222314.UG5F2NHn-lkp@intel.com/
Closes: https://lore.kernel.org/oe-kbuild-all/202405232036.Xqoc3b0J-lkp@intel.com/
Signed-off-by: Shahab Vahedi <shahab@synopsys.com>
Link: https://lore.kernel.org/r/20240525035628.1026-1-list+bpf@vahedi.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/arc/net/bpf_jit.h       |  2 +-
 arch/arc/net/bpf_jit_arcv2.c | 10 ++++++----
 arch/arc/net/bpf_jit_core.c  | 22 +++++++++++-----------
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/arch/arc/net/bpf_jit.h b/arch/arc/net/bpf_jit.h
index ec44873c42d1..495f3023e4c1 100644
--- a/arch/arc/net/bpf_jit.h
+++ b/arch/arc/net/bpf_jit.h
@@ -39,7 +39,7 @@
 
 /************** Functions that the back-end must provide **************/
 /* Extension for 32-bit operations. */
-inline u8 zext(u8 *buf, u8 rd);
+u8 zext(u8 *buf, u8 rd);
 /***** Moves *****/
 u8 mov_r32(u8 *buf, u8 rd, u8 rs, u8 sign_ext);
 u8 mov_r32_i32(u8 *buf, u8 reg, s32 imm);
diff --git a/arch/arc/net/bpf_jit_arcv2.c b/arch/arc/net/bpf_jit_arcv2.c
index 31bfb6e9ce00..4458e409ca0a 100644
--- a/arch/arc/net/bpf_jit_arcv2.c
+++ b/arch/arc/net/bpf_jit_arcv2.c
@@ -62,7 +62,7 @@ enum {
  *   If/when we decide to add ARCv2 instructions that do use register pairs,
  *   the mapping, hopefully, doesn't need to be revisited.
  */
-const u8 bpf2arc[][2] = {
+static const u8 bpf2arc[][2] = {
 	/* Return value from in-kernel function, and exit value from eBPF */
 	[BPF_REG_0] = {ARC_R_8, ARC_R_9},
 	/* Arguments from eBPF program to in-kernel function */
@@ -1302,7 +1302,7 @@ static u8 arc_b(u8 *buf, s32 offset)
 
 /************* Packers (Deal with BPF_REGs) **************/
 
-inline u8 zext(u8 *buf, u8 rd)
+u8 zext(u8 *buf, u8 rd)
 {
 	if (rd != BPF_REG_FP)
 		return arc_movi_r(buf, REG_HI(rd), 0);
@@ -2235,6 +2235,7 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext)
 			break;
 		default:
 			/* The caller must have handled this. */
+			break;
 		}
 	} else {
 		/*
@@ -2253,6 +2254,7 @@ u8 gen_swap(u8 *buf, u8 rd, u8 size, u8 endian, bool force, bool do_zext)
 			break;
 		default:
 			/* The caller must have handled this. */
+			break;
 		}
 	}
 
@@ -2517,7 +2519,7 @@ u8 arc_epilogue(u8 *buf, u32 usage, u16 frame_size)
 #define JCC64_NR_OF_JMPS 3	/* Number of jumps in jcc64 template. */
 #define JCC64_INSNS_TO_END 3	/* Number of insn. inclusive the 2nd jmp to end. */
 #define JCC64_SKIP_JMP 1	/* Index of the "skip" jump to "end". */
-const struct {
+static const struct {
 	/*
 	 * "jit_off" is common between all "jmp[]" and is coupled with
 	 * "cond" of each "jmp[]" instance. e.g.:
@@ -2883,7 +2885,7 @@ u8 gen_jmp_64(u8 *buf, u8 rd, u8 rs, u8 cond, u32 curr_off, u32 targ_off)
  * The "ARC_CC_SET" becomes "CC_unequal" because of the "tst"
  * instruction that precedes the conditional branch.
  */
-const u8 arcv2_32_jmps[ARC_CC_LAST] = {
+static const u8 arcv2_32_jmps[ARC_CC_LAST] = {
 	[ARC_CC_UGT] = CC_great_u,
 	[ARC_CC_UGE] = CC_great_eq_u,
 	[ARC_CC_ULT] = CC_less_u,
diff --git a/arch/arc/net/bpf_jit_core.c b/arch/arc/net/bpf_jit_core.c
index 6f6b4ffccf2c..e3628922c24a 100644
--- a/arch/arc/net/bpf_jit_core.c
+++ b/arch/arc/net/bpf_jit_core.c
@@ -159,7 +159,7 @@ static void jit_dump(const struct jit_context *ctx)
 /* Initialise the context so there's no garbage. */
 static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog)
 {
-	memset(ctx, 0, sizeof(ctx));
+	memset(ctx, 0, sizeof(*ctx));
 
 	ctx->orig_prog = prog;
 
@@ -167,7 +167,7 @@ static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog)
 	ctx->prog = bpf_jit_blind_constants(prog);
 	if (IS_ERR(ctx->prog))
 		return PTR_ERR(ctx->prog);
-	ctx->blinded = (ctx->prog == ctx->orig_prog ? false : true);
+	ctx->blinded = (ctx->prog != ctx->orig_prog);
 
 	/* If the verifier doesn't zero-extend, then we have to do it. */
 	ctx->do_zext = !ctx->prog->aux->verifier_zext;
@@ -1182,12 +1182,12 @@ static int jit_prepare(struct jit_context *ctx)
 }
 
 /*
- * All the "handle_*()" functions have been called before by the
- * "jit_prepare()". If there was an error, we would know by now.
- * Therefore, no extra error checking at this point, other than
- * a sanity check at the end that expects the calculated length
- * (jit.len) to be equal to the length of generated instructions
- * (jit.index).
+ * jit_compile() is the real compilation phase. jit_prepare() is
+ * invoked before jit_compile() as a dry-run to make sure everything
+ * will go OK and allocate the necessary memory.
+ *
+ * In the end, jit_compile() checks if it has produced the same number
+ * of instructions as jit_prepare() would.
  */
 static int jit_compile(struct jit_context *ctx)
 {
@@ -1407,9 +1407,9 @@ static struct bpf_prog *do_extra_pass(struct bpf_prog *prog)
 
 /*
  * This function may be invoked twice for the same stream of BPF
- * instructions. The "extra pass" happens, when there are "call"s
- * involved that their addresses are not known during the first
- * invocation.
+ * instructions. The "extra pass" happens, when there are
+ * (re)locations involved that their addresses are not known
+ * during the first run.
  */
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
-- 
cgit v1.2.3


From d6fe532b7499e4575f9647879b7a34625817fe7f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 24 May 2024 18:36:16 +0200
Subject: netkit: Fix setting mac address in l2 mode

When running Cilium connectivity test suite with netkit in L2 mode, we
found that it is expected to be able to specify a custom MAC address for
the devices, in particular, cilium-cni obtains the specified MAC address
by querying the endpoint and sets the MAC address of the interface inside
the Pod. Thus, fix the missing support in netkit for L2 mode.

Fixes: 35dfaad7188c ("netkit, bpf: Add bpf programmable net device")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20240524163619.26001-1-daniel@iogearbox.net
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/netkit.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index a4d2e76a8d58..272894053e2c 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -155,6 +155,16 @@ static void netkit_set_multicast(struct net_device *dev)
 	/* Nothing to do, we receive whatever gets pushed to us! */
 }
 
+static int netkit_set_macaddr(struct net_device *dev, void *sa)
+{
+	struct netkit *nk = netkit_priv(dev);
+
+	if (nk->mode != NETKIT_L2)
+		return -EOPNOTSUPP;
+
+	return eth_mac_addr(dev, sa);
+}
+
 static void netkit_set_headroom(struct net_device *dev, int headroom)
 {
 	struct netkit *nk = netkit_priv(dev), *nk2;
@@ -198,6 +208,7 @@ static const struct net_device_ops netkit_netdev_ops = {
 	.ndo_start_xmit		= netkit_xmit,
 	.ndo_set_rx_mode	= netkit_set_multicast,
 	.ndo_set_rx_headroom	= netkit_set_headroom,
+	.ndo_set_mac_address	= netkit_set_macaddr,
 	.ndo_get_iflink		= netkit_get_iflink,
 	.ndo_get_peer_dev	= netkit_peer_dev,
 	.ndo_get_stats64	= netkit_get_stats,
@@ -300,9 +311,11 @@ static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
 
 	if (!attr)
 		return 0;
-	NL_SET_ERR_MSG_ATTR(extack, attr,
-			    "Setting Ethernet address is not supported");
-	return -EOPNOTSUPP;
+	if (nla_len(attr) != ETH_ALEN)
+		return -EINVAL;
+	if (!is_valid_ether_addr(nla_data(attr)))
+		return -EADDRNOTAVAIL;
+	return 0;
 }
 
 static struct rtnl_link_ops netkit_link_ops;
@@ -365,6 +378,9 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
 		strscpy(ifname, "nk%d", IFNAMSIZ);
 		ifname_assign_type = NET_NAME_ENUM;
 	}
+	if (mode != NETKIT_L2 &&
+	    (tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS]))
+		return -EOPNOTSUPP;
 
 	net = rtnl_link_get_net(src_net, tbp);
 	if (IS_ERR(net))
@@ -379,7 +395,7 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
 
 	netif_inherit_tso_max(peer, dev);
 
-	if (mode == NETKIT_L2)
+	if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS]))
 		eth_hw_addr_random(peer);
 	if (ifmp && dev->ifindex)
 		peer->ifindex = ifmp->ifi_index;
@@ -402,7 +418,7 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
 	if (err < 0)
 		goto err_configure_peer;
 
-	if (mode == NETKIT_L2)
+	if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS])
 		eth_hw_addr_random(dev);
 	if (tb[IFLA_IFNAME])
 		nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
-- 
cgit v1.2.3


From 3998d184267dfcff858aaa84d3de17429253629d Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 24 May 2024 18:36:17 +0200
Subject: netkit: Fix pkt_type override upon netkit pass verdict

When running Cilium connectivity test suite with netkit in L2 mode, we
found that compared to tcx a few tests were failing which pushed traffic
into an L7 proxy sitting in host namespace. The problem in particular is
around the invocation of eth_type_trans() in netkit.

In case of tcx, this is run before the tcx ingress is triggered inside
host namespace and thus if the BPF program uses the bpf_skb_change_type()
helper the newly set type is retained. However, in case of netkit, the
late eth_type_trans() invocation overrides the earlier decision from the
BPF program which eventually leads to the test failure.

Instead of eth_type_trans(), split out the relevant parts, meaning, reset
of mac header and call to eth_skb_pkt_type() before the BPF program is run
in order to have the same behavior as with tcx, and refactor a small helper
called eth_skb_pull_mac() which is run in case it's passed up the stack
where the mac header must be pulled. With this all connectivity tests pass.

Fixes: 35dfaad7188c ("netkit, bpf: Add bpf programmable net device")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://lore.kernel.org/r/20240524163619.26001-2-daniel@iogearbox.net
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/net/netkit.c        | 4 +++-
 include/linux/etherdevice.h | 8 ++++++++
 net/ethernet/eth.c          | 4 +---
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index 272894053e2c..16789cd446e9 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -55,6 +55,7 @@ static void netkit_prep_forward(struct sk_buff *skb, bool xnet)
 	skb_scrub_packet(skb, xnet);
 	skb->priority = 0;
 	nf_skip_egress(skb, true);
+	skb_reset_mac_header(skb);
 }
 
 static struct netkit *netkit_priv(const struct net_device *dev)
@@ -78,6 +79,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
 		     skb_orphan_frags(skb, GFP_ATOMIC)))
 		goto drop;
 	netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)));
+	eth_skb_pkt_type(skb, peer);
 	skb->dev = peer;
 	entry = rcu_dereference(nk->active);
 	if (entry)
@@ -85,7 +87,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
 	switch (ret) {
 	case NETKIT_NEXT:
 	case NETKIT_PASS:
-		skb->protocol = eth_type_trans(skb, skb->dev);
+		eth_skb_pull_mac(skb);
 		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 		if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
 			dev_sw_netstats_tx_add(dev, 1, len);
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 2ad1ffa4ccb9..0ed47d00549b 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -636,6 +636,14 @@ static inline void eth_skb_pkt_type(struct sk_buff *skb,
 	}
 }
 
+static inline struct ethhdr *eth_skb_pull_mac(struct sk_buff *skb)
+{
+	struct ethhdr *eth = (struct ethhdr *)skb->data;
+
+	skb_pull_inline(skb, ETH_HLEN);
+	return eth;
+}
+
 /**
  * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame
  * @skb: Buffer to pad
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 049c3adeb850..4e3651101b86 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -161,9 +161,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
 
-	eth = (struct ethhdr *)skb->data;
-	skb_pull_inline(skb, ETH_HLEN);
-
+	eth = eth_skb_pull_mac(skb);
 	eth_skb_pkt_type(skb, dev);
 
 	/*
-- 
cgit v1.2.3


From 998ffeb2738e26f134dc8e63b5dcaece22573957 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 24 May 2024 18:36:18 +0200
Subject: selftests/bpf: Add netkit tests for mac address

This adds simple tests around setting MAC addresses in the different
netkit modes.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/r/20240524163619.26001-3-daniel@iogearbox.net
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/tc_netkit.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
index 15ee7b2fc410..18b2e969a456 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
@@ -73,6 +73,16 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
 			 "up primary");
 	ASSERT_OK(system("ip addr add dev " netkit_name " 10.0.0.1/24"),
 			 "addr primary");
+
+	if (mode == NETKIT_L3) {
+		ASSERT_EQ(system("ip link set dev " netkit_name
+				 " addr ee:ff:bb:cc:aa:dd 2> /dev/null"), 512,
+				 "set hwaddress");
+	} else {
+		ASSERT_OK(system("ip link set dev " netkit_name
+				 " addr ee:ff:bb:cc:aa:dd"),
+				 "set hwaddress");
+	}
 	if (same_netns) {
 		ASSERT_OK(system("ip link set dev " netkit_peer " up"),
 				 "up peer");
-- 
cgit v1.2.3


From 95348e463eabc803341c67d562f9e0a5f0a48fe6 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 24 May 2024 18:36:19 +0200
Subject: selftests/bpf: Add netkit test for pkt_type

Add a test case to assert that the skb->pkt_type which was set from the BPF
program is retained from the netkit xmit side to the peer's device at tcx
ingress location.

  # ./vmtest.sh -- ./test_progs -t netkit
  [...]
  ./test_progs -t netkit
  [    1.140780] bpf_testmod: loading out-of-tree module taints kernel.
  [    1.141127] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel
  [    1.284601] tsc: Refined TSC clocksource calibration: 3408.006 MHz
  [    1.286672] clocksource: tsc: mask: 0xffffffffffffffff max_cycles: 0x311fd9b189d, max_idle_ns: 440795225691 ns
  [    1.290384] clocksource: Switched to clocksource tsc
  #345     tc_netkit_basic:OK
  #346     tc_netkit_device:OK
  #347     tc_netkit_multi_links:OK
  #348     tc_netkit_multi_opts:OK
  #349     tc_netkit_neigh_links:OK
  #350     tc_netkit_pkt_type:OK
  Summary: 6/0 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/r/20240524163619.26001-4-daniel@iogearbox.net
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/tc_netkit.c | 84 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_tc_link.c   | 35 ++++++++-
 2 files changed, 118 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
index 18b2e969a456..b9135720024c 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
@@ -99,6 +99,16 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
 	return err;
 }
 
+static void move_netkit(void)
+{
+	ASSERT_OK(system("ip link set " netkit_peer " netns foo"),
+			 "move peer");
+	ASSERT_OK(system("ip netns exec foo ip link set dev "
+			 netkit_peer " up"), "up peer");
+	ASSERT_OK(system("ip netns exec foo ip addr add dev "
+			 netkit_peer " 10.0.0.2/24"), "addr peer");
+}
+
 static void destroy_netkit(void)
 {
 	ASSERT_OK(system("ip link del dev " netkit_name), "del primary");
@@ -695,3 +705,77 @@ void serial_test_tc_netkit_neigh_links(void)
 	serial_test_tc_netkit_neigh_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
 	serial_test_tc_netkit_neigh_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
 }
+
+static void serial_test_tc_netkit_pkt_type_mode(int mode)
+{
+	LIBBPF_OPTS(bpf_netkit_opts, optl_nk);
+	LIBBPF_OPTS(bpf_tcx_opts, optl_tcx);
+	int err, ifindex, ifindex2;
+	struct test_tc_link *skel;
+	struct bpf_link *link;
+
+	err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+			    &ifindex, true);
+	if (err)
+		return;
+
+	ifindex2 = if_nametoindex(netkit_peer);
+	ASSERT_NEQ(ifindex, ifindex2, "ifindex_1_2");
+
+	skel = test_tc_link__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		goto cleanup;
+
+	ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+		  BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+	ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc7,
+		  BPF_TCX_INGRESS), 0, "tc7_attach_type");
+
+	err = test_tc_link__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	assert_mprog_count_ifindex(ifindex,  BPF_NETKIT_PRIMARY, 0);
+	assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 0);
+
+	link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl_nk);
+	if (!ASSERT_OK_PTR(link, "link_attach"))
+		goto cleanup;
+
+	skel->links.tc1 = link;
+
+	assert_mprog_count_ifindex(ifindex,  BPF_NETKIT_PRIMARY, 1);
+	assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 0);
+
+	link = bpf_program__attach_tcx(skel->progs.tc7, ifindex2, &optl_tcx);
+	if (!ASSERT_OK_PTR(link, "link_attach"))
+		goto cleanup;
+
+	skel->links.tc7 = link;
+
+	assert_mprog_count_ifindex(ifindex,  BPF_NETKIT_PRIMARY, 1);
+	assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 1);
+
+	move_netkit();
+
+	tc_skel_reset_all_seen(skel);
+	skel->bss->set_type = true;
+	ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+	ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+	ASSERT_EQ(skel->bss->seen_tc7, true, "seen_tc7");
+
+	ASSERT_EQ(skel->bss->seen_host,  true, "seen_host");
+	ASSERT_EQ(skel->bss->seen_mcast, true, "seen_mcast");
+cleanup:
+	test_tc_link__destroy(skel);
+
+	assert_mprog_count_ifindex(ifindex,  BPF_NETKIT_PRIMARY, 0);
+	destroy_netkit();
+}
+
+void serial_test_tc_netkit_pkt_type(void)
+{
+	serial_test_tc_netkit_pkt_type_mode(NETKIT_L2);
+	serial_test_tc_netkit_pkt_type_mode(NETKIT_L3);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c
index 992400acb957..ab3eae3d6af8 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_link.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_link.c
@@ -4,7 +4,8 @@
 
 #include <linux/bpf.h>
 #include <linux/if_ether.h>
-
+#include <linux/stddef.h>
+#include <linux/if_packet.h>
 #include <bpf/bpf_endian.h>
 #include <bpf/bpf_helpers.h>
 
@@ -16,7 +17,13 @@ bool seen_tc3;
 bool seen_tc4;
 bool seen_tc5;
 bool seen_tc6;
+bool seen_tc7;
+
+bool set_type;
+
 bool seen_eth;
+bool seen_host;
+bool seen_mcast;
 
 SEC("tc/ingress")
 int tc1(struct __sk_buff *skb)
@@ -28,8 +35,16 @@ int tc1(struct __sk_buff *skb)
 	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)))
 		goto out;
 	seen_eth = eth.h_proto == bpf_htons(ETH_P_IP);
+	seen_host = skb->pkt_type == PACKET_HOST;
+	if (seen_host && set_type) {
+		eth.h_dest[0] = 4;
+		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0))
+			goto fail;
+		bpf_skb_change_type(skb, PACKET_MULTICAST);
+	}
 out:
 	seen_tc1 = true;
+fail:
 	return TCX_NEXT;
 }
 
@@ -67,3 +82,21 @@ int tc6(struct __sk_buff *skb)
 	seen_tc6 = true;
 	return TCX_PASS;
 }
+
+SEC("tc/ingress")
+int tc7(struct __sk_buff *skb)
+{
+	struct ethhdr eth = {};
+
+	if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+		goto out;
+	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)))
+		goto out;
+	if (eth.h_dest[0] == 4 && set_type) {
+		seen_mcast = skb->pkt_type == PACKET_MULTICAST;
+		bpf_skb_change_type(skb, PACKET_HOST);
+	}
+out:
+	seen_tc7 = true;
+	return TCX_PASS;
+}
-- 
cgit v1.2.3


From 67ec8cdf29971677b2fb4b6d92871eb5d5e95597 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 22 May 2024 13:37:54 +0800
Subject: hwrng: core - Remove add_early_randomness
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A potential deadlock was reported with the config file at

https://web.archive.org/web/20240522052129/https://0x0.st/XPN_.txt

In this particular configuration, the deadlock doesn't exist because
the warning triggered at a point before modules were even available.
However, the deadlock can be real because any module loaded would
invoke async_synchronize_full.

The issue is spurious for software crypto algorithms which aren't
themselves involved in async probing.  However, it would be hard to
avoid for a PCI crypto driver using async probing.

In this particular call trace, the problem is easily avoided because
the only reason the module is being requested during probing is the
add_early_randomness call in the hwrng core.  This feature is
vestigial since there is now a kernel thread dedicated to doing
exactly this.

So remove add_early_randomness as it is no longer needed.

Reported-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Reported-by: Eric Biggers <ebiggers@kernel.org>
Fixes: 1b6d7f9eb150 ("tpm: add session encryption protection to tpm2_get_random()")
Link: https://lore.kernel.org/r/119dc5ed-f159-41be-9dda-1a056f29888d@notapiano/
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 47 ++++---------------------------------------
 1 file changed, 4 insertions(+), 43 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index f5c71a617a99..4084df65c9fa 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -64,19 +64,6 @@ static size_t rng_buffer_size(void)
 	return RNG_BUFFER_SIZE;
 }
 
-static void add_early_randomness(struct hwrng *rng)
-{
-	int bytes_read;
-
-	mutex_lock(&reading_mutex);
-	bytes_read = rng_get_data(rng, rng_fillbuf, 32, 0);
-	mutex_unlock(&reading_mutex);
-	if (bytes_read > 0) {
-		size_t entropy = bytes_read * 8 * rng->quality / 1024;
-		add_hwgenerator_randomness(rng_fillbuf, bytes_read, entropy, false);
-	}
-}
-
 static inline void cleanup_rng(struct kref *kref)
 {
 	struct hwrng *rng = container_of(kref, struct hwrng, ref);
@@ -340,13 +327,12 @@ static ssize_t rng_current_store(struct device *dev,
 				 const char *buf, size_t len)
 {
 	int err;
-	struct hwrng *rng, *old_rng, *new_rng;
+	struct hwrng *rng, *new_rng;
 
 	err = mutex_lock_interruptible(&rng_mutex);
 	if (err)
 		return -ERESTARTSYS;
 
-	old_rng = current_rng;
 	if (sysfs_streq(buf, "")) {
 		err = enable_best_rng();
 	} else {
@@ -362,11 +348,8 @@ static ssize_t rng_current_store(struct device *dev,
 	new_rng = get_current_rng_nolock();
 	mutex_unlock(&rng_mutex);
 
-	if (new_rng) {
-		if (new_rng != old_rng)
-			add_early_randomness(new_rng);
+	if (new_rng)
 		put_rng(new_rng);
-	}
 
 	return err ? : len;
 }
@@ -544,7 +527,6 @@ int hwrng_register(struct hwrng *rng)
 {
 	int err = -EINVAL;
 	struct hwrng *tmp;
-	bool is_new_current = false;
 
 	if (!rng->name || (!rng->data_read && !rng->read))
 		goto out;
@@ -573,25 +555,8 @@ int hwrng_register(struct hwrng *rng)
 		err = set_current_rng(rng);
 		if (err)
 			goto out_unlock;
-		/* to use current_rng in add_early_randomness() we need
-		 * to take a ref
-		 */
-		is_new_current = true;
-		kref_get(&rng->ref);
 	}
 	mutex_unlock(&rng_mutex);
-	if (is_new_current || !rng->init) {
-		/*
-		 * Use a new device's input to add some randomness to
-		 * the system.  If this rng device isn't going to be
-		 * used right away, its init function hasn't been
-		 * called yet by set_current_rng(); so only use the
-		 * randomness from devices that don't need an init callback
-		 */
-		add_early_randomness(rng);
-	}
-	if (is_new_current)
-		put_rng(rng);
 	return 0;
 out_unlock:
 	mutex_unlock(&rng_mutex);
@@ -602,12 +567,11 @@ EXPORT_SYMBOL_GPL(hwrng_register);
 
 void hwrng_unregister(struct hwrng *rng)
 {
-	struct hwrng *old_rng, *new_rng;
+	struct hwrng *new_rng;
 	int err;
 
 	mutex_lock(&rng_mutex);
 
-	old_rng = current_rng;
 	list_del(&rng->list);
 	complete_all(&rng->dying);
 	if (current_rng == rng) {
@@ -626,11 +590,8 @@ void hwrng_unregister(struct hwrng *rng)
 	} else
 		mutex_unlock(&rng_mutex);
 
-	if (new_rng) {
-		if (old_rng != new_rng)
-			add_early_randomness(new_rng);
+	if (new_rng)
 		put_rng(new_rng);
-	}
 
 	wait_for_completion(&rng->cleanup_done);
 }
-- 
cgit v1.2.3


From d509cadc3a48fee394d68757e4b685f7c143ed64 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sat, 25 May 2024 00:53:25 -0400
Subject: bcachefs: Fix debug assert

Reported-by: syzbot+a8074a75b8d73328751e@syzkaller.appspotmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 2206a8dee693..df2bea38e83f 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -564,7 +564,7 @@ static void __bch2_fs_free(struct bch_fs *c)
 	BUG_ON(atomic_read(&c->journal_keys.ref));
 	bch2_fs_btree_write_buffer_exit(c);
 	percpu_free_rwsem(&c->mark_lock);
-	EBUG_ON(percpu_u64_get(c->online_reserved));
+	EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved));
 	free_percpu(c->online_reserved);
 
 	darray_exit(&c->btree_roots_extra);
-- 
cgit v1.2.3


From 9242a34b760648b722f4958749ad83ef7d0f7525 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sat, 25 May 2024 12:38:53 -0400
Subject: bcachefs: Fix sb-downgrade validation

Superblock downgrade entries are only two byte aligned, but section
sizes are 8 byte aligned, which means we have to be careful about
overrun checks; an entry that crosses the end of the section is allowed
(and ignored) as long as it has zero errors.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/sb-downgrade.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index 390a1bbd2567..3fb23e399ffb 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -146,10 +146,17 @@ static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
 	for (const struct bch_sb_field_downgrade_entry *i = e->entries;
 	     (void *) i	< vstruct_end(&e->field);
 	     i = downgrade_entry_next_c(i)) {
+		/*
+		 * Careful: sb_field_downgrade_entry is only 2 byte aligned, but
+		 * section sizes are 8 byte aligned - an empty entry spanning
+		 * the end of the section is allowed (and ignored):
+		 */
+		if ((void *) &i->errors[0] > vstruct_end(&e->field))
+			break;
+
 		if (flags & BCH_VALIDATE_write &&
-		    ((void *) &i->errors[0] > vstruct_end(&e->field) ||
-		     (void *) downgrade_entry_next_c(i) > vstruct_end(&e->field))) {
-			prt_printf(err, "downgrade entry overruns end of superblock section)");
+		    (void *) downgrade_entry_next_c(i) > vstruct_end(&e->field)) {
+			prt_printf(err, "downgrade entry overruns end of superblock section");
 			return -BCH_ERR_invalid_sb_downgrade;
 		}
 
-- 
cgit v1.2.3


From f94b77709e82242c1101e59a90a7807455c4ab2a Mon Sep 17 00:00:00 2001
From: Jeff Johnson <quic_jjohnson@quicinc.com>
Date: Thu, 23 May 2024 16:22:34 -0700
Subject: firewire: add missing MODULE_DESCRIPTION() to test modules

Fix the 'make W=1' warnings:
WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/firewire/uapi-test.o
WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/firewire/packet-serdes-test.o

Signed-off-by: Jeff Johnson <quic_jjohnson@quicinc.com>
Link: https://lore.kernel.org/r/20240523-md-firewire-uapi-test-v1-1-6be5adcc3aed@quicinc.com
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
---
 drivers/firewire/packet-serdes-test.c | 1 +
 drivers/firewire/uapi-test.c          | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/firewire/packet-serdes-test.c b/drivers/firewire/packet-serdes-test.c
index f93c966e794d..e83b1fece780 100644
--- a/drivers/firewire/packet-serdes-test.c
+++ b/drivers/firewire/packet-serdes-test.c
@@ -579,4 +579,5 @@ static struct kunit_suite packet_serdes_test_suite = {
 };
 kunit_test_suite(packet_serdes_test_suite);
 
+MODULE_DESCRIPTION("FireWire packet serialization/deserialization unit test suite");
 MODULE_LICENSE("GPL");
diff --git a/drivers/firewire/uapi-test.c b/drivers/firewire/uapi-test.c
index 2fcbede4fab1..bc3f10a2e516 100644
--- a/drivers/firewire/uapi-test.c
+++ b/drivers/firewire/uapi-test.c
@@ -86,4 +86,5 @@ static struct kunit_suite structure_layout_test_suite = {
 };
 kunit_test_suite(structure_layout_test_suite);
 
+MODULE_DESCRIPTION("FireWire UAPI unit test suite");
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 611b7eb19d0a305d4de00280e4a71a1b15c507fc Mon Sep 17 00:00:00 2001
From: Jim Wylder <jwylder@google.com>
Date: Thu, 23 May 2024 16:14:36 -0500
Subject: regmap-i2c: Subtract reg size from max_write

Currently, when an adapter defines a max_write_len quirk,
the data will be chunked into data sizes equal to the
max_write_len quirk value.  But the payload will be increased by
the size of the register address before transmission.  The
resulting value always ends up larger than the limit set
by the quirk.

Avoid this error by setting regmap's max_write to the quirk's
max_write_len minus the number of bytes for the register and
padding.  This allows the chunking to work correctly for this
limited case without impacting other use-cases.

Signed-off-by: Jim Wylder <jwylder@google.com>
Link: https://msgid.link/r/20240523211437.2839942-1-jwylder@google.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-i2c.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c
index 3ec611dc0c09..a905e955bbfc 100644
--- a/drivers/base/regmap/regmap-i2c.c
+++ b/drivers/base/regmap/regmap-i2c.c
@@ -350,7 +350,8 @@ static const struct regmap_bus *regmap_get_i2c_bus(struct i2c_client *i2c,
 
 		if (quirks->max_write_len &&
 		    (bus->max_raw_write == 0 || bus->max_raw_write > quirks->max_write_len))
-			max_write = quirks->max_write_len;
+			max_write = quirks->max_write_len -
+				(config->reg_bits + config->pad_bits) / BITS_PER_BYTE;
 
 		if (max_read || max_write) {
 			ret_bus = kmemdup(bus, sizeof(*bus), GFP_KERNEL);
-- 
cgit v1.2.3


From b82b6eeefd30f1ff049bff54da419a30ad9354c0 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 24 May 2024 20:46:25 +0100
Subject: bcachefs: Use copy_folio_from_iter_atomic()

copy_page_from_iter_atomic() will be removed at some point.
Also fixup a comment for folios.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/fs-io-buffered.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index 6b69e5cd68dd..54873ecc635c 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -437,8 +437,8 @@ static void bch2_writepage_io_done(struct bch_write_op *op)
 	 */
 
 	/*
-	 * PageWriteback is effectively our ref on the inode - fixup i_blocks
-	 * before calling end_page_writeback:
+	 * The writeback flag is effectively our ref on the inode -
+	 * fixup i_blocks before calling folio_end_writeback:
 	 */
 	bch2_i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta);
 
@@ -898,7 +898,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
 	darray_for_each(fs, fi) {
 		f = *fi;
 		f_len = min(end, folio_end_pos(f)) - f_pos;
-		f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter);
+		f_copied = copy_folio_from_iter_atomic(f, f_offset, f_len, iter);
 		if (!f_copied) {
 			folios_trunc(&fs, fi);
 			break;
-- 
cgit v1.2.3


From 9ee267a29309233b9ef8f58ee61e0b1c9b5879e8 Mon Sep 17 00:00:00 2001
From: Jeff Johnson <quic_jjohnson@quicinc.com>
Date: Sun, 26 May 2024 09:52:48 -0700
Subject: fs: smb: common: add missing MODULE_DESCRIPTION() macros

Fix the 'make W=1' warnings:
WARNING: modpost: missing MODULE_DESCRIPTION() in fs/smb/common/cifs_arc4.o
WARNING: modpost: missing MODULE_DESCRIPTION() in fs/smb/common/cifs_md4.o

Signed-off-by: Jeff Johnson <quic_jjohnson@quicinc.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/common/cifs_arc4.c | 1 +
 fs/smb/common/cifs_md4.c  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/fs/smb/common/cifs_arc4.c b/fs/smb/common/cifs_arc4.c
index 043e4cb839fa..df360ca47826 100644
--- a/fs/smb/common/cifs_arc4.c
+++ b/fs/smb/common/cifs_arc4.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include "arc4.h"
 
+MODULE_DESCRIPTION("ARC4 Cipher Algorithm");
 MODULE_LICENSE("GPL");
 
 int cifs_arc4_setkey(struct arc4_ctx *ctx, const u8 *in_key, unsigned int key_len)
diff --git a/fs/smb/common/cifs_md4.c b/fs/smb/common/cifs_md4.c
index 50f78cfc6ce9..7ee7f4dad90c 100644
--- a/fs/smb/common/cifs_md4.c
+++ b/fs/smb/common/cifs_md4.c
@@ -24,6 +24,7 @@
 #include <asm/byteorder.h>
 #include "md4.h"
 
+MODULE_DESCRIPTION("MD4 Message Digest Algorithm (RFC1320)");
 MODULE_LICENSE("GPL");
 
 static inline u32 lshift(u32 x, unsigned int s)
-- 
cgit v1.2.3


From 495000a38634e640e2fd02f7e4f1512ccc92d770 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 24 May 2024 17:11:46 +0200
Subject: ALSA: core: Remove debugfs at disconnection

The card-specific debugfs entries are removed at the last stage of
card free phase, and it's performed after synchronization of the
closes of all opened fds.  This works fine for most cases, but it can
be potentially problematic for a hotplug device like USB-audio.  Due
to the nature of snd_card_free_when_closed(), the card free isn't
called immediately after the driver removal for a hotplug device, but
it's left until the last fd is closed.  It implies that the card
debugfs entries also remain.  Meanwhile, when a new device is inserted
before the last close and the very same card slot is assigned, the
driver tries to create the card debugfs root again on the very same
path.  This conflicts with the remaining entry, and results in the
kernel warning such as:
  debugfs: Directory 'card0' with parent 'sound' already present!
with the missing debugfs entry afterwards.

For avoiding such conflicts, remove debugfs entries at the device
disconnection phase instead.  The jack kctl debugfs entries get
removed in snd_jack_dev_disconnect() instead of each kctl
private_free.

Fixes: 2d670ea2bd53 ("ALSA: jack: implement software jack injection via debugfs")
Link: https://lore.kernel.org/r/20240524151256.32521-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/init.c |  9 +++++----
 sound/core/jack.c | 21 ++++++++++++++-------
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/sound/core/init.c b/sound/core/init.c
index 4e52bbe32786..b9b708cf980d 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -537,6 +537,11 @@ void snd_card_disconnect(struct snd_card *card)
 		synchronize_irq(card->sync_irq);
 
 	snd_info_card_disconnect(card);
+#ifdef CONFIG_SND_DEBUG
+	debugfs_remove(card->debugfs_root);
+	card->debugfs_root = NULL;
+#endif
+
 	if (card->registered) {
 		device_del(&card->card_dev);
 		card->registered = false;
@@ -586,10 +591,6 @@ static int snd_card_do_free(struct snd_card *card)
 		dev_warn(card->dev, "unable to free card info\n");
 		/* Not fatal error */
 	}
-#ifdef CONFIG_SND_DEBUG
-	debugfs_remove(card->debugfs_root);
-	card->debugfs_root = NULL;
-#endif
 	if (card->release_completion)
 		complete(card->release_completion);
 	if (!card->managed)
diff --git a/sound/core/jack.c b/sound/core/jack.c
index e08b2c4fbd1a..e4bcecdf89b7 100644
--- a/sound/core/jack.c
+++ b/sound/core/jack.c
@@ -37,11 +37,15 @@ static const int jack_switch_types[SND_JACK_SWITCH_TYPES] = {
 };
 #endif /* CONFIG_SND_JACK_INPUT_DEV */
 
+static void snd_jack_remove_debugfs(struct snd_jack *jack);
+
 static int snd_jack_dev_disconnect(struct snd_device *device)
 {
-#ifdef CONFIG_SND_JACK_INPUT_DEV
 	struct snd_jack *jack = device->device_data;
 
+	snd_jack_remove_debugfs(jack);
+
+#ifdef CONFIG_SND_JACK_INPUT_DEV
 	guard(mutex)(&jack->input_dev_lock);
 	if (!jack->input_dev)
 		return 0;
@@ -381,10 +385,14 @@ static int snd_jack_debugfs_add_inject_node(struct snd_jack *jack,
 	return 0;
 }
 
-static void snd_jack_debugfs_clear_inject_node(struct snd_jack_kctl *jack_kctl)
+static void snd_jack_remove_debugfs(struct snd_jack *jack)
 {
-	debugfs_remove(jack_kctl->jack_debugfs_root);
-	jack_kctl->jack_debugfs_root = NULL;
+	struct snd_jack_kctl *jack_kctl;
+
+	list_for_each_entry(jack_kctl, &jack->kctl_list, list) {
+		debugfs_remove(jack_kctl->jack_debugfs_root);
+		jack_kctl->jack_debugfs_root = NULL;
+	}
 }
 #else /* CONFIG_SND_JACK_INJECTION_DEBUG */
 static int snd_jack_debugfs_add_inject_node(struct snd_jack *jack,
@@ -393,7 +401,7 @@ static int snd_jack_debugfs_add_inject_node(struct snd_jack *jack,
 	return 0;
 }
 
-static void snd_jack_debugfs_clear_inject_node(struct snd_jack_kctl *jack_kctl)
+static void snd_jack_remove_debugfs(struct snd_jack *jack)
 {
 }
 #endif /* CONFIG_SND_JACK_INJECTION_DEBUG */
@@ -404,7 +412,6 @@ static void snd_jack_kctl_private_free(struct snd_kcontrol *kctl)
 
 	jack_kctl = kctl->private_data;
 	if (jack_kctl) {
-		snd_jack_debugfs_clear_inject_node(jack_kctl);
 		list_del(&jack_kctl->list);
 		kfree(jack_kctl);
 	}
@@ -497,8 +504,8 @@ int snd_jack_new(struct snd_card *card, const char *id, int type,
 		.dev_free = snd_jack_dev_free,
 #ifdef CONFIG_SND_JACK_INPUT_DEV
 		.dev_register = snd_jack_dev_register,
-		.dev_disconnect = snd_jack_dev_disconnect,
 #endif /* CONFIG_SND_JACK_INPUT_DEV */
+		.dev_disconnect = snd_jack_dev_disconnect,
 	};
 
 	if (initial_kctl) {
-- 
cgit v1.2.3


From 2be46155d792d629e8fe3188c2cde176833afe36 Mon Sep 17 00:00:00 2001
From: "Luke D. Jones" <luke@ljones.dev>
Date: Sun, 26 May 2024 21:10:32 +1200
Subject: ALSA: hda/realtek: Adjust G814JZR to use SPI init for amp

The 2024 ASUS ROG G814J model is much the same as the 2023 model
and the 2023 16" version. We can use the same Cirrus Amp quirk.

Fixes: 811dd426a9b1 ("ALSA: hda/realtek: Add quirks for Asus ROG 2024 laptops using CS35L41")
Signed-off-by: Luke D. Jones <luke@ljones.dev>
Link: https://lore.kernel.org/r/20240526091032.114545-1-luke@ljones.dev
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index e3c0b9d5552d..aa76d1c88589 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10310,7 +10310,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2),
 	SND_PCI_QUIRK(0x1043, 0x3a20, "ASUS G614JZR", ALC245_FIXUP_CS35L41_SPI_2),
 	SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
-	SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC245_FIXUP_CS35L41_SPI_2),
+	SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
 	SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2),
 	SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
 	SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC),
-- 
cgit v1.2.3


From 9e2f46cd87473c70d01fcaf8a559809e6d18dd50 Mon Sep 17 00:00:00 2001
From: Jason Nader <dev@kayoway.com>
Date: Tue, 21 May 2024 22:36:24 +0900
Subject: ata: ahci: Do not apply Intel PCS quirk on Intel Alder Lake

Commit b8b8b4e0c052 ("ata: ahci: Add Intel Alder Lake-P AHCI controller
to low power chipsets list") added Intel Alder Lake to the ahci_pci_tbl.

Because of the way that the Intel PCS quirk was implemented, having
an explicit entry in the ahci_pci_tbl caused the Intel PCS quirk to
be applied. (The quirk was not being applied if there was no explict
entry.)

Thus, entries that were added to the ahci_pci_tbl also got the Intel
PCS quirk applied.

The quirk was cleaned up in commit 7edbb6059274 ("ahci: clean up
intel_pcs_quirk"), such that it is clear which entries that actually
applies the Intel PCS quirk.

Newer Intel AHCI controllers do not need the Intel PCS quirk,
and applying it when not needed actually breaks some platforms.

Do not apply the Intel PCS quirk for Intel Alder Lake.
This is in line with how things worked before commit b8b8b4e0c052 ("ata:
ahci: Add Intel Alder Lake-P AHCI controller to low power chipsets list"),
such that certain platforms using Intel Alder Lake will work once again.

Cc: stable@vger.kernel.org # 6.7
Fixes: b8b8b4e0c052 ("ata: ahci: Add Intel Alder Lake-P AHCI controller to low power chipsets list")
Signed-off-by: Jason Nader <dev@kayoway.com>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
 drivers/ata/ahci.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 6548f10e61d9..07d66d2c5f0d 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -429,7 +429,6 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, 0x02d7), board_ahci_pcs_quirk }, /* Comet Lake PCH RAID */
 	/* Elkhart Lake IDs 0x4b60 & 0x4b62 https://sata-io.org/product/8803 not tested yet */
 	{ PCI_VDEVICE(INTEL, 0x4b63), board_ahci_pcs_quirk }, /* Elkhart Lake AHCI */
-	{ PCI_VDEVICE(INTEL, 0x7ae2), board_ahci_pcs_quirk }, /* Alder Lake-P AHCI */
 
 	/* JMicron 360/1/3/5/6, match class to avoid IDE function */
 	{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
-- 
cgit v1.2.3


From e8021b94b0412c37bcc79027c2e382086b6ce449 Mon Sep 17 00:00:00 2001
From: Roded Zats <rzats@paloaltonetworks.com>
Date: Wed, 22 May 2024 10:30:44 +0300
Subject: enic: Validate length of nl attributes in enic_set_vf_port

enic_set_vf_port assumes that the nl attribute IFLA_PORT_PROFILE
is of length PORT_PROFILE_MAX and that the nl attributes
IFLA_PORT_INSTANCE_UUID, IFLA_PORT_HOST_UUID are of length PORT_UUID_MAX.
These attributes are validated (in the function do_setlink in rtnetlink.c)
using the nla_policy ifla_port_policy. The policy defines IFLA_PORT_PROFILE
as NLA_STRING, IFLA_PORT_INSTANCE_UUID as NLA_BINARY and
IFLA_PORT_HOST_UUID as NLA_STRING. That means that the length validation
using the policy is for the max size of the attributes and not on exact
size so the length of these attributes might be less than the sizes that
enic_set_vf_port expects. This might cause an out of bands
read access in the memcpys of the data of these
attributes in enic_set_vf_port.

Fixes: f8bd909183ac ("net: Add ndo_{set|get}_vf_port support for enic dynamic vnics")
Signed-off-by: Roded Zats <rzats@paloaltonetworks.com>
Link: https://lore.kernel.org/r/20240522073044.33519-1-rzats@paloaltonetworks.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/cisco/enic/enic_main.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index f604119efc80..5f26fc3ad655 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1117,18 +1117,30 @@ static int enic_set_vf_port(struct net_device *netdev, int vf,
 	pp->request = nla_get_u8(port[IFLA_PORT_REQUEST]);
 
 	if (port[IFLA_PORT_PROFILE]) {
+		if (nla_len(port[IFLA_PORT_PROFILE]) != PORT_PROFILE_MAX) {
+			memcpy(pp, &prev_pp, sizeof(*pp));
+			return -EINVAL;
+		}
 		pp->set |= ENIC_SET_NAME;
 		memcpy(pp->name, nla_data(port[IFLA_PORT_PROFILE]),
 			PORT_PROFILE_MAX);
 	}
 
 	if (port[IFLA_PORT_INSTANCE_UUID]) {
+		if (nla_len(port[IFLA_PORT_INSTANCE_UUID]) != PORT_UUID_MAX) {
+			memcpy(pp, &prev_pp, sizeof(*pp));
+			return -EINVAL;
+		}
 		pp->set |= ENIC_SET_INSTANCE;
 		memcpy(pp->instance_uuid,
 			nla_data(port[IFLA_PORT_INSTANCE_UUID]), PORT_UUID_MAX);
 	}
 
 	if (port[IFLA_PORT_HOST_UUID]) {
+		if (nla_len(port[IFLA_PORT_HOST_UUID]) != PORT_UUID_MAX) {
+			memcpy(pp, &prev_pp, sizeof(*pp));
+			return -EINVAL;
+		}
 		pp->set |= ENIC_SET_HOST;
 		memcpy(pp->host_uuid,
 			nla_data(port[IFLA_PORT_HOST_UUID]), PORT_UUID_MAX);
-- 
cgit v1.2.3


From a4edf675ba3357f60e2ee310acc15eb9cd5a8ae0 Mon Sep 17 00:00:00 2001
From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Date: Fri, 17 May 2024 07:49:46 -0700
Subject: platform/x86: ISST: fix use-after-free in tpmi_sst_dev_remove()

In tpmi_sst_dev_remove(), tpmi_sst is dereferenced after being freed.
Fix this by reordering the kfree() post the dereference.

Fixes: 9d1d36268f3d ("platform/x86: ISST: Support partitioned systems")
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://lore.kernel.org/r/20240517144946.289615-1-harshit.m.mogalapalli@oracle.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c
index 7bac7841ff0a..7fa360073f6e 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c
@@ -1610,8 +1610,8 @@ void tpmi_sst_dev_remove(struct auxiliary_device *auxdev)
 	tpmi_sst->partition_mask_current &= ~BIT(plat_info->partition);
 	/* Free the package instance when the all partitions are removed */
 	if (!tpmi_sst->partition_mask_current) {
-		kfree(tpmi_sst);
 		isst_common.sst_inst[tpmi_sst->package_id] = NULL;
+		kfree(tpmi_sst);
 	}
 	mutex_unlock(&isst_tpmi_dev_lock);
 }
-- 
cgit v1.2.3


From 4d6ef1be2492a6789ba2b711933625cd72ced39d Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 21 May 2024 11:47:41 +0200
Subject: platform/x86: x86-android-tablets: Add "select LEDS_CLASS"

Since the x86-android-tablets now calls devm_led_classdev_register_ext()
it needs to select LEDS_CLASS as well as LEDS_CLASS' NEW_LEDS dependency.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202405182256.FsKBjIzG-lkp@intel.com/
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20240521094741.273397-1-hdegoede@redhat.com
---
 drivers/platform/x86/x86-android-tablets/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/platform/x86/x86-android-tablets/Kconfig b/drivers/platform/x86/x86-android-tablets/Kconfig
index 6603461d4273..b591419de80c 100644
--- a/drivers/platform/x86/x86-android-tablets/Kconfig
+++ b/drivers/platform/x86/x86-android-tablets/Kconfig
@@ -6,6 +6,8 @@
 config X86_ANDROID_TABLETS
 	tristate "X86 Android tablet support"
 	depends on I2C && SPI && SERIAL_DEV_BUS && ACPI && EFI && GPIOLIB && PMIC_OPREGION
+	select NEW_LEDS
+	select LEDS_CLASS
 	help
 	  X86 tablets which ship with Android as (part of) the factory image
 	  typically have various problems with their DSDTs. The factory kernels
-- 
cgit v1.2.3


From 5d059bf2b1c4d5779a4c09ec418e40eded44a187 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Wed, 22 May 2024 07:48:13 -0400
Subject: platform/x86: thinkpad_acpi: Select INPUT_SPARSEKMAP in Kconfig

Now that drivers/platform/x86/thinkpad_acpi.c uses
sparse_keymap_report_event(), it must select INPUT_SPARSEKMAP in its
Kconfig option otherwise the build fails with:

  ld: vmlinux.o: in function `tpacpi_input_send_key':
  thinkpad_acpi.c:(.text+0xd4d27f): undefined reference to `sparse_keymap_report_event'
  ld: vmlinux.o: in function `hotkey_init':
  thinkpad_acpi.c:(.init.text+0x66cb6): undefined reference to `sparse_keymap_setup'

Fixes: 42f7b965de9d ("platform/x86: thinkpad_acpi: Switch to using sparse-keymap helpers")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20240522074813.379b9fc2@gandalf.local.home
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 0ec952b5d03e..1953317541ea 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -515,6 +515,7 @@ config THINKPAD_ACPI
 	select NVRAM
 	select NEW_LEDS
 	select LEDS_CLASS
+	select INPUT_SPARSEKMAP
 	help
 	  This is a driver for the IBM and Lenovo ThinkPad laptops. It adds
 	  support for Fn-Fx key combinations, Bluetooth control, video
-- 
cgit v1.2.3


From 825fc49497957310e421454fe3fb8b8d8d8e2dd2 Mon Sep 17 00:00:00 2001
From: Martin Tůma <martin.tuma@digiteqautomotive.com>
Date: Tue, 21 May 2024 18:22:54 +0200
Subject: media: mgb4: Fix double debugfs remove
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes an error where debugfs_remove_recursive() is called first on a parent
directory and then again on a child which causes a kernel panic.

Signed-off-by: Martin Tůma <martin.tuma@digiteqautomotive.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Fixes: 0ab13674a9bd ("media: pci: mgb4: Added Digiteq Automotive MGB4 driver")
Cc: <stable@vger.kernel.org>
[hverkuil: added Fixes/Cc tags]
---
 drivers/media/pci/mgb4/mgb4_core.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/media/pci/mgb4/mgb4_core.c b/drivers/media/pci/mgb4/mgb4_core.c
index 60498a5abebf..ab4f07e2e560 100644
--- a/drivers/media/pci/mgb4/mgb4_core.c
+++ b/drivers/media/pci/mgb4/mgb4_core.c
@@ -642,9 +642,6 @@ static void mgb4_remove(struct pci_dev *pdev)
 	struct mgb4_dev *mgbdev = pci_get_drvdata(pdev);
 	int i;
 
-#ifdef CONFIG_DEBUG_FS
-	debugfs_remove_recursive(mgbdev->debugfs);
-#endif
 #if IS_REACHABLE(CONFIG_HWMON)
 	hwmon_device_unregister(mgbdev->hwmon_dev);
 #endif
@@ -659,6 +656,10 @@ static void mgb4_remove(struct pci_dev *pdev)
 		if (mgbdev->vin[i])
 			mgb4_vin_free(mgbdev->vin[i]);
 
+#ifdef CONFIG_DEBUG_FS
+	debugfs_remove_recursive(mgbdev->debugfs);
+#endif
+
 	device_remove_groups(&mgbdev->pdev->dev, mgb4_pci_groups);
 	free_spi(mgbdev);
 	free_i2c(mgbdev);
-- 
cgit v1.2.3


From 0b178b02673998f5acca5a0365a8858ca45beedb Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 23 May 2024 16:36:01 +0200
Subject: platform/x86: touchscreen_dmi: Add support for setting touchscreen
 properties from cmdline

On x86/ACPI platforms touchscreens mostly just work without needing any
device/model specific configuration. But in some cases (mostly with Silead
and Goodix touchscreens) it is still necessary to manually specify various
touchscreen-properties on a per model basis.

touchscreen_dmi is a special place for DMI quirks for this, but it can be
challenging for users to figure out the right property values, especially
for Silead touchscreens where non of these can be read back from
the touchscreen-controller.

ATM users can only test touchscreen properties by editing touchscreen_dmi.c
and then building a completely new kernel which makes it unnecessary
difficult for users to test and submit properties when necessary for their
laptop / tablet model.

Add support for specifying properties on the kernel commandline to allow
users to easily figure out the right settings. See the added documentation
in kernel-parameters.txt for the commandline syntax.

Cc: Gregor Riepl <onitake@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20240523143601.47555-1-hdegoede@redhat.com
---
 Documentation/admin-guide/kernel-parameters.txt | 22 +++++++
 drivers/platform/x86/touchscreen_dmi.c          | 79 ++++++++++++++++++++++++-
 2 files changed, 98 insertions(+), 3 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 500cfa776225..b600df82669d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1921,6 +1921,28 @@
 				Format:
 				<bus_id>,<clkrate>
 
+	i2c_touchscreen_props= [HW,ACPI,X86]
+			Set device-properties for ACPI-enumerated I2C-attached
+			touchscreen, to e.g. fix coordinates of upside-down
+			mounted touchscreens. If you need this option please
+			submit a drivers/platform/x86/touchscreen_dmi.c patch
+			adding a DMI quirk for this.
+
+			Format:
+			<ACPI_HW_ID>:<prop_name>=<val>[:prop_name=val][:...]
+			Where <val> is one of:
+			Omit "=<val>" entirely	Set a boolean device-property
+			Unsigned number		Set a u32 device-property
+			Anything else		Set a string device-property
+
+			Examples (split over multiple lines):
+			i2c_touchscreen_props=GDIX1001:touchscreen-inverted-x:
+			touchscreen-inverted-y
+
+			i2c_touchscreen_props=MSSL1680:touchscreen-size-x=1920:
+			touchscreen-size-y=1080:touchscreen-inverted-y:
+			firmware-name=gsl1680-vendor-model.fw:silead,home-button
+
 	i8042.debug	[HW] Toggle i8042 debug mode
 	i8042.unmask_kbd_data
 			[HW] Enable printing of interrupt data from the KBD port
diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index c6a10ec2c83f..b021fb9e579e 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -9,10 +9,13 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/ctype.h>
 #include <linux/device.h>
 #include <linux/dmi.h>
 #include <linux/efi_embedded_fw.h>
 #include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kstrtox.h>
 #include <linux/notifier.h>
 #include <linux/property.h>
 #include <linux/string.h>
@@ -1817,7 +1820,7 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
 	{ }
 };
 
-static const struct ts_dmi_data *ts_data;
+static struct ts_dmi_data *ts_data;
 
 static void ts_dmi_add_props(struct i2c_client *client)
 {
@@ -1852,6 +1855,64 @@ static int ts_dmi_notifier_call(struct notifier_block *nb,
 	return 0;
 }
 
+#define MAX_CMDLINE_PROPS 16
+
+static struct property_entry ts_cmdline_props[MAX_CMDLINE_PROPS + 1];
+
+static struct ts_dmi_data ts_cmdline_data = {
+	.properties = ts_cmdline_props,
+};
+
+static int __init ts_parse_props(char *str)
+{
+	/* Save the original str to show it on syntax errors */
+	char orig_str[256];
+	char *name, *value;
+	u32 u32val;
+	int i, ret;
+
+	strscpy(orig_str, str, sizeof(orig_str));
+
+	/*
+	 * str is part of the static_command_line from init/main.c and poking
+	 * holes in that by writing 0 to it is allowed, as is taking long
+	 * lasting references to it.
+	 */
+	ts_cmdline_data.acpi_name = strsep(&str, ":");
+
+	for (i = 0; i < MAX_CMDLINE_PROPS; i++) {
+		name = strsep(&str, ":");
+		if (!name || !name[0])
+			break;
+
+		/* Replace '=' with 0 and make value point past '=' or NULL */
+		value = name;
+		strsep(&value, "=");
+		if (!value) {
+			ts_cmdline_props[i] = PROPERTY_ENTRY_BOOL(name);
+		} else if (isdigit(value[0])) {
+			ret = kstrtou32(value, 0, &u32val);
+			if (ret)
+				goto syntax_error;
+
+			ts_cmdline_props[i] = PROPERTY_ENTRY_U32(name, u32val);
+		} else {
+			ts_cmdline_props[i] = PROPERTY_ENTRY_STRING(name, value);
+		}
+	}
+
+	if (!i || str)
+		goto syntax_error;
+
+	ts_data = &ts_cmdline_data;
+	return 1;
+
+syntax_error:
+	pr_err("Invalid '%s' value for 'i2c_touchscreen_props='\n", orig_str);
+	return 1; /* "i2c_touchscreen_props=" is still a known parameter */
+}
+__setup("i2c_touchscreen_props=", ts_parse_props);
+
 static struct notifier_block ts_dmi_notifier = {
 	.notifier_call = ts_dmi_notifier_call,
 };
@@ -1859,13 +1920,25 @@ static struct notifier_block ts_dmi_notifier = {
 static int __init ts_dmi_init(void)
 {
 	const struct dmi_system_id *dmi_id;
+	struct ts_dmi_data *ts_data_dmi;
 	int error;
 
 	dmi_id = dmi_first_match(touchscreen_dmi_table);
-	if (!dmi_id)
+	ts_data_dmi = dmi_id ? dmi_id->driver_data : NULL;
+
+	if (ts_data) {
+		/*
+		 * Kernel cmdline provided data takes precedence, copy over
+		 * DMI efi_embedded_fw info if available.
+		 */
+		if (ts_data_dmi)
+			ts_data->embedded_fw = ts_data_dmi->embedded_fw;
+	} else if (ts_data_dmi) {
+		ts_data = ts_data_dmi;
+	} else {
 		return 0; /* Not an error */
+	}
 
-	ts_data = dmi_id->driver_data;
 	/* Some dmi table entries only provide an efi_embedded_fw_desc */
 	if (!ts_data->properties)
 		return 0;
-- 
cgit v1.2.3


From 7c8639aa41343fd7b3dbe09baf6b0791fcc407a1 Mon Sep 17 00:00:00 2001
From: hmtheboy154 <buingoc67@gmail.com>
Date: Mon, 27 May 2024 11:14:46 +0200
Subject: platform/x86: touchscreen_dmi: Add info for GlobalSpace SolT IVW
 11.6" tablet

This is a tablet created by GlobalSpace Technologies Limited
which uses an Intel Atom x5-Z8300, 4GB of RAM & 64GB of storage.

Link: https://web.archive.org/web/20171102141952/http://globalspace.in/11.6-device.html
Signed-off-by: hmtheboy154 <buingoc67@gmail.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20240527091447.248849-2-hdegoede@redhat.com
---
 drivers/platform/x86/touchscreen_dmi.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index b021fb9e579e..6c03e7daadd4 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -900,6 +900,22 @@ static const struct ts_dmi_data schneider_sct101ctm_data = {
 	.properties	= schneider_sct101ctm_props,
 };
 
+static const struct property_entry globalspace_solt_ivw116_props[] = {
+	PROPERTY_ENTRY_U32("touchscreen-min-x", 7),
+	PROPERTY_ENTRY_U32("touchscreen-min-y", 22),
+	PROPERTY_ENTRY_U32("touchscreen-size-x", 1723),
+	PROPERTY_ENTRY_U32("touchscreen-size-y", 1077),
+	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-globalspace-solt-ivw116.fw"),
+	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+	PROPERTY_ENTRY_BOOL("silead,home-button"),
+	{ }
+};
+
+static const struct ts_dmi_data globalspace_solt_ivw116_data = {
+	.acpi_name	= "MSSL1680:00",
+	.properties	= globalspace_solt_ivw116_props,
+};
+
 static const struct property_entry techbite_arc_11_6_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-min-x", 5),
 	PROPERTY_ENTRY_U32("touchscreen-min-y", 7),
@@ -1627,6 +1643,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "SCT101CTM"),
 		},
 	},
+	{
+		/* GlobalSpace SoLT IVW 11.6" */
+		.driver_data = (void *)&globalspace_solt_ivw116_data,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Globalspace Tech Pvt Ltd"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "SolTIVW"),
+			DMI_MATCH(DMI_PRODUCT_SKU, "PN20170413488"),
+		},
+	},
 	{
 		/* Techbite Arc 11.6 */
 		.driver_data = (void *)&techbite_arc_11_6_data,
-- 
cgit v1.2.3


From 3050052613790e75b5e4a8536930426b0a8b0774 Mon Sep 17 00:00:00 2001
From: hmtheboy154 <buingoc67@gmail.com>
Date: Mon, 27 May 2024 11:14:47 +0200
Subject: platform/x86: touchscreen_dmi: Add info for the EZpad 6s Pro

The "EZpad 6s Pro" uses the same touchscreen as the "EZpad 6 Pro B",
unlike the "Ezpad 6 Pro" which has its own touchscreen.

Signed-off-by: hmtheboy154 <buingoc67@gmail.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20240527091447.248849-3-hdegoede@redhat.com
---
 drivers/platform/x86/touchscreen_dmi.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index 6c03e7daadd4..2d9ca2292ea1 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -1404,6 +1404,17 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
 			DMI_MATCH(DMI_BIOS_DATE, "04/24/2018"),
 		},
 	},
+	{
+		/* Jumper EZpad 6s Pro */
+		.driver_data = (void *)&jumper_ezpad_6_pro_b_data,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Jumper"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Ezpad"),
+			/* Above matches are too generic, add bios match */
+			DMI_MATCH(DMI_BIOS_VERSION, "E.WSA116_8.E1.042.bin"),
+			DMI_MATCH(DMI_BIOS_DATE, "01/08/2020"),
+		},
+	},
 	{
 		/* Jumper EZpad 6 m4 */
 		.driver_data = (void *)&jumper_ezpad_6_m4_data,
-- 
cgit v1.2.3


From 21a22ed618d072a47597e63ee591973c18524880 Mon Sep 17 00:00:00 2001
From: Geliang Tang <tanggeliang@kylinos.cn>
Date: Wed, 22 May 2024 18:45:04 +0800
Subject: selftests: hsr: Fix "File exists" errors for hsr_ping

The hsr_ping test reports the following errors:

 INFO: preparing interfaces for HSRv0.
 INFO: Initial validation ping.
 INFO: Longer ping test.
 INFO: Cutting one link.
 INFO: Delay the link and drop a few packages.
 INFO: All good.
 INFO: preparing interfaces for HSRv1.
 RTNETLINK answers: File exists
 RTNETLINK answers: File exists
 RTNETLINK answers: File exists
 RTNETLINK answers: File exists
 RTNETLINK answers: File exists
 RTNETLINK answers: File exists
 Error: ipv4: Address already assigned.
 Error: ipv6: address already assigned.
 Error: ipv4: Address already assigned.
 Error: ipv6: address already assigned.
 Error: ipv4: Address already assigned.
 Error: ipv6: address already assigned.
 INFO: Initial validation ping.

That is because the cleanup code for the 2nd round test before
"setup_hsr_interfaces 1" is removed incorrectly in commit 680fda4f6714
("test: hsr: Remove script code already implemented in lib.sh").

This patch fixes it by re-setup the namespaces using

	setup_ns ns1 ns2 ns3

command before "setup_hsr_interfaces 1". It deletes previous namespaces
and create new ones.

Fixes: 680fda4f6714 ("test: hsr: Remove script code already implemented in lib.sh")
Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
Link: https://lore.kernel.org/r/6485d3005f467758d49f0f313c8c009759ba6b05.1716374462.git.tanggeliang@kylinos.cn
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/hsr/hsr_ping.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
index 790294c8af83..3684b813b0f6 100755
--- a/tools/testing/selftests/net/hsr/hsr_ping.sh
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -174,6 +174,8 @@ trap cleanup_all_ns EXIT
 setup_hsr_interfaces 0
 do_complete_ping_test
 
+setup_ns ns1 ns2 ns3
+
 setup_hsr_interfaces 1
 do_complete_ping_test
 
-- 
cgit v1.2.3


From 97e1db06c7bb948da10ba85acad8030b56886593 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 23 May 2024 00:40:02 +0900
Subject: af_unix: Annotate data-race around unix_sk(sk)->addr.

Once unix_sk(sk)->addr is assigned under net->unx.table.locks and
unix_sk(sk)->bindlock, *(unix_sk(sk)->addr) and unix_sk(sk)->path are
fully set up, and unix_sk(sk)->addr is never changed.

unix_getname() and unix_copy_addr() access the two fields locklessly,
and commit ae3b564179bf ("missing barriers in some of unix_sock ->addr
and ->path accesses") added smp_store_release() and smp_load_acquire()
pairs.

In other functions, we still read unix_sk(sk)->addr locklessly to check
if the socket is bound, and KCSAN complains about it.  [0]

Given these functions have no dependency for *(unix_sk(sk)->addr) and
unix_sk(sk)->path, READ_ONCE() is enough to annotate the data-race.

Note that it is safe to access unix_sk(sk)->addr locklessly if the socket
is found in the hash table.  For example, the lockless read of otheru->addr
in unix_stream_connect() is safe.

Note also that newu->addr there is of the child socket that is still not
accessible from userspace, and smp_store_release() publishes the address
in case the socket is accept()ed and unix_getname() / unix_copy_addr()
is called.

[0]:
BUG: KCSAN: data-race in unix_bind / unix_listen

write (marked) to 0xffff88805f8d1840 of 8 bytes by task 13723 on cpu 0:
 __unix_set_addr_hash net/unix/af_unix.c:329 [inline]
 unix_bind_bsd net/unix/af_unix.c:1241 [inline]
 unix_bind+0x881/0x1000 net/unix/af_unix.c:1319
 __sys_bind+0x194/0x1e0 net/socket.c:1847
 __do_sys_bind net/socket.c:1858 [inline]
 __se_sys_bind net/socket.c:1856 [inline]
 __x64_sys_bind+0x40/0x50 net/socket.c:1856
 do_syscall_x64 arch/x86/entry/common.c:52 [inline]
 do_syscall_64+0x4f/0x110 arch/x86/entry/common.c:83
 entry_SYSCALL_64_after_hwframe+0x46/0x4e

read to 0xffff88805f8d1840 of 8 bytes by task 13724 on cpu 1:
 unix_listen+0x72/0x180 net/unix/af_unix.c:734
 __sys_listen+0xdc/0x160 net/socket.c:1881
 __do_sys_listen net/socket.c:1890 [inline]
 __se_sys_listen net/socket.c:1888 [inline]
 __x64_sys_listen+0x2e/0x40 net/socket.c:1888
 do_syscall_x64 arch/x86/entry/common.c:52 [inline]
 do_syscall_64+0x4f/0x110 arch/x86/entry/common.c:83
 entry_SYSCALL_64_after_hwframe+0x46/0x4e

value changed: 0x0000000000000000 -> 0xffff88807b5b1b40

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 13724 Comm: syz-executor.4 Not tainted 6.8.0-12822-gcd51db110a7e #12
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: syzkaller <syzkaller@googlegroups.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20240522154002.77857-1-kuniyu@amazon.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/af_unix.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e4af6616e1df..fe631212a345 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -731,7 +731,7 @@ static int unix_listen(struct socket *sock, int backlog)
 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 		goto out;	/* Only stream/seqpacket sockets accept */
 	err = -EINVAL;
-	if (!u->addr)
+	if (!READ_ONCE(u->addr))
 		goto out;	/* No listens on an unbound socket */
 	unix_state_lock(sk);
 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
@@ -1369,7 +1369,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 
 		if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
 		     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
-		    !unix_sk(sk)->addr) {
+		    !READ_ONCE(unix_sk(sk)->addr)) {
 			err = unix_autobind(sk);
 			if (err)
 				goto out;
@@ -1481,7 +1481,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 		goto out;
 
 	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
-	     test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
+	     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
+	    !READ_ONCE(u->addr)) {
 		err = unix_autobind(sk);
 		if (err)
 			goto out;
@@ -1950,7 +1951,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 	}
 
 	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
-	     test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
+	     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
+	    !READ_ONCE(u->addr)) {
 		err = unix_autobind(sk);
 		if (err)
 			goto out;
-- 
cgit v1.2.3


From 51d1b25a720982324871338b1a36b197ec9bd6f0 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Thu, 23 May 2024 00:42:18 +0900
Subject: af_unix: Read sk->sk_hash under bindlock during bind().

syzkaller reported data-race of sk->sk_hash in unix_autobind() [0],
and the same ones exist in unix_bind_bsd() and unix_bind_abstract().

The three bind() functions prefetch sk->sk_hash locklessly and
use it later after validating that unix_sk(sk)->addr is NULL under
unix_sk(sk)->bindlock.

The prefetched sk->sk_hash is the hash value of unbound socket set
in unix_create1() and does not change until bind() completes.

There could be a chance that sk->sk_hash changes after the lockless
read.  However, in such a case, non-NULL unix_sk(sk)->addr is visible
under unix_sk(sk)->bindlock, and bind() returns -EINVAL without using
the prefetched value.

The KCSAN splat is false-positive, but let's silence it by reading
sk->sk_hash under unix_sk(sk)->bindlock.

[0]:
BUG: KCSAN: data-race in unix_autobind / unix_autobind

write to 0xffff888034a9fb88 of 4 bytes by task 4468 on cpu 0:
 __unix_set_addr_hash net/unix/af_unix.c:331 [inline]
 unix_autobind+0x47a/0x7d0 net/unix/af_unix.c:1185
 unix_dgram_connect+0x7e3/0x890 net/unix/af_unix.c:1373
 __sys_connect_file+0xd7/0xe0 net/socket.c:2048
 __sys_connect+0x114/0x140 net/socket.c:2065
 __do_sys_connect net/socket.c:2075 [inline]
 __se_sys_connect net/socket.c:2072 [inline]
 __x64_sys_connect+0x40/0x50 net/socket.c:2072
 do_syscall_x64 arch/x86/entry/common.c:52 [inline]
 do_syscall_64+0x4f/0x110 arch/x86/entry/common.c:83
 entry_SYSCALL_64_after_hwframe+0x46/0x4e

read to 0xffff888034a9fb88 of 4 bytes by task 4465 on cpu 1:
 unix_autobind+0x28/0x7d0 net/unix/af_unix.c:1134
 unix_dgram_connect+0x7e3/0x890 net/unix/af_unix.c:1373
 __sys_connect_file+0xd7/0xe0 net/socket.c:2048
 __sys_connect+0x114/0x140 net/socket.c:2065
 __do_sys_connect net/socket.c:2075 [inline]
 __se_sys_connect net/socket.c:2072 [inline]
 __x64_sys_connect+0x40/0x50 net/socket.c:2072
 do_syscall_x64 arch/x86/entry/common.c:52 [inline]
 do_syscall_64+0x4f/0x110 arch/x86/entry/common.c:83
 entry_SYSCALL_64_after_hwframe+0x46/0x4e

value changed: 0x000000e4 -> 0x000001e3

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 4465 Comm: syz-executor.0 Not tainted 6.8.0-12822-gcd51db110a7e #12
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014

Fixes: afd20b9290e1 ("af_unix: Replace the big lock with small locks.")
Reported-by: syzkaller <syzkaller@googlegroups.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20240522154218.78088-1-kuniyu@amazon.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/af_unix.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index fe631212a345..25b49efc0926 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1131,8 +1131,8 @@ static struct sock *unix_find_other(struct net *net,
 
 static int unix_autobind(struct sock *sk)
 {
-	unsigned int new_hash, old_hash = sk->sk_hash;
 	struct unix_sock *u = unix_sk(sk);
+	unsigned int new_hash, old_hash;
 	struct net *net = sock_net(sk);
 	struct unix_address *addr;
 	u32 lastnum, ordernum;
@@ -1155,6 +1155,7 @@ static int unix_autobind(struct sock *sk)
 	addr->name->sun_family = AF_UNIX;
 	refcount_set(&addr->refcnt, 1);
 
+	old_hash = sk->sk_hash;
 	ordernum = get_random_u32();
 	lastnum = ordernum & 0xFFFFF;
 retry:
@@ -1195,8 +1196,8 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
 {
 	umode_t mode = S_IFSOCK |
 	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
-	unsigned int new_hash, old_hash = sk->sk_hash;
 	struct unix_sock *u = unix_sk(sk);
+	unsigned int new_hash, old_hash;
 	struct net *net = sock_net(sk);
 	struct mnt_idmap *idmap;
 	struct unix_address *addr;
@@ -1234,6 +1235,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
 	if (u->addr)
 		goto out_unlock;
 
+	old_hash = sk->sk_hash;
 	new_hash = unix_bsd_hash(d_backing_inode(dentry));
 	unix_table_double_lock(net, old_hash, new_hash);
 	u->path.mnt = mntget(parent.mnt);
@@ -1261,8 +1263,8 @@ out:
 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
 			      int addr_len)
 {
-	unsigned int new_hash, old_hash = sk->sk_hash;
 	struct unix_sock *u = unix_sk(sk);
+	unsigned int new_hash, old_hash;
 	struct net *net = sock_net(sk);
 	struct unix_address *addr;
 	int err;
@@ -1280,6 +1282,7 @@ static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
 		goto out_mutex;
 	}
 
+	old_hash = sk->sk_hash;
 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
 	unix_table_double_lock(net, old_hash, new_hash);
 
-- 
cgit v1.2.3


From 1684842147677a1279bcff95f8adb6de9a656e30 Mon Sep 17 00:00:00 2001
From: Hariprasad Kelam <hkelam@marvell.com>
Date: Thu, 23 May 2024 13:06:26 +0530
Subject: Octeontx2-pf: Free send queue buffers incase of leaf to inner

There are two type of classes. "Leaf classes" that are  the
bottom of the class hierarchy. "Inner classes" that are neither
the root class nor leaf classes. QoS rules can only specify leaf
classes as targets for traffic.

			 Root
		        /  \
		       /    \
                      1      2
                             /\
                            /  \
                           4    5
               classes 1,4 and 5 are leaf classes.
               class 2 is a inner class.

When a leaf class made as inner, or vice versa, resources associated
with send queue (send queue buffers and transmit schedulers) are not
getting freed.

Fixes: 5e6808b4c68d ("octeontx2-pf: Add support for HTB offload")
Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
Link: https://lore.kernel.org/r/20240523073626.4114-1-hkelam@marvell.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/marvell/octeontx2/nic/qos.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
index 070711df612e..edac008099c0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c
@@ -1422,7 +1422,10 @@ static int otx2_qos_leaf_to_inner(struct otx2_nic *pfvf, u16 classid,
 	otx2_qos_read_txschq_cfg(pfvf, node, old_cfg);
 
 	/* delete the txschq nodes allocated for this node */
+	otx2_qos_disable_sq(pfvf, qid);
+	otx2_qos_free_hw_node_schq(pfvf, node);
 	otx2_qos_free_sw_node_schq(pfvf, node);
+	pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ;
 
 	/* mark this node as htb inner node */
 	WRITE_ONCE(node->qid, OTX2_QOS_QID_INNER);
@@ -1632,6 +1635,7 @@ static int otx2_qos_leaf_del_last(struct otx2_nic *pfvf, u16 classid, bool force
 		dwrr_del_node = true;
 
 	/* destroy the leaf node */
+	otx2_qos_disable_sq(pfvf, qid);
 	otx2_qos_destroy_node(pfvf, node);
 	pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ;
 
-- 
cgit v1.2.3


From d7ba701da636afae17d8e1243b3d12eed149abcb Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Thu, 2 May 2024 10:08:25 +0000
Subject: xfs: Clear W=1 warning in xfs_iwalk_run_callbacks()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For CONFIG_XFS_DEBUG unset, xfs_iwalk_run_callbacks() generates the
following warning for when building with W=1:

fs/xfs/xfs_iwalk.c: In function ‘xfs_iwalk_run_callbacks’:
fs/xfs/xfs_iwalk.c:354:42: error: variable ‘irec’ set but not used [-Werror=unused-but-set-variable]
  354 |         struct xfs_inobt_rec_incore     *irec;
      |                                          ^~~~
cc1: all warnings being treated as errors

Drop @irec, as it is only an intermediate variable.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
---
 fs/xfs/xfs_iwalk.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index 730c8d48da28..86f14ec7c31f 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -351,7 +351,6 @@ xfs_iwalk_run_callbacks(
 	int				*has_more)
 {
 	struct xfs_mount		*mp = iwag->mp;
-	struct xfs_inobt_rec_incore	*irec;
 	xfs_agino_t			next_agino;
 	int				error;
 
@@ -361,8 +360,8 @@ xfs_iwalk_run_callbacks(
 
 	/* Delete cursor but remember the last record we cached... */
 	xfs_iwalk_del_inobt(iwag->tp, curpp, agi_bpp, 0);
-	irec = &iwag->recs[iwag->nr_recs - 1];
-	ASSERT(next_agino >= irec->ir_startino + XFS_INODES_PER_CHUNK);
+	ASSERT(next_agino >= iwag->recs[iwag->nr_recs - 1].ir_startino +
+			XFS_INODES_PER_CHUNK);
 
 	if (iwag->drop_trans) {
 		xfs_trans_cancel(iwag->tp);
-- 
cgit v1.2.3


From b33874fb7f28326380562f208d948bab785fbd6f Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Thu, 2 May 2024 10:08:26 +0000
Subject: xfs: Stop using __maybe_unused in xfs_alloc.c

In both xfs_alloc_cur_finish() and xfs_alloc_ag_vextent_exact(), local
variable @afg is tagged as __maybe_unused. Otherwise an unused variable
warning would be generated for when building with W=1 and CONFIG_XFS_DEBUG
unset. In both cases, the variable is unused as it is only referenced in
an ASSERT() call, which is compiled out (in this config).

It is generally a poor programming style to use __maybe_unused for
variables.

The ASSERT() call is to verify that agbno of the end of the extent is
within bounds for both functions. @afg is used as an intermediate variable
to find the AG length.

However xfs_verify_agbext() already exists to verify a valid extent range.
The arguments for calling xfs_verify_agbext() are already available, so use
that instead.

An advantage of using xfs_verify_agbext() is that it verifies that both the
start and the end of the extent are within the bounds of the AG and
catches overflows.

Suggested-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: John Garry <john.g.garry@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
---
 fs/xfs/libxfs/xfs_alloc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 6cb8b2ddc541..6c55a6e88eba 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1008,13 +1008,12 @@ xfs_alloc_cur_finish(
 	struct xfs_alloc_arg	*args,
 	struct xfs_alloc_cur	*acur)
 {
-	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
 	int			error;
 
 	ASSERT(acur->cnt && acur->bnolt);
 	ASSERT(acur->bno >= acur->rec_bno);
 	ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
-	ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));
+	ASSERT(xfs_verify_agbext(args->pag, acur->rec_bno, acur->rec_len));
 
 	error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
 				      acur->rec_len, acur->bno, acur->len, 0);
@@ -1217,7 +1216,6 @@ STATIC int			/* error */
 xfs_alloc_ag_vextent_exact(
 	xfs_alloc_arg_t	*args)	/* allocation argument structure */
 {
-	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
 	struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
 	struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
 	int		error;
@@ -1297,7 +1295,7 @@ xfs_alloc_ag_vextent_exact(
 	 */
 	cnt_cur = xfs_cntbt_init_cursor(args->mp, args->tp, args->agbp,
 					args->pag);
-	ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
+	ASSERT(xfs_verify_agbext(args->pag, args->agbno, args->len));
 	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
 				      args->len, XFSA_FIXUP_BNO_OK);
 	if (error) {
-- 
cgit v1.2.3


From 2b3f004d3d518ec7a392066d935fd85c81412e33 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Tue, 21 May 2024 23:01:45 -0700
Subject: xfs: drop xfarray sortinfo folio on error

Chandan Babu reports the following livelock in xfs/708:

 run fstests xfs/708 at 2024-05-04 15:35:29
 XFS (loop16): EXPERIMENTAL online scrub feature in use. Use at your own risk!
 XFS (loop5): Mounting V5 Filesystem e96086f0-a2f9-4424-a1d5-c75d53d823be
 XFS (loop5): Ending clean mount
 XFS (loop5): Quotacheck needed: Please wait.
 XFS (loop5): Quotacheck: Done.
 XFS (loop5): EXPERIMENTAL online scrub feature in use. Use at your own risk!
 INFO: task xfs_io:143725 blocked for more than 122 seconds.
       Not tainted 6.9.0-rc4+ #1
 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 task:xfs_io          state:D stack:0     pid:143725 tgid:143725 ppid:117661 flags:0x00004006
 Call Trace:
  <TASK>
  __schedule+0x69c/0x17a0
  schedule+0x74/0x1b0
  io_schedule+0xc4/0x140
  folio_wait_bit_common+0x254/0x650
  shmem_undo_range+0x9d5/0xb40
  shmem_evict_inode+0x322/0x8f0
  evict+0x24e/0x560
  __dentry_kill+0x17d/0x4d0
  dput+0x263/0x430
  __fput+0x2fc/0xaa0
  task_work_run+0x132/0x210
  get_signal+0x1a8/0x1910
  arch_do_signal_or_restart+0x7b/0x2f0
  syscall_exit_to_user_mode+0x1c2/0x200
  do_syscall_64+0x72/0x170
  entry_SYSCALL_64_after_hwframe+0x76/0x7e

The shmem code is trying to drop all the folios attached to a shmem
file and gets stuck on a locked folio after a bnobt repair.  It looks
like the process has a signal pending, so I started looking for places
where we lock an xfile folio and then deal with a fatal signal.

I found a bug in xfarray_sort_scan via code inspection.  This function
is called to set up the scanning phase of a quicksort operation, which
may involve grabbing a locked xfile folio.  If we exit the function with
an error code, the caller does not call xfarray_sort_scan_done to put
the xfile folio.  If _sort_scan returns an error code while si->folio is
set, we leak the reference and never unlock the folio.

Therefore, change xfarray_sort to call _scan_done on exit.  This is safe
to call multiple times because it sets si->folio to NULL and ignores a
NULL si->folio.  Also change _sort_scan to use an intermediate variable
so that we never pollute si->folio with an errptr.

Fixes: 232ea052775f9 ("xfs: enable sorting of xfile-backed arrays")
Reported-by: Chandan Babu R <chandanbabu@kernel.org>
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
---
 fs/xfs/scrub/xfarray.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/scrub/xfarray.c b/fs/xfs/scrub/xfarray.c
index 9185ae7088d4..cdd13ed9c569 100644
--- a/fs/xfs/scrub/xfarray.c
+++ b/fs/xfs/scrub/xfarray.c
@@ -822,12 +822,14 @@ xfarray_sort_scan(
 
 	/* Grab the first folio that backs this array element. */
 	if (!si->folio) {
+		struct folio	*folio;
 		loff_t		next_pos;
 
-		si->folio = xfile_get_folio(si->array->xfile, idx_pos,
+		folio = xfile_get_folio(si->array->xfile, idx_pos,
 				si->array->obj_size, XFILE_ALLOC);
-		if (IS_ERR(si->folio))
-			return PTR_ERR(si->folio);
+		if (IS_ERR(folio))
+			return PTR_ERR(folio);
+		si->folio = folio;
 
 		si->first_folio_idx = xfarray_idx(si->array,
 				folio_pos(si->folio) + si->array->obj_size - 1);
@@ -1048,6 +1050,7 @@ xfarray_sort(
 
 out_free:
 	trace_xfarray_sort_stats(si, error);
+	xfarray_sort_scan_done(si);
 	kvfree(si);
 	return error;
 }
-- 
cgit v1.2.3


From 97835e6866796874571646a1a8ff44f24c0b39f7 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Tue, 21 May 2024 23:02:01 -0700
Subject: xfs: fix xfs_init_attr_trans not handling explicit operation codes

When we were converting the attr code to use an explicit operation code
instead of keying off of attr->value being null, we forgot to change the
code that initializes the transaction reservation.  Split the function
into two helpers that handle the !remove and remove cases, then fix both
callsites to handle this correctly.

Fixes: c27411d4c640 ("xfs: make attr removal an explicit operation")
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
---
 fs/xfs/libxfs/xfs_attr.c | 40 +++++++++++++++++++---------------------
 fs/xfs/libxfs/xfs_attr.h |  3 +--
 fs/xfs/xfs_attr_item.c   | 17 +++++++++++++++--
 3 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 430cd3244c14..f30bcc64100d 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -329,26 +329,20 @@ xfs_attr_calc_size(
 	return nblks;
 }
 
-/* Initialize transaction reservation for attr operations */
-void
-xfs_init_attr_trans(
-	struct xfs_da_args	*args,
-	struct xfs_trans_res	*tres,
-	unsigned int		*total)
+/* Initialize transaction reservation for an xattr set/replace/upsert */
+inline struct xfs_trans_res
+xfs_attr_set_resv(
+	const struct xfs_da_args	*args)
 {
-	struct xfs_mount	*mp = args->dp->i_mount;
-
-	if (args->value) {
-		tres->tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
-				 M_RES(mp)->tr_attrsetrt.tr_logres *
-				 args->total;
-		tres->tr_logcount = XFS_ATTRSET_LOG_COUNT;
-		tres->tr_logflags = XFS_TRANS_PERM_LOG_RES;
-		*total = args->total;
-	} else {
-		*tres = M_RES(mp)->tr_attrrm;
-		*total = XFS_ATTRRM_SPACE_RES(mp);
-	}
+	struct xfs_mount		*mp = args->dp->i_mount;
+	struct xfs_trans_res		ret = {
+		.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
+			    M_RES(mp)->tr_attrsetrt.tr_logres * args->total,
+		.tr_logcount		= XFS_ATTRSET_LOG_COUNT,
+		.tr_logflags		= XFS_TRANS_PERM_LOG_RES,
+	};
+
+	return ret;
 }
 
 /*
@@ -1006,7 +1000,7 @@ xfs_attr_set(
 	struct xfs_trans_res	tres;
 	int			error, local;
 	int			rmt_blks = 0;
-	unsigned int		total;
+	unsigned int		total = 0;
 
 	ASSERT(!args->trans);
 
@@ -1033,10 +1027,15 @@ xfs_attr_set(
 
 		if (!local)
 			rmt_blks = xfs_attr3_rmt_blocks(mp, args->valuelen);
+
+		tres = xfs_attr_set_resv(args);
+		total = args->total;
 		break;
 	case XFS_ATTRUPDATE_REMOVE:
 		XFS_STATS_INC(mp, xs_attr_remove);
 		rmt_blks = xfs_attr3_max_rmt_blocks(mp);
+		tres = M_RES(mp)->tr_attrrm;
+		total = XFS_ATTRRM_SPACE_RES(mp);
 		break;
 	}
 
@@ -1044,7 +1043,6 @@ xfs_attr_set(
 	 * Root fork attributes can use reserved data blocks for this
 	 * operation if necessary
 	 */
-	xfs_init_attr_trans(args, &tres, &total);
 	error = xfs_trans_alloc_inode(dp, &tres, total, 0, rsvd, &args->trans);
 	if (error)
 		return error;
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 088cb7b30168..0e51d0723f9a 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -565,8 +565,7 @@ bool xfs_attr_check_namespace(unsigned int attr_flags);
 bool xfs_attr_namecheck(unsigned int attr_flags, const void *name,
 		size_t length);
 int xfs_attr_calc_size(struct xfs_da_args *args, int *local);
-void xfs_init_attr_trans(struct xfs_da_args *args, struct xfs_trans_res *tres,
-			 unsigned int *total);
+struct xfs_trans_res xfs_attr_set_resv(const struct xfs_da_args *args);
 
 /*
  * Check to see if the attr should be upgraded from non-existent or shortform to
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index 2b10ac4c5fce..f683b7a9323f 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -746,7 +746,7 @@ xfs_attr_recover_work(
 	struct xfs_attri_log_format	*attrp;
 	struct xfs_attri_log_nameval	*nv = attrip->attri_nameval;
 	int				error;
-	int				total;
+	unsigned int			total = 0;
 
 	/*
 	 * First check the validity of the attr described by the ATTRI.  If any
@@ -763,7 +763,20 @@ xfs_attr_recover_work(
 		return PTR_ERR(attr);
 	args = attr->xattri_da_args;
 
-	xfs_init_attr_trans(args, &resv, &total);
+	switch (xfs_attr_intent_op(attr)) {
+	case XFS_ATTRI_OP_FLAGS_PPTR_SET:
+	case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE:
+	case XFS_ATTRI_OP_FLAGS_SET:
+	case XFS_ATTRI_OP_FLAGS_REPLACE:
+		resv = xfs_attr_set_resv(args);
+		total = args->total;
+		break;
+	case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE:
+	case XFS_ATTRI_OP_FLAGS_REMOVE:
+		resv = M_RES(mp)->tr_attrrm;
+		total = XFS_ATTRRM_SPACE_RES(mp);
+		break;
+	}
 	resv = xlog_recover_resv(&resv);
 	error = xfs_trans_alloc(mp, &resv, total, 0, XFS_TRANS_RESERVE, &tp);
 	if (error)
-- 
cgit v1.2.3


From 38de567906d95c397d87f292b892686b7ec6fbc3 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Tue, 21 May 2024 23:02:16 -0700
Subject: xfs: allow symlinks with short remote targets

An internal user complained about log recovery failing on a symlink
("Bad dinode after recovery") with the following (excerpted) format:

core.magic = 0x494e
core.mode = 0120777
core.version = 3
core.format = 2 (extents)
core.nlinkv2 = 1
core.nextents = 1
core.size = 297
core.nblocks = 1
core.naextents = 0
core.forkoff = 0
core.aformat = 2 (extents)
u3.bmx[0] = [startoff,startblock,blockcount,extentflag]
0:[0,12,1,0]

This is a symbolic link with a 297-byte target stored in a disk block,
which is to say this is a symlink with a remote target.  The forkoff is
0, which is to say that there's 512 - 176 == 336 bytes in the inode core
to store the data fork.

Eventually, testing of generic/388 failed with the same inode corruption
message during inode recovery.  In writing a debugging patch to call
xfs_dinode_verify on dirty inode log items when we're committing
transactions, I observed that xfs/298 can reproduce the problem quite
quickly.

xfs/298 creates a symbolic link, adds some extended attributes, then
deletes them all.  The test failure occurs when the final removexattr
also deletes the attr fork because that does not convert the remote
symlink back into a shortform symlink.  That is how we trip this test.
The only reason why xfs/298 only triggers with the debug patch added is
that it deletes the symlink, so the final iflush shows the inode as
free.

I wrote a quick fstest to emulate the behavior of xfs/298, except that
it leaves the symlinks on the filesystem after inducing the "corrupt"
state.  Kernels going back at least as far as 4.18 have written out
symlink inodes in this manner and prior to 1eb70f54c445f they did not
object to reading them back in.

Because we've been writing out inodes this way for quite some time, the
only way to fix this is to relax the check for symbolic links.
Directories don't have this problem because di_size is bumped to
blocksize during the sf->data conversion.

Fixes: 1eb70f54c445f ("xfs: validate inode fork size against fork format")
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
---
 fs/xfs/libxfs/xfs_inode_buf.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index d79002343d0b..e7a7bfbe75b4 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -374,17 +374,37 @@ xfs_dinode_verify_fork(
 	/*
 	 * For fork types that can contain local data, check that the fork
 	 * format matches the size of local data contained within the fork.
-	 *
-	 * For all types, check that when the size says the should be in extent
-	 * or btree format, the inode isn't claiming it is in local format.
 	 */
 	if (whichfork == XFS_DATA_FORK) {
-		if (S_ISDIR(mode) || S_ISLNK(mode)) {
+		/*
+		 * A directory small enough to fit in the inode must be stored
+		 * in local format.  The directory sf <-> extents conversion
+		 * code updates the directory size accordingly.
+		 */
+		if (S_ISDIR(mode)) {
+			if (be64_to_cpu(dip->di_size) <= fork_size &&
+			    fork_format != XFS_DINODE_FMT_LOCAL)
+				return __this_address;
+		}
+
+		/*
+		 * A symlink with a target small enough to fit in the inode can
+		 * be stored in extents format if xattrs were added (thus
+		 * converting the data fork from shortform to remote format)
+		 * and then removed.
+		 */
+		if (S_ISLNK(mode)) {
 			if (be64_to_cpu(dip->di_size) <= fork_size &&
+			    fork_format != XFS_DINODE_FMT_EXTENTS &&
 			    fork_format != XFS_DINODE_FMT_LOCAL)
 				return __this_address;
 		}
 
+		/*
+		 * For all types, check that when the size says the fork should
+		 * be in extent or btree format, the inode isn't claiming to be
+		 * in local format.
+		 */
 		if (be64_to_cpu(dip->di_size) > fork_size &&
 		    fork_format == XFS_DINODE_FMT_LOCAL)
 			return __this_address;
-- 
cgit v1.2.3


From 95b19e2f4e0f730c83910e7f5e4d62ec68c6d862 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Tue, 21 May 2024 23:02:32 -0700
Subject: xfs: don't open-code u64_to_user_ptr

Don't open-code what the kernel already provides.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
---
 fs/xfs/scrub/scrub.c | 2 +-
 fs/xfs/xfs_handle.c  | 7 +------
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index c013f0ba4f36..4cbcf7a86dbe 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -856,7 +856,7 @@ xfs_ioc_scrubv_metadata(
 	if (vec_bytes > PAGE_SIZE)
 		return -ENOMEM;
 
-	uvectors = (void __user *)(uintptr_t)head.svh_vectors;
+	uvectors = u64_to_user_ptr(head.svh_vectors);
 	vectors = memdup_user(uvectors, vec_bytes);
 	if (IS_ERR(vectors))
 		return PTR_ERR(vectors);
diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c
index c8785ed59543..a3f16e9b6fe5 100644
--- a/fs/xfs/xfs_handle.c
+++ b/fs/xfs/xfs_handle.c
@@ -773,11 +773,6 @@ xfs_getparents_expand_lastrec(
 	trace_xfs_getparents_expand_lastrec(gpx->ip, gp, &gpx->context, gpr);
 }
 
-static inline void __user *u64_to_uptr(u64 val)
-{
-	return (void __user *)(uintptr_t)val;
-}
-
 /* Retrieve the parent pointers for a given inode. */
 STATIC int
 xfs_getparents(
@@ -862,7 +857,7 @@ xfs_getparents(
 	ASSERT(gpx->context.firstu <= gpx->gph.gph_request.gp_bufsize);
 
 	/* Copy the records to userspace. */
-	if (copy_to_user(u64_to_uptr(gpx->gph.gph_request.gp_buffer),
+	if (copy_to_user(u64_to_user_ptr(gpx->gph.gph_request.gp_buffer),
 				gpx->krecords, gpx->context.firstu))
 		error = -EFAULT;
 
-- 
cgit v1.2.3


From 7ff6c798eca05e4a9dcb80163cb454d7787a4bc3 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 21 May 2024 15:46:32 -0700
Subject: ACPI: APEI: EINJ: Fix einj_dev release leak

The platform driver conversion of EINJ mistakenly used
platform_device_del() to unwind platform_device_register_full() at
module exit. This leads to a small leak of one 'struct platform_device'
instance per module load/unload cycle. Switch to
platform_device_unregister() which performs both device_del() and final
put_device().

Fixes: 5621fafaac00 ("EINJ: Migrate to a platform driver")
Cc: 6.9+ <stable@vger.kernel.org> # 6.9+
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Ben Cheatham <Benjamin.Cheatham@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/apei/einj-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/apei/einj-core.c b/drivers/acpi/apei/einj-core.c
index 9515bcfe5e97..73903a497d73 100644
--- a/drivers/acpi/apei/einj-core.c
+++ b/drivers/acpi/apei/einj-core.c
@@ -909,7 +909,7 @@ static void __exit einj_exit(void)
 	if (einj_initialized)
 		platform_driver_unregister(&einj_driver);
 
-	platform_device_del(einj_dev);
+	platform_device_unregister(einj_dev);
 }
 
 module_init(einj_init);
-- 
cgit v1.2.3


From f6f172dc6a6d7775b2df6adfd1350700e9a847ec Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Wed, 22 May 2024 23:36:48 +0200
Subject: ACPI: EC: Abort address space access upon error

When a multi-byte address space access is requested, acpi_ec_read()/
acpi_ec_write() is being called multiple times.

Abort such operations if a single call to acpi_ec_read() /
acpi_ec_write() fails, as the data read from / written to the EC
might be incomplete.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ec.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index e7793ee9e649..a68dce2147a4 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1333,10 +1333,13 @@ acpi_ec_space_handler(u32 function, acpi_physical_address address,
 	if (ec->busy_polling || bits > 8)
 		acpi_ec_burst_enable(ec);
 
-	for (i = 0; i < bytes; ++i, ++address, ++value)
+	for (i = 0; i < bytes; ++i, ++address, ++value) {
 		result = (function == ACPI_READ) ?
 			acpi_ec_read(ec, address, value) :
 			acpi_ec_write(ec, address, *value);
+		if (result < 0)
+			break;
+	}
 
 	if (ec->busy_polling || bits > 8)
 		acpi_ec_burst_disable(ec);
-- 
cgit v1.2.3


From c4bd7f1d78340e63de4d073fd3dbe5391e2996e5 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Wed, 22 May 2024 23:36:49 +0200
Subject: ACPI: EC: Avoid returning AE_OK on errors in address space handler

If an error code other than EINVAL, ENODEV or ETIME is returned
by acpi_ec_read() / acpi_ec_write(), then AE_OK is incorrectly
returned by acpi_ec_space_handler().

Fix this by only returning AE_OK on success, and return AE_ERROR
otherwise.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ec.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index a68dce2147a4..68dd17f96f63 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1351,8 +1351,10 @@ acpi_ec_space_handler(u32 function, acpi_physical_address address,
 		return AE_NOT_FOUND;
 	case -ETIME:
 		return AE_TIME;
-	default:
+	case 0:
 		return AE_OK;
+	default:
+		return AE_ERROR;
 	}
 }
 
-- 
cgit v1.2.3


From 52a2f0608366a629d43dacd3191039c95fef74ba Mon Sep 17 00:00:00 2001
From: Parthiban Veerasooran <Parthiban.Veerasooran@microchip.com>
Date: Thu, 23 May 2024 14:23:14 +0530
Subject: net: usb: smsc95xx: fix changing LED_SEL bit value updated from
 EEPROM

LED Select (LED_SEL) bit in the LED General Purpose IO Configuration
register is used to determine the functionality of external LED pins
(Speed Indicator, Link and Activity Indicator, Full Duplex Link
Indicator). The default value for this bit is 0 when no EEPROM is
present. If a EEPROM is present, the default value is the value of the
LED Select bit in the Configuration Flags of the EEPROM. A USB Reset or
Lite Reset (LRST) will cause this bit to be restored to the image value
last loaded from EEPROM, or to be set to 0 if no EEPROM is present.

While configuring the dual purpose GPIO/LED pins to LED outputs in the
LED General Purpose IO Configuration register, the LED_SEL bit is changed
as 0 and resulting the configured value from the EEPROM is cleared. The
issue is fixed by using read-modify-write approach.

Fixes: f293501c61c5 ("smsc95xx: configure LED outputs")
Signed-off-by: Parthiban Veerasooran <Parthiban.Veerasooran@microchip.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Woojung Huh <woojung.huh@microchip.com>
Link: https://lore.kernel.org/r/20240523085314.167650-1-Parthiban.Veerasooran@microchip.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/usb/smsc95xx.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index cbea24666479..8e82184be5e7 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -879,7 +879,7 @@ static int smsc95xx_start_rx_path(struct usbnet *dev)
 static int smsc95xx_reset(struct usbnet *dev)
 {
 	struct smsc95xx_priv *pdata = dev->driver_priv;
-	u32 read_buf, write_buf, burst_cap;
+	u32 read_buf, burst_cap;
 	int ret = 0, timeout;
 
 	netif_dbg(dev, ifup, dev->net, "entering smsc95xx_reset\n");
@@ -1003,10 +1003,13 @@ static int smsc95xx_reset(struct usbnet *dev)
 		return ret;
 	netif_dbg(dev, ifup, dev->net, "ID_REV = 0x%08x\n", read_buf);
 
+	ret = smsc95xx_read_reg(dev, LED_GPIO_CFG, &read_buf);
+	if (ret < 0)
+		return ret;
 	/* Configure GPIO pins as LED outputs */
-	write_buf = LED_GPIO_CFG_SPD_LED | LED_GPIO_CFG_LNK_LED |
-		LED_GPIO_CFG_FDX_LED;
-	ret = smsc95xx_write_reg(dev, LED_GPIO_CFG, write_buf);
+	read_buf |= LED_GPIO_CFG_SPD_LED | LED_GPIO_CFG_LNK_LED |
+		    LED_GPIO_CFG_FDX_LED;
+	ret = smsc95xx_write_reg(dev, LED_GPIO_CFG, read_buf);
 	if (ret < 0)
 		return ret;
 
-- 
cgit v1.2.3


From 9e69acc1de306b5243ca2ef8a54242e56d01b8e5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 23 May 2024 17:57:21 +0200
Subject: thermal/debugfs: Print initial trip temperature and hysteresis in
 tze_seq_show()

The temperature and hysteresis of a trip point may change during a
mitigation episode it is involved in (it may even become invalid
altogether), so in order to avoid possible confusion related to that,
store the temperature and hysteresis of trip points at the time they
are crossed on the way up and print those values instead of their
current temperature and hysteresis.

Fixes: 7ef01f228c9f ("thermal/debugfs: Add thermal debugfs information for mitigation episodes")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/thermal_debugfs.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c
index 91f9c21235a8..e0e4eb6fad39 100644
--- a/drivers/thermal/thermal_debugfs.c
+++ b/drivers/thermal/thermal_debugfs.c
@@ -91,6 +91,8 @@ struct cdev_record {
  *
  * @timestamp: the trip crossing timestamp
  * @duration: total time when the zone temperature was above the trip point
+ * @trip_temp: trip temperature at mitigation start
+ * @trip_hyst: trip hysteresis at mitigation start
  * @count: the number of times the zone temperature was above the trip point
  * @max: maximum recorded temperature above the trip point
  * @min: minimum recorded temperature above the trip point
@@ -99,6 +101,8 @@ struct cdev_record {
 struct trip_stats {
 	ktime_t timestamp;
 	ktime_t duration;
+	int trip_temp;
+	int trip_hyst;
 	int count;
 	int max;
 	int min;
@@ -574,6 +578,7 @@ void thermal_debug_tz_trip_up(struct thermal_zone_device *tz,
 	struct thermal_debugfs *thermal_dbg = tz->debugfs;
 	int trip_id = thermal_zone_trip_id(tz, trip);
 	ktime_t now = ktime_get();
+	struct trip_stats *trip_stats;
 
 	if (!thermal_dbg)
 		return;
@@ -639,7 +644,10 @@ void thermal_debug_tz_trip_up(struct thermal_zone_device *tz,
 	tz_dbg->trips_crossed[tz_dbg->nr_trips++] = trip_id;
 
 	tze = list_first_entry(&tz_dbg->tz_episodes, struct tz_episode, node);
-	tze->trip_stats[trip_id].timestamp = now;
+	trip_stats = &tze->trip_stats[trip_id];
+	trip_stats->trip_temp = trip->temperature;
+	trip_stats->trip_hyst = trip->hysteresis;
+	trip_stats->timestamp = now;
 
 unlock:
 	mutex_unlock(&thermal_dbg->lock);
@@ -836,8 +844,8 @@ static int tze_seq_show(struct seq_file *s, void *v)
 		seq_printf(s, "| %*d | %*s | %*d | %*d | %c%*lld | %*d | %*d | %*d |\n",
 			   4 , trip_id,
 			   8, type,
-			   9, trip->temperature,
-			   9, trip->hysteresis,
+			   9, trip_stats->trip_temp,
+			   9, trip_stats->trip_hyst,
 			   c, 10, duration_ms,
 			   9, trip_stats->avg,
 			   9, trip_stats->min,
-- 
cgit v1.2.3


From 5a599e10e53d1914adf0032c730bc0f604a5d947 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 23 May 2024 17:58:42 +0200
Subject: thermal/debugfs: Allow tze_seq_show() to print statistics for invalid
 trips

Commit a6258fde8de3 ("thermal/debugfs: Make tze_seq_show() skip invalid
trips and trips with no stats") modified tze_seq_show() to skip invalid
trips, but it overlooked the fact that a trip may become invalid during
a mitigation eposide involving it, in which case its statistics should
still be reported.

For this reason, remove the invalid trip temperature check from the
main loop in tze_seq_show().

The trips that have never been valid will still be skipped after this
change because there are no statistics to report for them.

Fixes: a6258fde8de3 ("thermal/debugfs: Make tze_seq_show() skip invalid trips and trips with no stats")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/thermal_debugfs.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c
index e0e4eb6fad39..942447229157 100644
--- a/drivers/thermal/thermal_debugfs.c
+++ b/drivers/thermal/thermal_debugfs.c
@@ -802,10 +802,6 @@ static int tze_seq_show(struct seq_file *s, void *v)
 		const struct thermal_trip *trip = &td->trip;
 		struct trip_stats *trip_stats;
 
-		/* Skip invalid trips. */
-		if (trip->temperature == THERMAL_TEMP_INVALID)
-			continue;
-
 		/*
 		 * There is no possible mitigation happening at the
 		 * critical trip point, so the stats will be always
-- 
cgit v1.2.3


From cb573eec609c44f44b27c2fa07afd926eddc9f89 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 23 May 2024 18:00:14 +0200
Subject: thermal: core: Introduce thermal_trip_crossed()

Add a helper function called thermal_trip_crossed() to be invoked by
__thermal_zone_device_update() in order to notify user space, the
thermal debug code and the zone governor about trip crossing.

Subsequently, this will also be used in the case when a trip point
becomes invalid after being crossed on the way up.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/thermal_core.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 54cce4e523bc..c119ef920793 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -467,6 +467,21 @@ static void thermal_governor_trip_crossed(struct thermal_governor *governor,
 		governor->trip_crossed(tz, trip, crossed_up);
 }
 
+static void thermal_trip_crossed(struct thermal_zone_device *tz,
+				 const struct thermal_trip *trip,
+				 struct thermal_governor *governor,
+				 bool crossed_up)
+{
+	if (crossed_up) {
+		thermal_notify_tz_trip_up(tz, trip);
+		thermal_debug_tz_trip_up(tz, trip);
+	} else {
+		thermal_notify_tz_trip_down(tz, trip);
+		thermal_debug_tz_trip_down(tz, trip);
+	}
+	thermal_governor_trip_crossed(governor, tz, trip, crossed_up);
+}
+
 static int thermal_trip_notify_cmp(void *ascending, const struct list_head *a,
 				   const struct list_head *b)
 {
@@ -506,18 +521,12 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
 		handle_thermal_trip(tz, td, &way_up_list, &way_down_list);
 
 	list_sort(&way_up_list, &way_up_list, thermal_trip_notify_cmp);
-	list_for_each_entry(td, &way_up_list, notify_list_node) {
-		thermal_notify_tz_trip_up(tz, &td->trip);
-		thermal_debug_tz_trip_up(tz, &td->trip);
-		thermal_governor_trip_crossed(governor, tz, &td->trip, true);
-	}
+	list_for_each_entry(td, &way_up_list, notify_list_node)
+		thermal_trip_crossed(tz, &td->trip, governor, true);
 
 	list_sort(NULL, &way_down_list, thermal_trip_notify_cmp);
-	list_for_each_entry(td, &way_down_list, notify_list_node) {
-		thermal_notify_tz_trip_down(tz, &td->trip);
-		thermal_debug_tz_trip_down(tz, &td->trip);
-		thermal_governor_trip_crossed(governor, tz, &td->trip, false);
-	}
+	list_for_each_entry(td, &way_down_list, notify_list_node)
+		thermal_trip_crossed(tz, &td->trip, governor, false);
 
 	if (governor->manage)
 		governor->manage(tz);
-- 
cgit v1.2.3


From ae2170d6ea96e652c7fb5689f1980986bf48b7b8 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 23 May 2024 18:05:03 +0200
Subject: thermal: trip: Trigger trip down notifications when trips involved in
 mitigation become invalid

When a trip point becomes invalid after being crossed on the way up,
it is involved in a mitigation episode that needs to be adjusted to
compensate for the trip going away.

For this reason, introduce thermal_zone_trip_down() as a wrapper
around thermal_trip_crossed() and make thermal_zone_set_trip_temp()
call it if the new temperature of the trip at hand is equal to
THERMAL_TEMP_INVALID and it has been crossed on the way up to trigger
all of the necessary adjustments in user space, the thermal debug
code and the zone governor.

Fixes: 8c69a777e480 ("thermal: core: Fix the handling of invalid trip points")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/thermal_core.c |  6 ++++++
 drivers/thermal/thermal_core.h |  2 ++
 drivers/thermal/thermal_trip.c | 20 ++++++++++++--------
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index c119ef920793..30567b499455 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -602,6 +602,12 @@ void thermal_zone_device_update(struct thermal_zone_device *tz,
 }
 EXPORT_SYMBOL_GPL(thermal_zone_device_update);
 
+void thermal_zone_trip_down(struct thermal_zone_device *tz,
+			    const struct thermal_trip *trip)
+{
+	thermal_trip_crossed(tz, trip, thermal_get_tz_governor(tz), false);
+}
+
 int for_each_thermal_governor(int (*cb)(struct thermal_governor *, void *),
 			      void *data)
 {
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index d9785e5bbb08..20e7b45673d6 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -246,6 +246,8 @@ int thermal_zone_trip_id(const struct thermal_zone_device *tz,
 void thermal_zone_trip_updated(struct thermal_zone_device *tz,
 			       const struct thermal_trip *trip);
 int __thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp);
+void thermal_zone_trip_down(struct thermal_zone_device *tz,
+			    const struct thermal_trip *trip);
 
 /* sysfs I/F */
 int thermal_zone_create_device_groups(struct thermal_zone_device *tz);
diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c
index d6a6acc78ddb..49e63db68517 100644
--- a/drivers/thermal/thermal_trip.c
+++ b/drivers/thermal/thermal_trip.c
@@ -152,17 +152,23 @@ void thermal_zone_set_trip_temp(struct thermal_zone_device *tz,
 	if (trip->temperature == temp)
 		return;
 
+	trip->temperature = temp;
+	thermal_notify_tz_trip_change(tz, trip);
+
 	if (temp == THERMAL_TEMP_INVALID) {
 		struct thermal_trip_desc *td = trip_to_trip_desc(trip);
 
-		if (trip->type == THERMAL_TRIP_PASSIVE &&
-		    tz->temperature >= td->threshold) {
+		if (tz->temperature >= td->threshold) {
 			/*
-			 * The trip has been crossed, so the thermal zone's
-			 * passive count needs to be adjusted.
+			 * The trip has been crossed on the way up, so some
+			 * adjustments are needed to compensate for the lack
+			 * of it going forward.
 			 */
-			tz->passive--;
-			WARN_ON_ONCE(tz->passive < 0);
+			if (trip->type == THERMAL_TRIP_PASSIVE) {
+				tz->passive--;
+				WARN_ON_ONCE(tz->passive < 0);
+			}
+			thermal_zone_trip_down(tz, trip);
 		}
 		/*
 		 * Invalidate the threshold to avoid triggering a spurious
@@ -170,7 +176,5 @@ void thermal_zone_set_trip_temp(struct thermal_zone_device *tz,
 		 */
 		td->threshold = INT_MAX;
 	}
-	trip->temperature = temp;
-	thermal_notify_tz_trip_change(tz, trip);
 }
 EXPORT_SYMBOL_GPL(thermal_zone_set_trip_temp);
-- 
cgit v1.2.3


From f89ea63f1c65d3e93b255f14f9d9e05df87955fa Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 24 May 2024 15:26:11 +0100
Subject: netfs, 9p: Fix race between umount and async request completion

There's a problem in 9p's interaction with netfslib whereby a crash occurs
because the 9p_fid structs get forcibly destroyed during client teardown
(without paying attention to their refcounts) before netfslib has finished
with them.  However, it's not a simple case of deferring the clunking that
p9_fid_put() does as that requires the p9_client record to still be
present.

The problem is that netfslib has to unlock pages and clear the IN_PROGRESS
flag before destroying the objects involved - including the fid - and, in
any case, nothing checks to see if writeback completed barring looking at
the page flags.

Fix this by keeping a count of outstanding I/O requests (of any type) and
waiting for it to quiesce during inode eviction.

Reported-by: syzbot+df038d463cca332e8414@syzkaller.appspotmail.com
Link: https://lore.kernel.org/all/0000000000005be0aa061846f8d6@google.com/
Reported-by: syzbot+d7c7a495a5e466c031b6@syzkaller.appspotmail.com
Link: https://lore.kernel.org/all/000000000000b86c5e06130da9c6@google.com/
Reported-by: syzbot+1527696d41a634cc1819@syzkaller.appspotmail.com
Link: https://lore.kernel.org/all/000000000000041f960618206d7e@google.com/
Signed-off-by: David Howells <dhowells@redhat.com>
Link: https://lore.kernel.org/r/755891.1716560771@warthog.procyon.org.uk
Tested-by: syzbot+d7c7a495a5e466c031b6@syzkaller.appspotmail.com
Reviewed-by: Dominique Martinet <asmadeus@codewreck.org>
cc: Eric Van Hensbergen <ericvh@kernel.org>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: Steve French <sfrench@samba.org>
cc: Hillf Danton <hdanton@sina.com>
cc: v9fs@lists.linux.dev
cc: linux-afs@lists.infradead.org
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Reported-and-tested-by: syzbot+d7c7a495a5e466c031b6@syzkaller.appspotmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/9p/vfs_inode.c      |  1 +
 fs/afs/inode.c         |  1 +
 fs/netfs/objects.c     |  5 +++++
 fs/smb/client/cifsfs.c |  1 +
 include/linux/netfs.h  | 18 ++++++++++++++++++
 5 files changed, 26 insertions(+)

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 7a3308d77606..fd72fc38c8f5 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -348,6 +348,7 @@ void v9fs_evict_inode(struct inode *inode)
 	__le32 __maybe_unused version;
 
 	if (!is_bad_inode(inode)) {
+		netfs_wait_for_outstanding_io(inode);
 		truncate_inode_pages_final(&inode->i_data);
 
 		version = cpu_to_le32(v9inode->qid.version);
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 94fc049aff58..15bb7989c387 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -648,6 +648,7 @@ void afs_evict_inode(struct inode *inode)
 
 	ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
 
+	netfs_wait_for_outstanding_io(inode);
 	truncate_inode_pages_final(&inode->i_data);
 
 	afs_set_cache_aux(vnode, &aux);
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index c90d482b1650..f4a642727479 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -72,6 +72,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
 		}
 	}
 
+	atomic_inc(&ctx->io_count);
 	trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new);
 	netfs_proc_add_rreq(rreq);
 	netfs_stat(&netfs_n_rh_rreq);
@@ -124,6 +125,7 @@ static void netfs_free_request(struct work_struct *work)
 {
 	struct netfs_io_request *rreq =
 		container_of(work, struct netfs_io_request, work);
+	struct netfs_inode *ictx = netfs_inode(rreq->inode);
 	unsigned int i;
 
 	trace_netfs_rreq(rreq, netfs_rreq_trace_free);
@@ -142,6 +144,9 @@ static void netfs_free_request(struct work_struct *work)
 		}
 		kvfree(rreq->direct_bv);
 	}
+
+	if (atomic_dec_and_test(&ictx->io_count))
+		wake_up_var(&ictx->io_count);
 	call_rcu(&rreq->rcu, netfs_free_request_rcu);
 }
 
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index ec5b639f421a..14810ffd15c8 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -431,6 +431,7 @@ cifs_free_inode(struct inode *inode)
 static void
 cifs_evict_inode(struct inode *inode)
 {
+	netfs_wait_for_outstanding_io(inode);
 	truncate_inode_pages_final(&inode->i_data);
 	if (inode->i_state & I_PINNING_NETFS_WB)
 		cifs_fscache_unuse_inode_cookie(inode, true);
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index ca56a4428043..3b22ce0d064c 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -68,6 +68,7 @@ struct netfs_inode {
 	loff_t			remote_i_size;	/* Size of the remote file */
 	loff_t			zero_point;	/* Size after which we assume there's no data
 						 * on the server */
+	atomic_t		io_count;	/* Number of outstanding reqs */
 	unsigned long		flags;
 #define NETFS_ICTX_ODIRECT	0		/* The file has DIO in progress */
 #define NETFS_ICTX_UNBUFFERED	1		/* I/O should not use the pagecache */
@@ -472,6 +473,7 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
 	ctx->remote_i_size = i_size_read(&ctx->inode);
 	ctx->zero_point = LLONG_MAX;
 	ctx->flags = 0;
+	atomic_set(&ctx->io_count, 0);
 #if IS_ENABLED(CONFIG_FSCACHE)
 	ctx->cache = NULL;
 #endif
@@ -515,4 +517,20 @@ static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx)
 #endif
 }
 
+/**
+ * netfs_wait_for_outstanding_io - Wait for outstanding I/O to complete
+ * @ctx: The netfs inode to wait on
+ *
+ * Wait for outstanding I/O requests of any type to complete.  This is intended
+ * to be called from inode eviction routines.  This makes sure that any
+ * resources held by those requests are cleaned up before we let the inode get
+ * cleaned up.
+ */
+static inline void netfs_wait_for_outstanding_io(struct inode *inode)
+{
+	struct netfs_inode *ictx = netfs_inode(inode);
+
+	wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0);
+}
+
 #endif /* _LINUX_NETFS_H */
-- 
cgit v1.2.3


From e569eb34970281438e2b48a3ef11c87459fcfbcb Mon Sep 17 00:00:00 2001
From: Carlos López <clopez@suse.de>
Date: Mon, 27 May 2024 11:43:52 +0200
Subject: tracing/probes: fix error check in parse_btf_field()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

btf_find_struct_member() might return NULL or an error via the
ERR_PTR() macro. However, its caller in parse_btf_field() only checks
for the NULL condition. Fix this by using IS_ERR() and returning the
error up the stack.

Link: https://lore.kernel.org/all/20240527094351.15687-1-clopez@suse.de/

Fixes: c440adfbe3025 ("tracing/probes: Support BTF based data structure field access")
Signed-off-by: Carlos López <clopez@suse.de>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 kernel/trace/trace_probe.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 5e263c141574..39877c80d6cb 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -554,6 +554,10 @@ static int parse_btf_field(char *fieldname, const struct btf_type *type,
 			anon_offs = 0;
 			field = btf_find_struct_member(ctx->btf, type, fieldname,
 						       &anon_offs);
+			if (IS_ERR(field)) {
+				trace_probe_log_err(ctx->offset, BAD_BTF_TID);
+				return PTR_ERR(field);
+			}
 			if (!field) {
 				trace_probe_log_err(ctx->offset, NO_BTF_FIELD);
 				return -ENOENT;
-- 
cgit v1.2.3


From 797c525e85d1e44cf0e6f338890e8e0c661f524a Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Mon, 27 May 2024 11:08:40 +0100
Subject: ASoC: cs42l43: Only restrict 44.1kHz for the ASP

The SoundWire interface can always support 44.1kHz using flow controlled
mode, and whether the ASP is in master mode should obviously only affect
the ASP. Update cs42l43_startup() to only restrict the rates for the ASP
DAI.

Fixes: fc918cbe874e ("ASoC: cs42l43: Add support for the cs42l43")
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://msgid.link/r/20240527100840.439832-1-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/cs42l43.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c
index 94685449f0f4..92674314227c 100644
--- a/sound/soc/codecs/cs42l43.c
+++ b/sound/soc/codecs/cs42l43.c
@@ -310,8 +310,9 @@ static int cs42l43_startup(struct snd_pcm_substream *substream, struct snd_soc_d
 	struct snd_soc_component *component = dai->component;
 	struct cs42l43_codec *priv = snd_soc_component_get_drvdata(component);
 	struct cs42l43 *cs42l43 = priv->core;
-	int provider = !!regmap_test_bits(cs42l43->regmap, CS42L43_ASP_CLK_CONFIG2,
-					  CS42L43_ASP_MASTER_MODE_MASK);
+	int provider = !dai->id || !!regmap_test_bits(cs42l43->regmap,
+						      CS42L43_ASP_CLK_CONFIG2,
+						      CS42L43_ASP_MASTER_MODE_MASK);
 
 	if (provider)
 		priv->constraint.mask = CS42L43_PROVIDER_RATE_MASK;
-- 
cgit v1.2.3


From 8d34c12e8751fe180157fbb34a758ed4eede3806 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Mon, 27 May 2024 11:02:37 +0100
Subject: ASoC: wm_adsp: Add missing MODULE_DESCRIPTION()

wm_adsp is built as a separate module and as such should include a
MODULE_DESCRIPTION() macro.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://msgid.link/r/20240527100237.430240-1-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/wm_adsp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index c9d9a7b28efb..68d2d6444533 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -2085,5 +2085,6 @@ static const struct cs_dsp_client_ops wm_adsp2_client_ops = {
 	.watchdog_expired = wm_adsp_fatal_error,
 };
 
+MODULE_DESCRIPTION("Cirrus Logic ASoC DSP Support");
 MODULE_LICENSE("GPL v2");
 MODULE_IMPORT_NS(FW_CS_DSP);
-- 
cgit v1.2.3


From d5d2a5dacbc8ea4386071ce243c43ea0dac23cb8 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Mon, 27 May 2024 11:13:26 +0100
Subject: MAINTAINERS: Remove James Schulman from Cirrus audio maintainers

James no longer works for Cirrus Logic, remove him from the list of
maintainers for the Cirrus audio CODEC drivers.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://msgid.link/r/20240527101326.440345-1-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index d6c90161c7bf..a6a011792167 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5187,7 +5187,6 @@ F:	Documentation/devicetree/bindings/media/i2c/chrontel,ch7322.yaml
 F:	drivers/media/cec/i2c/ch7322.c
 
 CIRRUS LOGIC AUDIO CODEC DRIVERS
-M:	James Schulman <james.schulman@cirrus.com>
 M:	David Rhodes <david.rhodes@cirrus.com>
 M:	Richard Fitzgerald <rf@opensource.cirrus.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
-- 
cgit v1.2.3


From 82d71b53d7e732ede6028591342bdc80fabfa29f Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@redhat.com>
Date: Mon, 27 May 2024 15:13:14 +0200
Subject: Documentation/core-api: correct reference to SWIOTLB_DYNAMIC

Commit c93f261dfc39 ("Documentation/core-api: add swiotlb documentation")
accidentally refers to CONFIG_DYNAMIC_SWIOTLB in one place, while the
config is actually called CONFIG_SWIOTLB_DYNAMIC.

Correct the reference to the intended config option.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@redhat.com>
Reviewed-by: Petr Tesarik <petr@tesarici.cz>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 Documentation/core-api/swiotlb.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/core-api/swiotlb.rst b/Documentation/core-api/swiotlb.rst
index 5ad2c9ca85bc..cf06bae44ff8 100644
--- a/Documentation/core-api/swiotlb.rst
+++ b/Documentation/core-api/swiotlb.rst
@@ -192,7 +192,7 @@ alignment larger than PAGE_SIZE.
 
 Dynamic swiotlb
 ---------------
-When CONFIG_DYNAMIC_SWIOTLB is enabled, swiotlb can do on-demand expansion of
+When CONFIG_SWIOTLB_DYNAMIC is enabled, swiotlb can do on-demand expansion of
 the amount of memory available for allocation as bounce buffers. If a bounce
 buffer request fails due to lack of available space, an asynchronous background
 task is kicked off to allocate memory from general system memory and turn it
-- 
cgit v1.2.3


From e5c7bd4e5ca0f549108a7013ba77885926c6a56b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 22 May 2024 13:40:19 -0300
Subject: tools include UAPI: Sync linux/stat.h with the kernel sources

To get the changes in:

  2a82bb02941fb53d ("statx: stx_subvol")

To pick up this change and support it:

  $ tools/perf/trace/beauty/statx_mask.sh > before
  $ cp include/uapi/linux/stat.h tools/perf/trace/beauty/include/uapi/linux/stat.h
  $ tools/perf/trace/beauty/statx_mask.sh > after
  $ diff -u before after
  --- before	2024-05-22 13:39:49.742470571 -0300
  +++ after	2024-05-22 13:39:59.157883101 -0300
  @@ -14,4 +14,5 @@
   	[ilog2(0x00001000) + 1] = "MNT_ID",
   	[ilog2(0x00002000) + 1] = "DIOALIGN",
   	[ilog2(0x00004000) + 1] = "MNT_ID_UNIQUE",
  +	[ilog2(0x00008000) + 1] = "SUBVOL",
   };
  $

Now we'll see it like we see these:

  # perf trace -e statx
     0.000 ( 0.015 ms): systemd-userwo/3982299 statx(dfd: 6, filename: ".", mask: TYPE|INO|MNT_ID, buffer: 0x7ffd8945e850) = 0
     <SNIP>
   180.559 ( 0.007 ms): (ostnamed)/3982957 statx(dfd: 4, filename: "sys", flags: SYMLINK_NOFOLLOW|NO_AUTOMOUNT|STATX_DONT_SYNC, mask: TYPE, buffer: 0x7fff13161190) = 0
   180.918 ( 0.011 ms): (ostnamed)/3982957 statx(dfd: CWD, filename: "/run/systemd/mount-rootfs/sys/kernel/security", flags: SYMLINK_NOFOLLOW|NO_AUTOMOUNT|STATX_DONT_SYNC, mask: MNT_ID, buffer: 0x7fff13161120) = 0
   180.956 ( 0.010 ms): (ostnamed)/3982957 statx(dfd: CWD, filename: "/run/systemd/mount-rootfs/sys/fs/cgroup", flags: SYMLINK_NOFOLLOW|NO_AUTOMOUNT|STATX_DONT_SYNC, mask: MNT_ID, buffer: 0x7fff13161120) = 0
   <SNIP>

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/lkml/Zk5nO9yT0oPezUoo@x1
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/trace/beauty/include/uapi/linux/stat.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h
index 2f2ee82d5517..67626d535316 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/stat.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h
@@ -126,8 +126,9 @@ struct statx {
 	__u64	stx_mnt_id;
 	__u32	stx_dio_mem_align;	/* Memory buffer alignment for direct I/O */
 	__u32	stx_dio_offset_align;	/* File offset alignment for direct I/O */
+	__u64	stx_subvol;	/* Subvolume identifier */
 	/* 0xa0 */
-	__u64	__spare3[12];	/* Spare space for future expansion */
+	__u64	__spare3[11];	/* Spare space for future expansion */
 	/* 0x100 */
 };
 
@@ -155,6 +156,7 @@ struct statx {
 #define STATX_MNT_ID		0x00001000U	/* Got stx_mnt_id */
 #define STATX_DIOALIGN		0x00002000U	/* Want/got direct I/O alignment info */
 #define STATX_MNT_ID_UNIQUE	0x00004000U	/* Want/got extended stx_mount_id */
+#define STATX_SUBVOL		0x00008000U	/* Want/got stx_subvol */
 
 #define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */
 
-- 
cgit v1.2.3


From 80e4e17ac9e05adba3f1f0e1793398086e6d4007 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 21 May 2024 15:16:06 -0700
Subject: block: remove blk_queue_max_integrity_segments

This is unused now that all the atomic queue limit conversions are
merged.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20240521221606.393040-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-integrity.h | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index e253e7bd0d17..7428cb43952d 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -66,12 +66,6 @@ blk_integrity_queue_supports_integrity(struct request_queue *q)
 	return q->integrity.profile;
 }
 
-static inline void blk_queue_max_integrity_segments(struct request_queue *q,
-						    unsigned int segs)
-{
-	q->limits.max_integrity_segments = segs;
-}
-
 static inline unsigned short
 queue_max_integrity_segments(const struct request_queue *q)
 {
@@ -151,10 +145,6 @@ static inline void blk_integrity_register(struct gendisk *d,
 static inline void blk_integrity_unregister(struct gendisk *d)
 {
 }
-static inline void blk_queue_max_integrity_segments(struct request_queue *q,
-						    unsigned int segs)
-{
-}
 static inline unsigned short
 queue_max_integrity_segments(const struct request_queue *q)
 {
-- 
cgit v1.2.3


From d9780064b163b91c28e4d44ec3115599db65b7fa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 27 May 2024 14:36:18 +0200
Subject: dm: move setting zoned_enabled to dm_table_set_restrictions

Keep it together with the rest of the zoned code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20240527123634.1116952-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm-table.c | 3 ---
 drivers/md/dm-zone.c  | 8 +++++++-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index cc66a27c363a..e291b78b307b 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -2040,9 +2040,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 		r = dm_set_zones_restrictions(t, q);
 		if (r)
 			return r;
-		if (blk_queue_is_zoned(q) &&
-		    !static_key_enabled(&zoned_enabled.key))
-			static_branch_enable(&zoned_enabled);
 	}
 
 	dm_update_crypto_profile(q, t);
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 8e6bcb0d786a..3103360ce7f0 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -287,7 +287,13 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
 		       queue_emulates_zone_append(q) ? "emulated" : "native");
 	}
 
-	return dm_revalidate_zones(md, t);
+	ret = dm_revalidate_zones(md, t);
+	if (ret < 0)
+		return ret;
+
+	if (!static_key_enabled(&zoned_enabled.key))
+		static_branch_enable(&zoned_enabled);
+	return 0;
 }
 
 /*
-- 
cgit v1.2.3


From 5e7a4bbcc33d7df6bcc8565a8938c196285e5423 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 27 May 2024 14:36:19 +0200
Subject: dm: remove dm_check_zoned

Fold it into the only caller in preparation to changes in the
queue limits setup.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@kernel.org>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20240527123634.1116952-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm-zone.c | 59 ++++++++++++++++++++--------------------------------
 1 file changed, 23 insertions(+), 36 deletions(-)

diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 3103360ce7f0..0ee22494857d 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -160,37 +160,6 @@ static int dm_check_zoned_cb(struct blk_zone *zone, unsigned int idx,
 	return 0;
 }
 
-static int dm_check_zoned(struct mapped_device *md, struct dm_table *t)
-{
-	struct gendisk *disk = md->disk;
-	unsigned int nr_conv_zones = 0;
-	int ret;
-
-	/* Count conventional zones */
-	md->zone_revalidate_map = t;
-	ret = dm_blk_report_zones(disk, 0, UINT_MAX,
-				  dm_check_zoned_cb, &nr_conv_zones);
-	md->zone_revalidate_map = NULL;
-	if (ret < 0) {
-		DMERR("Check zoned failed %d", ret);
-		return ret;
-	}
-
-	/*
-	 * If we only have conventional zones, expose the mapped device as
-	 * a regular device.
-	 */
-	if (nr_conv_zones >= ret) {
-		disk->queue->limits.max_open_zones = 0;
-		disk->queue->limits.max_active_zones = 0;
-		disk->queue->limits.zoned = false;
-		clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
-		disk->nr_zones = 0;
-	}
-
-	return 0;
-}
-
 /*
  * Revalidate the zones of a mapped device to initialize resource necessary
  * for zone append emulation. Note that we cannot simply use the block layer
@@ -254,6 +223,8 @@ static bool dm_table_supports_zone_append(struct dm_table *t)
 int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
 {
 	struct mapped_device *md = t->md;
+	struct gendisk *disk = md->disk;
+	unsigned int nr_conv_zones = 0;
 	int ret;
 
 	/*
@@ -272,14 +243,30 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
 		return 0;
 
 	/*
-	 * Check that the mapped device will indeed be zoned, that is, that it
-	 * has sequential write required zones.
+	 * Count conventional zones to check that the mapped device will indeed 
+	 * have sequential write required zones.
 	 */
-	ret = dm_check_zoned(md, t);
-	if (ret)
+	md->zone_revalidate_map = t;
+	ret = dm_blk_report_zones(disk, 0, UINT_MAX,
+				  dm_check_zoned_cb, &nr_conv_zones);
+	md->zone_revalidate_map = NULL;
+	if (ret < 0) {
+		DMERR("Check zoned failed %d", ret);
 		return ret;
-	if (!blk_queue_is_zoned(q))
+	}
+
+	/*
+	 * If we only have conventional zones, expose the mapped device as
+	 * a regular device.
+	 */
+	if (nr_conv_zones >= ret) {
+		disk->queue->limits.max_open_zones = 0;
+		disk->queue->limits.max_active_zones = 0;
+		disk->queue->limits.zoned = false;
+		clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+		disk->nr_zones = 0;
 		return 0;
+	}
 
 	if (!md->disk->nr_zones) {
 		DMINFO("%s using %s zone append",
-- 
cgit v1.2.3


From c8c1f7012b807ca4da0136eacab96961b56f25d5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 27 May 2024 14:36:20 +0200
Subject: dm: make dm_set_zones_restrictions work on the queue limits

Don't stuff the values directly into the queue without any
synchronization, but instead delay applying the queue limits in
the caller and let dm_set_zones_restrictions work on the limit
structure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20240527123634.1116952-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm-table.c | 12 ++++++------
 drivers/md/dm-zone.c  | 11 ++++++-----
 drivers/md/dm.h       |  3 ++-
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index e291b78b307b..b2d5246cff21 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1981,10 +1981,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	if (!dm_table_supports_secure_erase(t))
 		limits->max_secure_erase_sectors = 0;
 
-	r = queue_limits_set(q, limits);
-	if (r)
-		return r;
-
 	if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
 		wc = true;
 		if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_FUA)))
@@ -2036,12 +2032,16 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	 * For a zoned target, setup the zones related queue attributes
 	 * and resources necessary for zone append emulation if necessary.
 	 */
-	if (blk_queue_is_zoned(q)) {
-		r = dm_set_zones_restrictions(t, q);
+	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && limits->zoned) {
+		r = dm_set_zones_restrictions(t, q, limits);
 		if (r)
 			return r;
 	}
 
+	r = queue_limits_set(q, limits);
+	if (r)
+		return r;
+
 	dm_update_crypto_profile(q, t);
 
 	/*
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 0ee22494857d..5d66d916730e 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -220,7 +220,8 @@ static bool dm_table_supports_zone_append(struct dm_table *t)
 	return true;
 }
 
-int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
+int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
+		struct queue_limits *lim)
 {
 	struct mapped_device *md = t->md;
 	struct gendisk *disk = md->disk;
@@ -236,7 +237,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
 		clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
 	} else {
 		set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
-		blk_queue_max_zone_append_sectors(q, 0);
+		lim->max_zone_append_sectors = 0;
 	}
 
 	if (!get_capacity(md->disk))
@@ -260,9 +261,9 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
 	 * a regular device.
 	 */
 	if (nr_conv_zones >= ret) {
-		disk->queue->limits.max_open_zones = 0;
-		disk->queue->limits.max_active_zones = 0;
-		disk->queue->limits.zoned = false;
+		lim->max_open_zones = 0;
+		lim->max_active_zones = 0;
+		lim->zoned = false;
 		clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
 		disk->nr_zones = 0;
 		return 0;
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index e0c57f19839b..53ef8207fe2c 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -101,7 +101,8 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
 /*
  * Zoned targets related functions.
  */
-int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q);
+int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
+		struct queue_limits *lim);
 void dm_zone_endio(struct dm_io *io, struct bio *clone);
 #ifdef CONFIG_BLK_DEV_ZONED
 int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
-- 
cgit v1.2.3


From 0efc88e444d9d74f220f60a3e37143c8decf1bea Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 27 May 2024 12:15:27 -0300
Subject: tools headers UAPI: Sync linux/prctl.h with the kernel sources

To pick the changes in:

  628d701f2de5b9a1 ("powerpc/dexcr: Add DEXCR prctl interface")
  6b9391b581fddd85 ("riscv: Include riscv_set_icache_flush_ctx prctl")

That adds some PowerPC and a RISC-V specific prctl options:

  $ tools/perf/trace/beauty/prctl_option.sh > before
  $ cp include/uapi/linux/prctl.h tools/perf/trace/beauty/include/uapi/linux/prctl.h
  $ tools/perf/trace/beauty/prctl_option.sh > after
  $ diff -u before after
  --- before	2024-05-27 12:14:21.358032781 -0300
  +++ after	2024-05-27 12:14:32.364530185 -0300
  @@ -65,6 +65,9 @@
   	[68] = "GET_MEMORY_MERGE",
   	[69] = "RISCV_V_SET_CONTROL",
   	[70] = "RISCV_V_GET_CONTROL",
  +	[71] = "RISCV_SET_ICACHE_FLUSH_CTX",
  +	[72] = "PPC_GET_DEXCR",
  +	[73] = "PPC_SET_DEXCR",
   };
   static const char *prctl_set_mm_options[] = {
   	[1] = "START_CODE",
  $

That now will be used to decode the syscall option and also to compose
filters, for instance:

  [root@five ~]# perf trace -e syscalls:sys_enter_prctl --filter option==SET_NAME
       0.000 Isolated Servi/3474327 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23f13b7aee)
       0.032 DOM Worker/3474327 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23deb25670)
       7.920 :3474328/3474328 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23e24fbb10)
       7.935 StreamT~s #374/3474328 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23e24fb970)
       8.400 Isolated Servi/3474329 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23e24bab10)
       8.418 StreamT~s #374/3474329 syscalls:sys_enter_prctl(option: SET_NAME, arg2: 0x7f23e24ba970)
  ^C[root@five ~]#

This addresses this perf build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/include/uapi/linux/prctl.h include/uapi/linux/prctl.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Benjamin Gray <bgray@linux.ibm.com>
Cc: Charlie Jenkins <charlie@rivosinc.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/lkml/ZlSklGWp--v_Ije7@x1
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/trace/beauty/include/uapi/linux/prctl.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
index 370ed14b1ae0..35791791a879 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
@@ -306,4 +306,26 @@ struct prctl_mm_map {
 # define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK	0xc
 # define PR_RISCV_V_VSTATE_CTRL_MASK		0x1f
 
+#define PR_RISCV_SET_ICACHE_FLUSH_CTX	71
+# define PR_RISCV_CTX_SW_FENCEI_ON	0
+# define PR_RISCV_CTX_SW_FENCEI_OFF	1
+# define PR_RISCV_SCOPE_PER_PROCESS	0
+# define PR_RISCV_SCOPE_PER_THREAD	1
+
+/* PowerPC Dynamic Execution Control Register (DEXCR) controls */
+#define PR_PPC_GET_DEXCR		72
+#define PR_PPC_SET_DEXCR		73
+/* DEXCR aspect to act on */
+# define PR_PPC_DEXCR_SBHE		0 /* Speculative branch hint enable */
+# define PR_PPC_DEXCR_IBRTPD		1 /* Indirect branch recurrent target prediction disable */
+# define PR_PPC_DEXCR_SRAPD		2 /* Subroutine return address prediction disable */
+# define PR_PPC_DEXCR_NPHIE		3 /* Non-privileged hash instruction enable */
+/* Action to apply / return */
+# define PR_PPC_DEXCR_CTRL_EDITABLE	 0x1 /* Aspect can be modified with PR_PPC_SET_DEXCR */
+# define PR_PPC_DEXCR_CTRL_SET		 0x2 /* Set the aspect for this process */
+# define PR_PPC_DEXCR_CTRL_CLEAR	 0x4 /* Clear the aspect for this process */
+# define PR_PPC_DEXCR_CTRL_SET_ONEXEC	 0x8 /* Set the aspect on exec */
+# define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC	0x10 /* Clear the aspect on exec */
+# define PR_PPC_DEXCR_CTRL_MASK		0x1f
+
 #endif /* _LINUX_PRCTL_H */
-- 
cgit v1.2.3


From b0c6bcd58d44b1b843d1b7218db5a1efe917d27e Mon Sep 17 00:00:00 2001
From: "Ritesh Harjani (IBM)" <ritesh.list@gmail.com>
Date: Tue, 7 May 2024 15:06:55 +0530
Subject: xfs: Add cond_resched to block unmap range and reflink remap path

An async dio write to a sparse file can generate a lot of extents
and when we unlink this file (using rm), the kernel can be busy in umapping
and freeing those extents as part of transaction processing.

Similarly xfs reflink remapping path can also iterate over a million
extent entries in xfs_reflink_remap_blocks().

Since we can busy loop in these two functions, so let's add cond_resched()
to avoid softlockup messages like these.

watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [kworker/1:0:82435]
CPU: 1 PID: 82435 Comm: kworker/1:0 Tainted: G S  L   6.9.0-rc5-0-default #1
Workqueue: xfs-inodegc/sda2 xfs_inodegc_worker
NIP [c000000000beea10] xfs_extent_busy_trim+0x100/0x290
LR [c000000000bee958] xfs_extent_busy_trim+0x48/0x290
Call Trace:
  xfs_alloc_get_rec+0x54/0x1b0 (unreliable)
  xfs_alloc_compute_aligned+0x5c/0x144
  xfs_alloc_ag_vextent_size+0x238/0x8d4
  xfs_alloc_fix_freelist+0x540/0x694
  xfs_free_extent_fix_freelist+0x84/0xe0
  __xfs_free_extent+0x74/0x1ec
  xfs_extent_free_finish_item+0xcc/0x214
  xfs_defer_finish_one+0x194/0x388
  xfs_defer_finish_noroll+0x1b4/0x5c8
  xfs_defer_finish+0x2c/0xc4
  xfs_bunmapi_range+0xa4/0x100
  xfs_itruncate_extents_flags+0x1b8/0x2f4
  xfs_inactive_truncate+0xe0/0x124
  xfs_inactive+0x30c/0x3e0
  xfs_inodegc_worker+0x140/0x234
  process_scheduled_works+0x240/0x57c
  worker_thread+0x198/0x468
  kthread+0x138/0x140
  start_kernel_thread+0x14/0x18

run fstests generic/175 at 2024-02-02 04:40:21
[   C17] watchdog: BUG: soft lockup - CPU#17 stuck for 23s! [xfs_io:7679]
 watchdog: BUG: soft lockup - CPU#17 stuck for 23s! [xfs_io:7679]
 CPU: 17 PID: 7679 Comm: xfs_io Kdump: loaded Tainted: G X 6.4.0
 NIP [c008000005e3ec94] xfs_rmapbt_diff_two_keys+0x54/0xe0 [xfs]
 LR [c008000005e08798] xfs_btree_get_leaf_keys+0x110/0x1e0 [xfs]
 Call Trace:
  0xc000000014107c00 (unreliable)
  __xfs_btree_updkeys+0x8c/0x2c0 [xfs]
  xfs_btree_update_keys+0x150/0x170 [xfs]
  xfs_btree_lshift+0x534/0x660 [xfs]
  xfs_btree_make_block_unfull+0x19c/0x240 [xfs]
  xfs_btree_insrec+0x4e4/0x630 [xfs]
  xfs_btree_insert+0x104/0x2d0 [xfs]
  xfs_rmap_insert+0xc4/0x260 [xfs]
  xfs_rmap_map_shared+0x228/0x630 [xfs]
  xfs_rmap_finish_one+0x2d4/0x350 [xfs]
  xfs_rmap_update_finish_item+0x44/0xc0 [xfs]
  xfs_defer_finish_noroll+0x2e4/0x740 [xfs]
  __xfs_trans_commit+0x1f4/0x400 [xfs]
  xfs_reflink_remap_extent+0x2d8/0x650 [xfs]
  xfs_reflink_remap_blocks+0x154/0x320 [xfs]
  xfs_file_remap_range+0x138/0x3a0 [xfs]
  do_clone_file_range+0x11c/0x2f0
  vfs_clone_file_range+0x60/0x1c0
  ioctl_file_clone+0x78/0x140
  sys_ioctl+0x934/0x1270
  system_call_exception+0x158/0x320
  system_call_vectored_common+0x15c/0x2ec

Cc: Ojaswin Mujoo <ojaswin@linux.ibm.com>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Tested-by: Disha Goel<disgoel@linux.ibm.com>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
---
 fs/xfs/libxfs/xfs_bmap.c | 1 +
 fs/xfs/xfs_reflink.c     | 1 +
 2 files changed, 2 insertions(+)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 3b3206d312d6..c101cf266bc4 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -6383,6 +6383,7 @@ xfs_bunmapi_range(
 		error = xfs_defer_finish(tpp);
 		if (error)
 			goto out;
+		cond_resched();
 	}
 out:
 	return error;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 063a2e00d169..265a2a418bc7 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1387,6 +1387,7 @@ xfs_reflink_remap_blocks(
 		destoff += imap.br_blockcount;
 		len -= imap.br_blockcount;
 		remapped_len += imap.br_blockcount;
+		cond_resched();
 	}
 
 	if (error)
-- 
cgit v1.2.3


From 1437a9f06f740cc2950d9b6ac23fad838ff023a3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 27 May 2024 12:34:20 -0300
Subject: tools headers UAPI: Sync fcntl.h with the kernel sources to pick
 F_DUPFD_QUERY

There is no scrape script yet for those, but the warning pointed out we
need to update the array with the F_LINUX_SPECIFIC_BASE entries, do it.

Now 'perf trace' can decode that cmd and also use it in filter, as in:

  root@number:~# perf trace -e syscalls:*enter_fcntl --filter 'cmd != SETFL && cmd != GETFL'
     0.000 sssd_kcm/303828 syscalls:sys_enter_fcntl(fd: 13</var/lib/sss/secrets/secrets.ldb>, cmd: SETLK, arg: 0x7fffdc6a8a50)
     0.013 sssd_kcm/303828 syscalls:sys_enter_fcntl(fd: 13</var/lib/sss/secrets/secrets.ldb>, cmd: SETLKW, arg: 0x7fffdc6a8aa0)
     0.090 sssd_kcm/303828 syscalls:sys_enter_fcntl(fd: 13</var/lib/sss/secrets/secrets.ldb>, cmd: SETLKW, arg: 0x7fffdc6a88e0)
  ^Croot@number:~#

This picks up the changes in:

  c62b758bae6af16f ("fcntl: add F_DUPFD_QUERY fcntl()")

Addressing this perf tools build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/perf/trace/beauty/include/uapi/linux/fcntl.h include/uapi/linux/fcntl.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/lkml/ZlSqNQH9mFw2bmjq@x1
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c                         |  2 +-
 tools/perf/trace/beauty/include/uapi/linux/fcntl.h | 14 ++++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 51eca671c797..08a3a6effac1 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -765,7 +765,7 @@ static const char *fcntl_cmds[] = {
 static DEFINE_STRARRAY(fcntl_cmds, "F_");
 
 static const char *fcntl_linux_specific_cmds[] = {
-	"SETLEASE", "GETLEASE", "NOTIFY", [5] =	"CANCELLK", "DUPFD_CLOEXEC",
+	"SETLEASE", "GETLEASE", "NOTIFY", "DUPFD_QUERY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
 	"SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
 	"GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
 };
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
index 282e90aeb163..c0bcc185fa48 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
@@ -8,6 +8,14 @@
 #define F_SETLEASE	(F_LINUX_SPECIFIC_BASE + 0)
 #define F_GETLEASE	(F_LINUX_SPECIFIC_BASE + 1)
 
+/*
+ * Request nofications on a directory.
+ * See below for events that may be notified.
+ */
+#define F_NOTIFY	(F_LINUX_SPECIFIC_BASE + 2)
+
+#define F_DUPFD_QUERY	(F_LINUX_SPECIFIC_BASE + 3)
+
 /*
  * Cancel a blocking posix lock; internal use only until we expose an
  * asynchronous lock api to userspace:
@@ -17,12 +25,6 @@
 /* Create a file descriptor with FD_CLOEXEC set. */
 #define F_DUPFD_CLOEXEC	(F_LINUX_SPECIFIC_BASE + 6)
 
-/*
- * Request nofications on a directory.
- * See below for events that may be notified.
- */
-#define F_NOTIFY	(F_LINUX_SPECIFIC_BASE+2)
-
 /*
  * Set and get of pipe page size array
  */
-- 
cgit v1.2.3


From a3eed53beec4a7bac8acd109abcb187df2577781 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 27 May 2024 12:45:33 -0300
Subject: perf beauty: Update copy of linux/socket.h with the kernel sources

To pick up the fixes in:

  0645fbe760afcc53 ("net: have do_accept() take a struct proto_accept_arg argument")

That just changes a function prototype, not touching things used by the
perf scrape scripts such as:

  $ tools/perf/trace/beauty/sockaddr.sh | head -5
  static const char *socket_families[] = {
  	[0] = "UNSPEC",
  	[1] = "LOCAL",
  	[2] = "INET",
  	[3] = "AX25",
  $

This addresses this perf tools build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/lkml/ZlSrceExgjrUiDb5@x1
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/trace/beauty/include/linux/socket.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index 139c330ccf2c..89d16b90370b 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -16,6 +16,7 @@ struct cred;
 struct socket;
 struct sock;
 struct sk_buff;
+struct proto_accept_arg;
 
 #define __sockaddr_check_size(size)	\
 	BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage)))
@@ -433,7 +434,7 @@ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size,
 extern int __sys_sendto(int fd, void __user *buff, size_t len,
 			unsigned int flags, struct sockaddr __user *addr,
 			int addr_len);
-extern struct file *do_accept(struct file *file, unsigned file_flags,
+extern struct file *do_accept(struct file *file, struct proto_accept_arg *arg,
 			      struct sockaddr __user *upeer_sockaddr,
 			      int __user *upeer_addrlen, int flags);
 extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
-- 
cgit v1.2.3


From 001821b0e79716c4e17c71d8e053a23599a7a508 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 27 May 2024 12:51:30 -0300
Subject: perf trace beauty: Update the arch/x86/include/asm/irq_vectors.h copy
 with the kernel sources to pick POSTED_MSI_NOTIFICATION

To pick up the change in:

  f5a3562ec9dd29e6 ("x86/irq: Reserve a per CPU IDT vector for posted MSIs")

That picks up this new vector:

  $ cp arch/x86/include/asm/irq_vectors.h tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h
  $ tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh > after
  $ diff -u before after
  --- before	2024-05-27 12:50:47.708863932 -0300
  +++ after	2024-05-27 12:51:15.335113123 -0300
  @@ -1,6 +1,7 @@
   static const char *x86_irq_vectors[] = {
   	[0x02] = "NMI",
   	[0x80] = "IA32_SYSCALL",
  +	[0xeb] = "POSTED_MSI_NOTIFICATION",
   	[0xec] = "LOCAL_TIMER",
   	[0xed] = "HYPERV_STIMER0",
   	[0xee] = "HYPERV_REENLIGHTENMENT",
  $

Now those will be known when pretty printing the irq_vectors:*
tracepoints.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/lkml/ZlS34M0x30EFVhbg@x1
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h
index d18bfb238f66..13aea8fc3d45 100644
--- a/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h
+++ b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h
@@ -97,10 +97,16 @@
 
 #define LOCAL_TIMER_VECTOR		0xec
 
+/*
+ * Posted interrupt notification vector for all device MSIs delivered to
+ * the host kernel.
+ */
+#define POSTED_MSI_NOTIFICATION_VECTOR	0xeb
+
 #define NR_VECTORS			 256
 
 #ifdef CONFIG_X86_LOCAL_APIC
-#define FIRST_SYSTEM_VECTOR		LOCAL_TIMER_VECTOR
+#define FIRST_SYSTEM_VECTOR		POSTED_MSI_NOTIFICATION_VECTOR
 #else
 #define FIRST_SYSTEM_VECTOR		NR_VECTORS
 #endif
-- 
cgit v1.2.3


From 947051e361d551e0590777080ffc4926190f62f2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 24 May 2024 15:19:54 +0100
Subject: KVM: arm64: Fix AArch32 register narrowing on userspace write

When userspace writes to one of the core registers, we make
sure to narrow the corresponding GPRs if PSTATE indicates
an AArch32 context.

The code tries to check whether the context is EL0 or EL1 so
that it narrows the correct registers. But it does so by checking
the full PSTATE instead of PSTATE.M.

As a consequence, and if we are restoring an AArch32 EL0 context
in a 64bit guest, and that PSTATE has *any* bit set outside of
PSTATE.M, we narrow *all* registers instead of only the first 15,
destroying the 64bit state.

Obviously, this is not something the guest is likely to enjoy.

Correctly masking PSTATE to only evaluate PSTATE.M fixes it.

Fixes: 90c1f934ed71 ("KVM: arm64: Get rid of the AArch32 register mapping code")
Reported-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
Cc: stable@vger.kernel.org
Reviewed-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
Acked-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20240524141956.1450304-2-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/guest.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index e2f762d959bb..d9617b11f7a8 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -276,7 +276,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
 		int i, nr_reg;
 
-		switch (*vcpu_cpsr(vcpu)) {
+		switch (*vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK) {
 		/*
 		 * Either we are dealing with user mode, and only the
 		 * first 15 registers (+ PC) must be narrowed to 32bit.
-- 
cgit v1.2.3


From dfe6d190f38fc5df5ff2614b463a5195a399c885 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 24 May 2024 15:19:55 +0100
Subject: KVM: arm64: Allow AArch32 PSTATE.M to be restored as System mode

It appears that we don't allow a vcpu to be restored in AArch32
System mode, as we *never* included it in the list of valid modes.

Just add it to the list of allowed modes.

Fixes: 0d854a60b1d7 ("arm64: KVM: enable initialization of a 32bit vcpu")
Cc: stable@vger.kernel.org
Acked-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20240524141956.1450304-3-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/guest.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index d9617b11f7a8..11098eb7eb44 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -251,6 +251,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 		case PSR_AA32_MODE_SVC:
 		case PSR_AA32_MODE_ABT:
 		case PSR_AA32_MODE_UND:
+		case PSR_AA32_MODE_SYS:
 			if (!vcpu_el1_is_32bit(vcpu))
 				return -EINVAL;
 			break;
-- 
cgit v1.2.3


From c92e8b9eacebb4060634ebd9395bba1b29aadc68 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 24 May 2024 15:19:56 +0100
Subject: KVM: arm64: AArch32: Fix spurious trapping of conditional
 instructions

We recently upgraded the view of ESR_EL2 to 64bit, in keeping with
the requirements of the architecture.

However, the AArch32 emulation code was left unaudited, and the
(already dodgy) code that triages whether a trap is spurious or not
(because the condition code failed) broke in a subtle way:

If ESR_EL2.ISS2 is ever non-zero (unlikely, but hey, this is the ARM
architecture we're talking about), the hack that tests the top bits
of ESR_EL2.EC will break in an interesting way.

Instead, use kvm_vcpu_trap_get_class() to obtain the EC, and list
all the possible ECs that can fail a condition code check.

While we're at it, add SMC32 to the list, as it is explicitly listed
as being allowed to trap despite failing a condition code check (as
described in the HCR_EL2.TSC documentation).

Fixes: 0b12620fddb8 ("KVM: arm64: Treat ESR_EL2 as a 64-bit register")
Cc: stable@vger.kernel.org
Acked-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20240524141956.1450304-4-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/hyp/aarch32.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c
index 8d9670e6615d..449fa58cf3b6 100644
--- a/arch/arm64/kvm/hyp/aarch32.c
+++ b/arch/arm64/kvm/hyp/aarch32.c
@@ -50,9 +50,23 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
 	u32 cpsr_cond;
 	int cond;
 
-	/* Top two bits non-zero?  Unconditional. */
-	if (kvm_vcpu_get_esr(vcpu) >> 30)
+	/*
+	 * These are the exception classes that could fire with a
+	 * conditional instruction.
+	 */
+	switch (kvm_vcpu_trap_get_class(vcpu)) {
+	case ESR_ELx_EC_CP15_32:
+	case ESR_ELx_EC_CP15_64:
+	case ESR_ELx_EC_CP14_MR:
+	case ESR_ELx_EC_CP14_LS:
+	case ESR_ELx_EC_FP_ASIMD:
+	case ESR_ELx_EC_CP10_ID:
+	case ESR_ELx_EC_CP14_64:
+	case ESR_ELx_EC_SVC32:
+		break;
+	default:
 		return true;
+	}
 
 	/* Is condition field valid? */
 	cond = kvm_vcpu_get_condition(vcpu);
-- 
cgit v1.2.3


From 98e948fb60d41447fd8d2d0c3b8637fc6b6dc26d Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Mon, 27 May 2024 13:20:07 +0200
Subject: bpf: Allow delete from sockmap/sockhash only if update is allowed

We have seen an influx of syzkaller reports where a BPF program attached to
a tracepoint triggers a locking rule violation by performing a map_delete
on a sockmap/sockhash.

We don't intend to support this artificial use scenario. Extend the
existing verifier allowed-program-type check for updating sockmap/sockhash
to also cover deleting from a map.

From now on only BPF programs which were previously allowed to update
sockmap/sockhash can delete from these map types.

Fixes: ff9105993240 ("bpf, sockmap: Prevent lock inversion deadlock in map delete elem")
Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Reported-by: syzbot+ec941d6e24f633a59172@syzkaller.appspotmail.com
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: syzbot+ec941d6e24f633a59172@syzkaller.appspotmail.com
Acked-by: John Fastabend <john.fastabend@gmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=ec941d6e24f633a59172
Link: https://lore.kernel.org/bpf/20240527-sockmap-verify-deletes-v1-1-944b372f2101@cloudflare.com
---
 kernel/bpf/verifier.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 77da1f438bec..48f3a9acdef3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -8882,7 +8882,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
 	enum bpf_attach_type eatype = env->prog->expected_attach_type;
 	enum bpf_prog_type type = resolve_prog_type(env->prog);
 
-	if (func_id != BPF_FUNC_map_update_elem)
+	if (func_id != BPF_FUNC_map_update_elem &&
+	    func_id != BPF_FUNC_map_delete_elem)
 		return false;
 
 	/* It's not possible to get access to a locked struct sock in these
@@ -8893,6 +8894,11 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
 		if (eatype == BPF_TRACE_ITER)
 			return true;
 		break;
+	case BPF_PROG_TYPE_SOCK_OPS:
+		/* map_update allowed only via dedicated helpers with event type checks */
+		if (func_id == BPF_FUNC_map_delete_elem)
+			return true;
+		break;
 	case BPF_PROG_TYPE_SOCKET_FILTER:
 	case BPF_PROG_TYPE_SCHED_CLS:
 	case BPF_PROG_TYPE_SCHED_ACT:
@@ -8988,7 +8994,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 	case BPF_MAP_TYPE_SOCKMAP:
 		if (func_id != BPF_FUNC_sk_redirect_map &&
 		    func_id != BPF_FUNC_sock_map_update &&
-		    func_id != BPF_FUNC_map_delete_elem &&
 		    func_id != BPF_FUNC_msg_redirect_map &&
 		    func_id != BPF_FUNC_sk_select_reuseport &&
 		    func_id != BPF_FUNC_map_lookup_elem &&
@@ -8998,7 +9003,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 	case BPF_MAP_TYPE_SOCKHASH:
 		if (func_id != BPF_FUNC_sk_redirect_hash &&
 		    func_id != BPF_FUNC_sock_hash_update &&
-		    func_id != BPF_FUNC_map_delete_elem &&
 		    func_id != BPF_FUNC_msg_redirect_hash &&
 		    func_id != BPF_FUNC_sk_select_reuseport &&
 		    func_id != BPF_FUNC_map_lookup_elem &&
-- 
cgit v1.2.3


From 3b9ce0491a43e9af7f108b2f1bced7cd35931660 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Mon, 27 May 2024 13:20:08 +0200
Subject: Revert "bpf, sockmap: Prevent lock inversion deadlock in map delete
 elem"

This reverts commit ff91059932401894e6c86341915615c5eb0eca48.

This check is no longer needed. BPF programs attached to tracepoints are
now rejected by the verifier when they attempt to delete from a
sockmap/sockhash maps.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20240527-sockmap-verify-deletes-v1-2-944b372f2101@cloudflare.com
---
 net/core/sock_map.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 9402889840bf..63c016b4c169 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -423,9 +423,6 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
 	struct sock *sk;
 	int err = 0;
 
-	if (irqs_disabled())
-		return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
-
 	spin_lock_bh(&stab->lock);
 	sk = *psk;
 	if (!sk_test || sk_test == sk)
@@ -948,9 +945,6 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
 	struct bpf_shtab_elem *elem;
 	int ret = -ENOENT;
 
-	if (irqs_disabled())
-		return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
-
 	hash = sock_hash_bucket_hash(key, key_size);
 	bucket = sock_hash_select_bucket(htab, hash);
 
-- 
cgit v1.2.3


From a63bf556160fb19591183383da6757f52119981d Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Mon, 27 May 2024 13:20:09 +0200
Subject: selftests/bpf: Cover verifier checks for mutating sockmap/sockhash

Verifier enforces that only certain program types can mutate sock{map,hash}
maps, that is update it or delete from it. Add test coverage for these
checks so we don't regress.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20240527-sockmap-verify-deletes-v1-3-944b372f2101@cloudflare.com
---
 tools/testing/selftests/bpf/prog_tests/verifier.c  |   2 +
 .../selftests/bpf/progs/verifier_sockmap_mutate.c  | 187 +++++++++++++++++++++
 2 files changed, 189 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index c60db8beeb73..1c9c4ec1be11 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -67,6 +67,7 @@
 #include "verifier_search_pruning.skel.h"
 #include "verifier_sock.skel.h"
 #include "verifier_sock_addr.skel.h"
+#include "verifier_sockmap_mutate.skel.h"
 #include "verifier_spill_fill.skel.h"
 #include "verifier_spin_lock.skel.h"
 #include "verifier_stack_ptr.skel.h"
@@ -183,6 +184,7 @@ void test_verifier_sdiv(void)                 { RUN(verifier_sdiv); }
 void test_verifier_search_pruning(void)       { RUN(verifier_search_pruning); }
 void test_verifier_sock(void)                 { RUN(verifier_sock); }
 void test_verifier_sock_addr(void)            { RUN(verifier_sock_addr); }
+void test_verifier_sockmap_mutate(void)       { RUN(verifier_sockmap_mutate); }
 void test_verifier_spill_fill(void)           { RUN(verifier_spill_fill); }
 void test_verifier_spin_lock(void)            { RUN(verifier_spin_lock); }
 void test_verifier_stack_ptr(void)            { RUN(verifier_stack_ptr); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c b/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c
new file mode 100644
index 000000000000..fe4b123187b8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "bpf_misc.h"
+
+#define __always_unused __attribute__((unused))
+
+char _license[] SEC("license") = "GPL";
+
+struct sock {
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__sockmap {
+	union {
+		struct sock *sk;
+	};
+} __attribute__((preserve_access_index));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKHASH);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sockhash SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sockmap SEC(".maps");
+
+enum { CG_OK = 1 };
+
+int zero = 0;
+
+static __always_inline void test_sockmap_delete(void)
+{
+	bpf_map_delete_elem(&sockmap, &zero);
+	bpf_map_delete_elem(&sockhash, &zero);
+}
+
+static __always_inline void test_sockmap_update(void *sk)
+{
+	if (sk) {
+		bpf_map_update_elem(&sockmap, &zero, sk, BPF_ANY);
+		bpf_map_update_elem(&sockhash, &zero, sk, BPF_ANY);
+	}
+}
+
+static __always_inline void test_sockmap_lookup_and_update(void)
+{
+	struct bpf_sock *sk = bpf_map_lookup_elem(&sockmap, &zero);
+
+	if (sk) {
+		test_sockmap_update(sk);
+		bpf_sk_release(sk);
+	}
+}
+
+static __always_inline void test_sockmap_mutate(void *sk)
+{
+	test_sockmap_delete();
+	test_sockmap_update(sk);
+}
+
+static __always_inline void test_sockmap_lookup_and_mutate(void)
+{
+	test_sockmap_delete();
+	test_sockmap_lookup_and_update();
+}
+
+SEC("action")
+__success
+int test_sched_act(struct __sk_buff *skb)
+{
+	test_sockmap_mutate(skb->sk);
+	return 0;
+}
+
+SEC("classifier")
+__success
+int test_sched_cls(struct __sk_buff *skb)
+{
+	test_sockmap_mutate(skb->sk);
+	return 0;
+}
+
+SEC("flow_dissector")
+__success
+int test_flow_dissector_delete(struct __sk_buff *skb __always_unused)
+{
+	test_sockmap_delete();
+	return 0;
+}
+
+SEC("flow_dissector")
+__failure __msg("program of this type cannot use helper bpf_sk_release")
+int test_flow_dissector_update(struct __sk_buff *skb __always_unused)
+{
+	test_sockmap_lookup_and_update(); /* no access to skb->sk */
+	return 0;
+}
+
+SEC("iter/sockmap")
+__success
+int test_trace_iter(struct bpf_iter__sockmap *ctx)
+{
+	test_sockmap_mutate(ctx->sk);
+	return 0;
+}
+
+SEC("raw_tp/kfree")
+__failure __msg("cannot update sockmap in this context")
+int test_raw_tp_delete(const void *ctx __always_unused)
+{
+	test_sockmap_delete();
+	return 0;
+}
+
+SEC("raw_tp/kfree")
+__failure __msg("cannot update sockmap in this context")
+int test_raw_tp_update(const void *ctx __always_unused)
+{
+	test_sockmap_lookup_and_update();
+	return 0;
+}
+
+SEC("sk_lookup")
+__success
+int test_sk_lookup(struct bpf_sk_lookup *ctx)
+{
+	test_sockmap_mutate(ctx->sk);
+	return 0;
+}
+
+SEC("sk_reuseport")
+__success
+int test_sk_reuseport(struct sk_reuseport_md *ctx)
+{
+	test_sockmap_mutate(ctx->sk);
+	return 0;
+}
+
+SEC("socket")
+__success
+int test_socket_filter(struct __sk_buff *skb)
+{
+	test_sockmap_mutate(skb->sk);
+	return 0;
+}
+
+SEC("sockops")
+__success
+int test_sockops_delete(struct bpf_sock_ops *ctx __always_unused)
+{
+	test_sockmap_delete();
+	return CG_OK;
+}
+
+SEC("sockops")
+__failure __msg("cannot update sockmap in this context")
+int test_sockops_update(struct bpf_sock_ops *ctx)
+{
+	test_sockmap_update(ctx->sk);
+	return CG_OK;
+}
+
+SEC("sockops")
+__success
+int test_sockops_update_dedicated(struct bpf_sock_ops *ctx)
+{
+	bpf_sock_map_update(ctx, &sockmap, &zero, BPF_ANY);
+	bpf_sock_hash_update(ctx, &sockhash, &zero, BPF_ANY);
+	return CG_OK;
+}
+
+SEC("xdp")
+__success
+int test_xdp(struct xdp_md *ctx __always_unused)
+{
+	test_sockmap_lookup_and_mutate();
+	return XDP_PASS;
+}
-- 
cgit v1.2.3


From d9ff882b54f99f96787fa3df7cd938966843c418 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Mon, 27 May 2024 13:34:45 +0900
Subject: null_blk: Fix return value of nullb_device_power_store()

When powering on a null_blk device that is not already on, the return
value ret that is initialized to be count is reused to check the return
value of null_add_dev(), leading to nullb_device_power_store() to return
null_add_dev() return value (0 on success) instead of "count".
So make sure to set ret to be equal to count when there are no errors.

Fixes: a2db328b0839 ("null_blk: fix null-ptr-dereference while configuring 'power' and 'submit_queues'")
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Link: https://lore.kernel.org/r/20240527043445.235267-1-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index eb023d267369..631dca2e4e84 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -494,6 +494,7 @@ static ssize_t nullb_device_power_store(struct config_item *item,
 
 		set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
 		dev->power = newp;
+		ret = count;
 	} else if (dev->power && !newp) {
 		if (test_and_clear_bit(NULLB_DEV_FL_UP, &dev->flags)) {
 			dev->power = newp;
-- 
cgit v1.2.3


From 30a0e3135f9aa14ba745f767375183b3112d7440 Mon Sep 17 00:00:00 2001
From: hexue <xue01.he@samsung.com>
Date: Mon, 27 May 2024 16:45:33 +0800
Subject: block: delete redundant function declaration

blk_stats_alloc_enable was used for block hybrid poll, the related
function definition was removed by patch:
commit 54bdd67d0f88 ("blk-mq: remove hybrid polling")
but the function declaration was not deleted.

Signed-off-by: hexue <xue01.he@samsung.com>
Link: https://lore.kernel.org/r/20240527084533.1485210-1-xue01.he@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-stat.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/blk-stat.h b/block/blk-stat.h
index 17e1eb4ec7e2..5d7f18ba436d 100644
--- a/block/blk-stat.h
+++ b/block/blk-stat.h
@@ -64,7 +64,6 @@ struct blk_stat_callback {
 
 struct blk_queue_stats *blk_alloc_queue_stats(void);
 void blk_free_queue_stats(struct blk_queue_stats *);
-bool blk_stats_alloc_enable(struct request_queue *q);
 
 void blk_stat_add(struct request *rq, u64 now);
 
-- 
cgit v1.2.3


From e30a942861b540e056425a8e31ba801de1ed4f25 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 27 May 2024 14:44:11 -0500
Subject: ASoC: SOF: stream-ipc: remove unnecessary MODULE_LICENSE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This file is part of the snd-sof module, there's no reason to re-add the
MODULE_LICENSE here.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://msgid.link/r/20240527194414.166156-2-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/stream-ipc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sound/soc/sof/stream-ipc.c b/sound/soc/sof/stream-ipc.c
index eb71303aa24c..794c7bbccbaf 100644
--- a/sound/soc/sof/stream-ipc.c
+++ b/sound/soc/sof/stream-ipc.c
@@ -125,5 +125,3 @@ int sof_stream_pcm_close(struct snd_sof_dev *sdev,
 	return 0;
 }
 EXPORT_SYMBOL(sof_stream_pcm_close);
-
-MODULE_LICENSE("Dual BSD/GPL");
-- 
cgit v1.2.3


From b88056df4fcb7b5930d6ee3fef494e8729dcf2b2 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 27 May 2024 14:44:12 -0500
Subject: ASoC: SOF: AMD: group all module related information
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The module information is spread across files, group in a single
location.  For maintenability and alignment, the arbitrary Intel
convention is used with the following order:

MODULE_LICENSE
MODULE_DESCRIPTION
MODULE_IMPORT

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://msgid.link/r/20240527194414.166156-3-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/amd/acp63.c       | 4 ----
 sound/soc/sof/amd/pci-acp63.c   | 1 +
 sound/soc/sof/amd/pci-rmb.c     | 1 +
 sound/soc/sof/amd/pci-rn.c      | 1 +
 sound/soc/sof/amd/pci-vangogh.c | 1 +
 sound/soc/sof/amd/rembrandt.c   | 4 ----
 sound/soc/sof/amd/renoir.c      | 4 ----
 sound/soc/sof/amd/vangogh.c     | 4 ----
 8 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/sound/soc/sof/amd/acp63.c b/sound/soc/sof/amd/acp63.c
index 9fb645079c3a..9e6eb4bfc805 100644
--- a/sound/soc/sof/amd/acp63.c
+++ b/sound/soc/sof/amd/acp63.c
@@ -140,7 +140,3 @@ int sof_acp63_ops_init(struct snd_sof_dev *sdev)
 
 	return 0;
 }
-
-MODULE_IMPORT_NS(SND_SOC_SOF_AMD_COMMON);
-MODULE_DESCRIPTION("ACP63 SOF Driver");
-MODULE_LICENSE("Dual BSD/GPL");
diff --git a/sound/soc/sof/amd/pci-acp63.c b/sound/soc/sof/amd/pci-acp63.c
index eeaa12cceb23..fc8984447365 100644
--- a/sound/soc/sof/amd/pci-acp63.c
+++ b/sound/soc/sof/amd/pci-acp63.c
@@ -109,5 +109,6 @@ static struct pci_driver snd_sof_pci_amd_acp63_driver = {
 module_pci_driver(snd_sof_pci_amd_acp63_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("ACP63 SOF Driver");
 MODULE_IMPORT_NS(SND_SOC_SOF_AMD_COMMON);
 MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/amd/pci-rmb.c b/sound/soc/sof/amd/pci-rmb.c
index 2f288545c426..4bc30951f8b0 100644
--- a/sound/soc/sof/amd/pci-rmb.c
+++ b/sound/soc/sof/amd/pci-rmb.c
@@ -99,5 +99,6 @@ static struct pci_driver snd_sof_pci_amd_rmb_driver = {
 module_pci_driver(snd_sof_pci_amd_rmb_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("REMBRANDT SOF Driver");
 MODULE_IMPORT_NS(SND_SOC_SOF_AMD_COMMON);
 MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/amd/pci-rn.c b/sound/soc/sof/amd/pci-rn.c
index a0195e9b400c..e08875bdfa8b 100644
--- a/sound/soc/sof/amd/pci-rn.c
+++ b/sound/soc/sof/amd/pci-rn.c
@@ -103,5 +103,6 @@ static struct pci_driver snd_sof_pci_amd_rn_driver = {
 module_pci_driver(snd_sof_pci_amd_rn_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("RENOIR SOF Driver");
 MODULE_IMPORT_NS(SND_SOC_SOF_AMD_COMMON);
 MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/amd/pci-vangogh.c b/sound/soc/sof/amd/pci-vangogh.c
index 5cd3ac84752f..16eb2994fbab 100644
--- a/sound/soc/sof/amd/pci-vangogh.c
+++ b/sound/soc/sof/amd/pci-vangogh.c
@@ -101,5 +101,6 @@ static struct pci_driver snd_sof_pci_amd_vgh_driver = {
 module_pci_driver(snd_sof_pci_amd_vgh_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("VANGOGH SOF Driver");
 MODULE_IMPORT_NS(SND_SOC_SOF_AMD_COMMON);
 MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/amd/rembrandt.c b/sound/soc/sof/amd/rembrandt.c
index f1d1ba57ab3a..076f2f05a95c 100644
--- a/sound/soc/sof/amd/rembrandt.c
+++ b/sound/soc/sof/amd/rembrandt.c
@@ -140,7 +140,3 @@ int sof_rembrandt_ops_init(struct snd_sof_dev *sdev)
 
 	return 0;
 }
-
-MODULE_IMPORT_NS(SND_SOC_SOF_AMD_COMMON);
-MODULE_DESCRIPTION("REMBRANDT SOF Driver");
-MODULE_LICENSE("Dual BSD/GPL");
diff --git a/sound/soc/sof/amd/renoir.c b/sound/soc/sof/amd/renoir.c
index 47b863f6258c..aa2d24dac6f5 100644
--- a/sound/soc/sof/amd/renoir.c
+++ b/sound/soc/sof/amd/renoir.c
@@ -115,7 +115,3 @@ int sof_renoir_ops_init(struct snd_sof_dev *sdev)
 
 	return 0;
 }
-
-MODULE_IMPORT_NS(SND_SOC_SOF_AMD_COMMON);
-MODULE_DESCRIPTION("RENOIR SOF Driver");
-MODULE_LICENSE("Dual BSD/GPL");
diff --git a/sound/soc/sof/amd/vangogh.c b/sound/soc/sof/amd/vangogh.c
index bc6ffdb5471a..61372958c09d 100644
--- a/sound/soc/sof/amd/vangogh.c
+++ b/sound/soc/sof/amd/vangogh.c
@@ -161,7 +161,3 @@ int sof_vangogh_ops_init(struct snd_sof_dev *sdev)
 
 	return 0;
 }
-
-MODULE_IMPORT_NS(SND_SOC_SOF_AMD_COMMON);
-MODULE_DESCRIPTION("VANGOGH SOF Driver");
-MODULE_LICENSE("Dual BSD/GPL");
-- 
cgit v1.2.3


From 06a2315da0b02db4f2115bc9253daa270571e389 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 27 May 2024 14:44:13 -0500
Subject: ASoC: SOF: reorder MODULE_ definitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow the arbitrary Intel convention order to allow for easier grep.

MODULE_LICENSE
MODULE_DESCRIPTION
MODULE_IMPORT

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://msgid.link/r/20240527194414.166156-4-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/amd/acp-common.c                 | 4 ++--
 sound/soc/sof/amd/acp.c                        | 2 +-
 sound/soc/sof/core.c                           | 2 +-
 sound/soc/sof/nocodec.c                        | 2 +-
 sound/soc/sof/sof-client-ipc-flood-test.c      | 2 +-
 sound/soc/sof/sof-client-ipc-kernel-injector.c | 2 +-
 sound/soc/sof/sof-client-ipc-msg-injector.c    | 2 +-
 sound/soc/sof/sof-client-probes.c              | 2 +-
 sound/soc/sof/xtensa/core.c                    | 2 +-
 9 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/sound/soc/sof/amd/acp-common.c b/sound/soc/sof/amd/acp-common.c
index b26fa471b431..81bb93e98358 100644
--- a/sound/soc/sof/amd/acp-common.c
+++ b/sound/soc/sof/amd/acp-common.c
@@ -258,8 +258,8 @@ const struct snd_sof_dsp_ops sof_acp_common_ops = {
 };
 EXPORT_SYMBOL_NS(sof_acp_common_ops, SND_SOC_SOF_AMD_COMMON);
 
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("ACP SOF COMMON Driver");
 MODULE_IMPORT_NS(SND_SOC_SOF_AMD_COMMON);
 MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
 MODULE_IMPORT_NS(SOUNDWIRE_AMD_INIT);
-MODULE_DESCRIPTION("ACP SOF COMMON Driver");
-MODULE_LICENSE("Dual BSD/GPL");
diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c
index c12c7f820529..74fd5f2b148b 100644
--- a/sound/soc/sof/amd/acp.c
+++ b/sound/soc/sof/amd/acp.c
@@ -801,7 +801,7 @@ void amd_sof_acp_remove(struct snd_sof_dev *sdev)
 }
 EXPORT_SYMBOL_NS(amd_sof_acp_remove, SND_SOC_SOF_AMD_COMMON);
 
+MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("AMD ACP sof driver");
 MODULE_IMPORT_NS(SOUNDWIRE_AMD_INIT);
 MODULE_IMPORT_NS(SND_AMD_SOUNDWIRE_ACPI);
-MODULE_LICENSE("Dual BSD/GPL");
diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c
index 0a4917136ff9..83fe0401baf8 100644
--- a/sound/soc/sof/core.c
+++ b/sound/soc/sof/core.c
@@ -769,7 +769,7 @@ void sof_machine_unregister(struct snd_sof_dev *sdev, void *pdata)
 EXPORT_SYMBOL(sof_machine_unregister);
 
 MODULE_AUTHOR("Liam Girdwood");
-MODULE_DESCRIPTION("Sound Open Firmware (SOF) Core");
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Sound Open Firmware (SOF) Core");
 MODULE_ALIAS("platform:sof-audio");
 MODULE_IMPORT_NS(SND_SOC_SOF_CLIENT);
diff --git a/sound/soc/sof/nocodec.c b/sound/soc/sof/nocodec.c
index fdcbe33d3dcf..b12b3d865ae3 100644
--- a/sound/soc/sof/nocodec.c
+++ b/sound/soc/sof/nocodec.c
@@ -110,7 +110,7 @@ static struct platform_driver sof_nocodec_audio = {
 };
 module_platform_driver(sof_nocodec_audio)
 
+MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("ASoC sof nocodec");
 MODULE_AUTHOR("Liam Girdwood");
-MODULE_LICENSE("Dual BSD/GPL");
 MODULE_ALIAS("platform:sof-nocodec");
diff --git a/sound/soc/sof/sof-client-ipc-flood-test.c b/sound/soc/sof/sof-client-ipc-flood-test.c
index 435614926092..e7d2001140e8 100644
--- a/sound/soc/sof/sof-client-ipc-flood-test.c
+++ b/sound/soc/sof/sof-client-ipc-flood-test.c
@@ -394,6 +394,6 @@ static struct auxiliary_driver sof_ipc_flood_client_drv = {
 
 module_auxiliary_driver(sof_ipc_flood_client_drv);
 
-MODULE_DESCRIPTION("SOF IPC Flood Test Client Driver");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SOF IPC Flood Test Client Driver");
 MODULE_IMPORT_NS(SND_SOC_SOF_CLIENT);
diff --git a/sound/soc/sof/sof-client-ipc-kernel-injector.c b/sound/soc/sof/sof-client-ipc-kernel-injector.c
index 6973b6690df4..d3f541069b24 100644
--- a/sound/soc/sof/sof-client-ipc-kernel-injector.c
+++ b/sound/soc/sof/sof-client-ipc-kernel-injector.c
@@ -157,6 +157,6 @@ static struct auxiliary_driver sof_msg_inject_client_drv = {
 
 module_auxiliary_driver(sof_msg_inject_client_drv);
 
-MODULE_DESCRIPTION("SOF IPC Kernel Injector Client Driver");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SOF IPC Kernel Injector Client Driver");
 MODULE_IMPORT_NS(SND_SOC_SOF_CLIENT);
diff --git a/sound/soc/sof/sof-client-ipc-msg-injector.c b/sound/soc/sof/sof-client-ipc-msg-injector.c
index af22e6421029..d0f8beb9d000 100644
--- a/sound/soc/sof/sof-client-ipc-msg-injector.c
+++ b/sound/soc/sof/sof-client-ipc-msg-injector.c
@@ -335,6 +335,6 @@ static struct auxiliary_driver sof_msg_inject_client_drv = {
 
 module_auxiliary_driver(sof_msg_inject_client_drv);
 
-MODULE_DESCRIPTION("SOF IPC Message Injector Client Driver");
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SOF IPC Message Injector Client Driver");
 MODULE_IMPORT_NS(SND_SOC_SOF_CLIENT);
diff --git a/sound/soc/sof/sof-client-probes.c b/sound/soc/sof/sof-client-probes.c
index b8f297307565..ccc7d38ddc38 100644
--- a/sound/soc/sof/sof-client-probes.c
+++ b/sound/soc/sof/sof-client-probes.c
@@ -540,6 +540,6 @@ static struct auxiliary_driver sof_probes_client_drv = {
 
 module_auxiliary_driver(sof_probes_client_drv);
 
-MODULE_DESCRIPTION("SOF Probes Client Driver");
 MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("SOF Probes Client Driver");
 MODULE_IMPORT_NS(SND_SOC_SOF_CLIENT);
diff --git a/sound/soc/sof/xtensa/core.c b/sound/soc/sof/xtensa/core.c
index ccbc3fcdadd5..3cf8c84beff9 100644
--- a/sound/soc/sof/xtensa/core.c
+++ b/sound/soc/sof/xtensa/core.c
@@ -151,5 +151,5 @@ const struct dsp_arch_ops sof_xtensa_arch_ops = {
 };
 EXPORT_SYMBOL_NS(sof_xtensa_arch_ops, SND_SOC_SOF_XTENSA);
 
-MODULE_DESCRIPTION("SOF Xtensa DSP support");
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF Xtensa DSP support");
-- 
cgit v1.2.3


From 3ff78451b8e446e9a548b98a0d4dd8d24dc5780b Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 27 May 2024 14:44:14 -0500
Subject: ASoC: SOF: add missing MODULE_DESCRIPTION()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MODULE_DESCRIPTION() was optional until it became mandatory and
flagged as an error by 'make W=1'.

Reported-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Daniel Baluta <daniel.baluta@nxp.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://msgid.link/r/20240527194414.166156-5-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/imx/imx-common.c           | 1 +
 sound/soc/sof/imx/imx8.c                 | 3 ++-
 sound/soc/sof/imx/imx8m.c                | 3 ++-
 sound/soc/sof/imx/imx8ulp.c              | 3 ++-
 sound/soc/sof/intel/atom.c               | 1 +
 sound/soc/sof/intel/bdw.c                | 1 +
 sound/soc/sof/intel/byt.c                | 1 +
 sound/soc/sof/intel/hda-codec.c          | 1 +
 sound/soc/sof/intel/hda-ctrl.c           | 1 +
 sound/soc/sof/intel/hda-mlink.c          | 1 +
 sound/soc/sof/intel/hda.c                | 1 +
 sound/soc/sof/intel/pci-apl.c            | 1 +
 sound/soc/sof/intel/pci-cnl.c            | 1 +
 sound/soc/sof/intel/pci-icl.c            | 1 +
 sound/soc/sof/intel/pci-lnl.c            | 1 +
 sound/soc/sof/intel/pci-mtl.c            | 1 +
 sound/soc/sof/intel/pci-skl.c            | 1 +
 sound/soc/sof/intel/pci-tgl.c            | 1 +
 sound/soc/sof/intel/pci-tng.c            | 1 +
 sound/soc/sof/mediatek/mt8186/mt8186.c   | 3 ++-
 sound/soc/sof/mediatek/mt8195/mt8195.c   | 3 ++-
 sound/soc/sof/mediatek/mtk-adsp-common.c | 1 +
 sound/soc/sof/sof-acpi-dev.c             | 1 +
 sound/soc/sof/sof-of-dev.c               | 1 +
 sound/soc/sof/sof-pci-dev.c              | 1 +
 sound/soc/sof/sof-utils.c                | 1 +
 26 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/sound/soc/sof/imx/imx-common.c b/sound/soc/sof/imx/imx-common.c
index 2981aea123d9..fce6d9cf6a6b 100644
--- a/sound/soc/sof/imx/imx-common.c
+++ b/sound/soc/sof/imx/imx-common.c
@@ -75,3 +75,4 @@ void imx8_dump(struct snd_sof_dev *sdev, u32 flags)
 EXPORT_SYMBOL(imx8_dump);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF helpers for IMX platforms");
diff --git a/sound/soc/sof/imx/imx8.c b/sound/soc/sof/imx/imx8.c
index 3021dc87ab5a..9f24e3c283dd 100644
--- a/sound/soc/sof/imx/imx8.c
+++ b/sound/soc/sof/imx/imx8.c
@@ -667,5 +667,6 @@ static struct platform_driver snd_sof_of_imx8_driver = {
 };
 module_platform_driver(snd_sof_of_imx8_driver);
 
-MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for IMX8 platforms");
+MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
diff --git a/sound/soc/sof/imx/imx8m.c b/sound/soc/sof/imx/imx8m.c
index 4ed415f04345..1c7019c3cbd3 100644
--- a/sound/soc/sof/imx/imx8m.c
+++ b/sound/soc/sof/imx/imx8m.c
@@ -514,5 +514,6 @@ static struct platform_driver snd_sof_of_imx8m_driver = {
 };
 module_platform_driver(snd_sof_of_imx8m_driver);
 
-MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for IMX8M platforms");
+MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
diff --git a/sound/soc/sof/imx/imx8ulp.c b/sound/soc/sof/imx/imx8ulp.c
index 8adfdd00413a..2585b1beef23 100644
--- a/sound/soc/sof/imx/imx8ulp.c
+++ b/sound/soc/sof/imx/imx8ulp.c
@@ -516,5 +516,6 @@ static struct platform_driver snd_sof_of_imx8ulp_driver = {
 };
 module_platform_driver(snd_sof_of_imx8ulp_driver);
 
-MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for IMX8ULP platforms");
+MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
diff --git a/sound/soc/sof/intel/atom.c b/sound/soc/sof/intel/atom.c
index 86af4e9a716e..3505ac3a1b14 100644
--- a/sound/soc/sof/intel/atom.c
+++ b/sound/soc/sof/intel/atom.c
@@ -418,3 +418,4 @@ void atom_set_mach_params(struct snd_soc_acpi_mach *mach,
 EXPORT_SYMBOL_NS(atom_set_mach_params, SND_SOC_SOF_INTEL_ATOM_HIFI_EP);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for Atom platforms");
diff --git a/sound/soc/sof/intel/bdw.c b/sound/soc/sof/intel/bdw.c
index 3262286a9a9d..7f18080e4e19 100644
--- a/sound/soc/sof/intel/bdw.c
+++ b/sound/soc/sof/intel/bdw.c
@@ -694,6 +694,7 @@ static struct platform_driver snd_sof_acpi_intel_bdw_driver = {
 module_platform_driver(snd_sof_acpi_intel_bdw_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for Broadwell platforms");
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HIFI_EP_IPC);
 MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
 MODULE_IMPORT_NS(SND_SOC_SOF_ACPI_DEV);
diff --git a/sound/soc/sof/intel/byt.c b/sound/soc/sof/intel/byt.c
index d78d11d4cfbf..7a57e162fb1c 100644
--- a/sound/soc/sof/intel/byt.c
+++ b/sound/soc/sof/intel/byt.c
@@ -475,6 +475,7 @@ static struct platform_driver snd_sof_acpi_intel_byt_driver = {
 module_platform_driver(snd_sof_acpi_intel_byt_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for Baytrail/Cherrytrail");
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HIFI_EP_IPC);
 MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
 MODULE_IMPORT_NS(SND_SOC_SOF_ACPI_DEV);
diff --git a/sound/soc/sof/intel/hda-codec.c b/sound/soc/sof/intel/hda-codec.c
index da3db3ed379e..dc46888faa0d 100644
--- a/sound/soc/sof/intel/hda-codec.c
+++ b/sound/soc/sof/intel/hda-codec.c
@@ -457,3 +457,4 @@ EXPORT_SYMBOL_NS_GPL(hda_codec_i915_exit, SND_SOC_SOF_HDA_AUDIO_CODEC_I915);
 #endif
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for HDaudio codecs");
diff --git a/sound/soc/sof/intel/hda-ctrl.c b/sound/soc/sof/intel/hda-ctrl.c
index 262b482dc0a8..b9a02750ce61 100644
--- a/sound/soc/sof/intel/hda-ctrl.c
+++ b/sound/soc/sof/intel/hda-ctrl.c
@@ -328,6 +328,7 @@ void hda_dsp_ctrl_stop_chip(struct snd_sof_dev *sdev)
 }
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF helpers for HDaudio platforms");
 MODULE_IMPORT_NS(SND_SOC_SOF_HDA_MLINK);
 MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC);
 MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC_I915);
diff --git a/sound/soc/sof/intel/hda-mlink.c b/sound/soc/sof/intel/hda-mlink.c
index 04bbc5c9904c..9a3559c78b62 100644
--- a/sound/soc/sof/intel/hda-mlink.c
+++ b/sound/soc/sof/intel/hda-mlink.c
@@ -972,3 +972,4 @@ EXPORT_SYMBOL_NS(hdac_bus_eml_enable_offload, SND_SOC_SOF_HDA_MLINK);
 #endif
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for HDaudio multi-link");
diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c
index e6a38de0a0aa..dead1c19558b 100644
--- a/sound/soc/sof/intel/hda.c
+++ b/sound/soc/sof/intel/hda.c
@@ -1522,6 +1522,7 @@ void hda_unregister_clients(struct snd_sof_dev *sdev)
 }
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for HDaudio platforms");
 MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
 MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC);
 MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC_I915);
diff --git a/sound/soc/sof/intel/pci-apl.c b/sound/soc/sof/intel/pci-apl.c
index df6d897da290..f006dcf5458a 100644
--- a/sound/soc/sof/intel/pci-apl.c
+++ b/sound/soc/sof/intel/pci-apl.c
@@ -105,6 +105,7 @@ static struct pci_driver snd_sof_pci_intel_apl_driver = {
 module_pci_driver(snd_sof_pci_intel_apl_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for ApolloLake platforms");
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_GENERIC);
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
 MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/intel/pci-cnl.c b/sound/soc/sof/intel/pci-cnl.c
index a39fa3657d55..a8406342f08b 100644
--- a/sound/soc/sof/intel/pci-cnl.c
+++ b/sound/soc/sof/intel/pci-cnl.c
@@ -143,6 +143,7 @@ static struct pci_driver snd_sof_pci_intel_cnl_driver = {
 module_pci_driver(snd_sof_pci_intel_cnl_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for CannonLake platforms");
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_GENERIC);
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
 MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/intel/pci-icl.c b/sound/soc/sof/intel/pci-icl.c
index 9f1fe47475fb..25effca50d9f 100644
--- a/sound/soc/sof/intel/pci-icl.c
+++ b/sound/soc/sof/intel/pci-icl.c
@@ -108,6 +108,7 @@ static struct pci_driver snd_sof_pci_intel_icl_driver = {
 module_pci_driver(snd_sof_pci_intel_icl_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for IceLake platforms");
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_GENERIC);
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_CNL);
diff --git a/sound/soc/sof/intel/pci-lnl.c b/sound/soc/sof/intel/pci-lnl.c
index 68e5c90151b2..602c574064eb 100644
--- a/sound/soc/sof/intel/pci-lnl.c
+++ b/sound/soc/sof/intel/pci-lnl.c
@@ -70,6 +70,7 @@ static struct pci_driver snd_sof_pci_intel_lnl_driver = {
 module_pci_driver(snd_sof_pci_intel_lnl_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for LunarLake platforms");
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_GENERIC);
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_MTL);
diff --git a/sound/soc/sof/intel/pci-mtl.c b/sound/soc/sof/intel/pci-mtl.c
index c685cb8d6171..8cb0333c033e 100644
--- a/sound/soc/sof/intel/pci-mtl.c
+++ b/sound/soc/sof/intel/pci-mtl.c
@@ -133,6 +133,7 @@ static struct pci_driver snd_sof_pci_intel_mtl_driver = {
 module_pci_driver(snd_sof_pci_intel_mtl_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for MeteorLake platforms");
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_GENERIC);
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
 MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/intel/pci-skl.c b/sound/soc/sof/intel/pci-skl.c
index 862da8009543..8ca0231d7e4f 100644
--- a/sound/soc/sof/intel/pci-skl.c
+++ b/sound/soc/sof/intel/pci-skl.c
@@ -89,6 +89,7 @@ static struct pci_driver snd_sof_pci_intel_skl_driver = {
 module_pci_driver(snd_sof_pci_intel_skl_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for SkyLake platforms");
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_GENERIC);
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
 MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c
index f73bb47cd79e..ebe1a7d16689 100644
--- a/sound/soc/sof/intel/pci-tgl.c
+++ b/sound/soc/sof/intel/pci-tgl.c
@@ -317,6 +317,7 @@ static struct pci_driver snd_sof_pci_intel_tgl_driver = {
 module_pci_driver(snd_sof_pci_intel_tgl_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for TigerLake platforms");
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_GENERIC);
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_CNL);
diff --git a/sound/soc/sof/intel/pci-tng.c b/sound/soc/sof/intel/pci-tng.c
index 5c3069588bb7..1375c393827e 100644
--- a/sound/soc/sof/intel/pci-tng.c
+++ b/sound/soc/sof/intel/pci-tng.c
@@ -244,6 +244,7 @@ static struct pci_driver snd_sof_pci_intel_tng_driver = {
 module_pci_driver(snd_sof_pci_intel_tng_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for Tangier platforms");
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HIFI_EP_IPC);
 MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
 MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/mediatek/mt8186/mt8186.c b/sound/soc/sof/mediatek/mt8186/mt8186.c
index c63e0d2f4b96..bea1b9d9ca28 100644
--- a/sound/soc/sof/mediatek/mt8186/mt8186.c
+++ b/sound/soc/sof/mediatek/mt8186/mt8186.c
@@ -666,6 +666,7 @@ static struct platform_driver snd_sof_of_mt8186_driver = {
 };
 module_platform_driver(snd_sof_of_mt8186_driver);
 
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for MT8186/MT8188 platforms");
 MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
 MODULE_IMPORT_NS(SND_SOC_SOF_MTK_COMMON);
-MODULE_LICENSE("Dual BSD/GPL");
diff --git a/sound/soc/sof/mediatek/mt8195/mt8195.c b/sound/soc/sof/mediatek/mt8195/mt8195.c
index fc1c016104ae..31dc98d1b1d8 100644
--- a/sound/soc/sof/mediatek/mt8195/mt8195.c
+++ b/sound/soc/sof/mediatek/mt8195/mt8195.c
@@ -619,6 +619,7 @@ static struct platform_driver snd_sof_of_mt8195_driver = {
 };
 module_platform_driver(snd_sof_of_mt8195_driver);
 
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for MTL 8195 platforms");
 MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
 MODULE_IMPORT_NS(SND_SOC_SOF_MTK_COMMON);
-MODULE_LICENSE("Dual BSD/GPL");
diff --git a/sound/soc/sof/mediatek/mtk-adsp-common.c b/sound/soc/sof/mediatek/mtk-adsp-common.c
index de8dbe27cd0d..20bcf5590eb8 100644
--- a/sound/soc/sof/mediatek/mtk-adsp-common.c
+++ b/sound/soc/sof/mediatek/mtk-adsp-common.c
@@ -82,3 +82,4 @@ void mtk_adsp_dump(struct snd_sof_dev *sdev, u32 flags)
 EXPORT_SYMBOL(mtk_adsp_dump);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF helpers for MTK ADSP platforms");
diff --git a/sound/soc/sof/sof-acpi-dev.c b/sound/soc/sof/sof-acpi-dev.c
index 2d96d00f1c44..b196b2b74c26 100644
--- a/sound/soc/sof/sof-acpi-dev.c
+++ b/sound/soc/sof/sof-acpi-dev.c
@@ -100,3 +100,4 @@ void sof_acpi_remove(struct platform_device *pdev)
 EXPORT_SYMBOL_NS(sof_acpi_remove, SND_SOC_SOF_ACPI_DEV);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for ACPI platforms");
diff --git a/sound/soc/sof/sof-of-dev.c b/sound/soc/sof/sof-of-dev.c
index b9a499e92b9a..71f7153cf79c 100644
--- a/sound/soc/sof/sof-of-dev.c
+++ b/sound/soc/sof/sof-of-dev.c
@@ -93,3 +93,4 @@ void sof_of_shutdown(struct platform_device *pdev)
 EXPORT_SYMBOL(sof_of_shutdown);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for OF/DT platforms");
diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c
index 4365405783e6..38f2187da5de 100644
--- a/sound/soc/sof/sof-pci-dev.c
+++ b/sound/soc/sof/sof-pci-dev.c
@@ -304,3 +304,4 @@ void sof_pci_shutdown(struct pci_dev *pci)
 EXPORT_SYMBOL_NS(sof_pci_shutdown, SND_SOC_SOF_PCI_DEV);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF support for PCI platforms");
diff --git a/sound/soc/sof/sof-utils.c b/sound/soc/sof/sof-utils.c
index cad041bf56cc..44608682e9f8 100644
--- a/sound/soc/sof/sof-utils.c
+++ b/sound/soc/sof/sof-utils.c
@@ -73,3 +73,4 @@ int snd_sof_create_page_table(struct device *dev,
 EXPORT_SYMBOL(snd_sof_create_page_table);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("SOF utils");
-- 
cgit v1.2.3


From 7b05ab85e28f615e70520d24c075249b4512044e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 23 May 2024 14:02:57 +0300
Subject: ipv4: Fix address dump when IPv4 is disabled on an interface

Cited commit started returning an error when user space requests to dump
the interface's IPv4 addresses and IPv4 is disabled on the interface.
Restore the previous behavior and do not return an error.

Before cited commit:

 # ip address show dev dummy1
 10: dummy1: <BROADCAST,NOARP,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000
     link/ether e2:40:68:98:d0:18 brd ff:ff:ff:ff:ff:ff
     inet6 fe80::e040:68ff:fe98:d018/64 scope link proto kernel_ll
        valid_lft forever preferred_lft forever
 # ip link set dev dummy1 mtu 67
 # ip address show dev dummy1
 10: dummy1: <BROADCAST,NOARP,UP,LOWER_UP> mtu 67 qdisc noqueue state UNKNOWN group default qlen 1000
     link/ether e2:40:68:98:d0:18 brd ff:ff:ff:ff:ff:ff

After cited commit:

 # ip address show dev dummy1
 10: dummy1: <BROADCAST,NOARP,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000
     link/ether 32:2d:69:f2:9c:99 brd ff:ff:ff:ff:ff:ff
     inet6 fe80::302d:69ff:fef2:9c99/64 scope link proto kernel_ll
        valid_lft forever preferred_lft forever
 # ip link set dev dummy1 mtu 67
 # ip address show dev dummy1
 RTNETLINK answers: No such device
 Dump terminated

With this patch:

 # ip address show dev dummy1
 10: dummy1: <BROADCAST,NOARP,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN group default qlen 1000
     link/ether de:17:56:bb:57:c0 brd ff:ff:ff:ff:ff:ff
     inet6 fe80::dc17:56ff:febb:57c0/64 scope link proto kernel_ll
        valid_lft forever preferred_lft forever
 # ip link set dev dummy1 mtu 67
 # ip address show dev dummy1
 10: dummy1: <BROADCAST,NOARP,UP,LOWER_UP> mtu 67 qdisc noqueue state UNKNOWN group default qlen 1000
     link/ether de:17:56:bb:57:c0 brd ff:ff:ff:ff:ff:ff

I fixed the exact same issue for IPv6 in commit c04f7dfe6ec2 ("ipv6: Fix
address dump when IPv6 is disabled on an interface"), but noted [1] that
I am not doing the change for IPv4 because I am not aware of a way to
disable IPv4 on an interface other than unregistering it. I clearly
missed the above case.

[1] https://lore.kernel.org/netdev/20240321173042.2151756-1-idosch@nvidia.com/

Fixes: cdb2f80f1c10 ("inet: use xa_array iterator to implement inet_dump_ifaddr()")
Reported-by: Carolina Jubran <cjubran@nvidia.com>
Reported-by: Yamen Safadi <ysafadi@nvidia.com>
Tested-by: Carolina Jubran <cjubran@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20240523110257.334315-1-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/devinet.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 96accde527da..e827da128c5f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1887,10 +1887,11 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 			goto done;
 
 		if (fillargs.ifindex) {
-			err = -ENODEV;
 			dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex);
-			if (!dev)
+			if (!dev) {
+				err = -ENODEV;
 				goto done;
+			}
 			in_dev = __in_dev_get_rcu(dev);
 			if (!in_dev)
 				goto done;
-- 
cgit v1.2.3


From be008726d0ac338a6bb19c2da2853e3e2112b055 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 23 May 2024 10:13:45 -0400
Subject: net: gro: initialize network_offset in network layer

Syzkaller was able to trigger

    kernel BUG at net/core/gro.c:424 !
    RIP: 0010:gro_pull_from_frag0 net/core/gro.c:424 [inline]
    RIP: 0010:gro_try_pull_from_frag0 net/core/gro.c:446 [inline]
    RIP: 0010:dev_gro_receive+0x242f/0x24b0 net/core/gro.c:571

Due to using an incorrect NAPI_GRO_CB(skb)->network_offset.

The referenced commit sets this offset to 0 in skb_gro_reset_offset.
That matches the expected case in dev_gro_receive:

        pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
                                ipv6_gro_receive, inet_gro_receive,
                                &gro_list->list, skb);

But syzkaller injected an skb with protocol ETH_P_TEB into an ip6gre
device (by writing the IP6GRE encapsulated version to a TAP device).
The result was a first call to eth_gro_receive, and thus an extra
ETH_HLEN in network_offset that should not be there. First issue hit
is when computing offset from network header in ipv6_gro_pull_exthdrs.

Initialize both offsets in the network layer gro_receive.

This pairs with all reads in gro_receive, which use
skb_gro_receive_network_offset().

Fixes: 186b1ea73ad8 ("net: gro: use cb instead of skb->network_header")
Reported-by: syzkaller <syzkaller@googlegroups.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
CC: Richard Gobert <richardbgobert@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20240523141434.1752483-1-willemdebruijn.kernel@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/af_inet.c     | 2 +-
 net/ipv6/ip6_offload.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e03ba4a21c39..b24d74616637 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1532,7 +1532,7 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
 	}
 
 	NAPI_GRO_CB(skb)->flush |= flush;
-	NAPI_GRO_CB(skb)->inner_network_offset = off;
+	NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = off;
 
 	/* Note : No need to call skb_gro_postpull_rcsum() here,
 	 * as we already checked checksum over ipv4 header was 0
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index bd5aff97d8b1..9822163428b0 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -236,7 +236,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
 	if (unlikely(!iph))
 		goto out;
 
-	NAPI_GRO_CB(skb)->inner_network_offset = off;
+	NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = off;
 
 	flush += ntohs(iph->payload_len) != skb->len - hlen;
 
-- 
cgit v1.2.3


From f4dca95fc0f6350918f2e6727e35b41f7f86fcce Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 23 May 2024 13:05:27 +0000
Subject: tcp: reduce accepted window in NEW_SYN_RECV state

Jason commit made checks against ACK sequence less strict
and can be exploited by attackers to establish spoofed flows
with less probes.

Innocent users might use tcp_rmem[1] == 1,000,000,000,
or something more reasonable.

An attacker can use a regular TCP connection to learn the server
initial tp->rcv_wnd, and use it to optimize the attack.

If we make sure that only the announced window (smaller than 65535)
is used for ACK validation, we force an attacker to use
65537 packets to complete the 3WHS (assuming server ISN is unknown)

Fixes: 378979e94e95 ("tcp: remove 64 KByte limit for initial tp->rcv_wnd value")
Link: https://datatracker.ietf.org/meeting/119/materials/slides-119-tcpm-ghost-acks-00
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Link: https://lore.kernel.org/r/20240523130528.60376-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/request_sock.h | 12 ++++++++++++
 net/ipv4/tcp_ipv4.c        |  7 +------
 net/ipv4/tcp_minisocks.c   |  7 +++++--
 net/ipv6/tcp_ipv6.c        |  7 +------
 4 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index d88c0dfc2d46..ebcb8896bffc 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -285,4 +285,16 @@ static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
 	return atomic_read(&queue->young);
 }
 
+/* RFC 7323 2.3 Using the Window Scale Option
+ *  The window field (SEG.WND) of every outgoing segment, with the
+ *  exception of <SYN> segments, MUST be right-shifted by
+ *  Rcv.Wind.Shift bits.
+ *
+ * This means the SEG.WND carried in SYNACK can not exceed 65535.
+ * We use this property to harden TCP stack while in NEW_SYN_RECV state.
+ */
+static inline u32 tcp_synack_window(const struct request_sock *req)
+{
+	return min(req->rsk_rcv_wnd, 65535U);
+}
 #endif /* _REQUEST_SOCK_H */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 30ef0c8f5e92..b710958393e6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1144,14 +1144,9 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 #endif
 	}
 
-	/* RFC 7323 2.3
-	 * The window field (SEG.WND) of every outgoing segment, with the
-	 * exception of <SYN> segments, MUST be right-shifted by
-	 * Rcv.Wind.Shift bits:
-	 */
 	tcp_v4_send_ack(sk, skb, seq,
 			tcp_rsk(req)->rcv_nxt,
-			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+			tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
 			tcp_rsk_tsval(tcp_rsk(req)),
 			READ_ONCE(req->ts_recent),
 			0, &key,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b93619b2384b..538c06f95918 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -783,8 +783,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 
 	/* RFC793: "first check sequence number". */
 
-	if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
-					  tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) {
+	if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq,
+					  TCP_SKB_CB(skb)->end_seq,
+					  tcp_rsk(req)->rcv_nxt,
+					  tcp_rsk(req)->rcv_nxt +
+					  tcp_synack_window(req))) {
 		/* Out of window: send ACK and drop. */
 		if (!(flg & TCP_FLAG_RST) &&
 		    !tcp_oow_rate_limited(sock_net(sk), skb,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4c3605485b68..8c577b651bfc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1272,15 +1272,10 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
 	 */
-	/* RFC 7323 2.3
-	 * The window field (SEG.WND) of every outgoing segment, with the
-	 * exception of <SYN> segments, MUST be right-shifted by
-	 * Rcv.Wind.Shift bits:
-	 */
 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
 			tcp_rsk(req)->rcv_nxt,
-			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+			tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
 			tcp_rsk_tsval(tcp_rsk(req)),
 			READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
 			&key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
-- 
cgit v1.2.3


From 0fe53c0ab018b3399b8d4be95f32fd017c9719e1 Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Thu, 23 May 2024 12:17:31 -0500
Subject: dt-bindings: net: pse-pd: microchip,pd692x0: Fix missing
 "additionalProperties" constraints

The child nodes are missing "additionalProperties" constraints which
means any undocumented properties or child nodes are allowed. Add the
constraints, and fix the fallout of wrong manager node regex and
missing properties.

Fixes: 9c1de033afad ("dt-bindings: net: pse-pd: Add bindings for PD692x0 PSE controller")
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
Acked-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://lore.kernel.org/r/20240523171732.2836880-1-robh@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml     | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml b/Documentation/devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml
index 828439398fdf..fd4244fceced 100644
--- a/Documentation/devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml
+++ b/Documentation/devicetree/bindings/net/pse-pd/microchip,pd692x0.yaml
@@ -24,6 +24,7 @@ properties:
 
   managers:
     type: object
+    additionalProperties: false
     description:
       List of the PD69208T4/PD69204T4/PD69208M PSE managers. Each manager
       have 4 or 8 physical ports according to the chip version. No need to
@@ -47,8 +48,9 @@ properties:
       - "#size-cells"
 
     patternProperties:
-      "^manager@0[0-9a-b]$":
+      "^manager@[0-9a-b]$":
         type: object
+        additionalProperties: false
         description:
           PD69208T4/PD69204T4/PD69208M PSE manager exposing 4 or 8 physical
           ports.
@@ -69,9 +71,14 @@ properties:
         patternProperties:
           '^port@[0-7]$':
             type: object
+            additionalProperties: false
+
+            properties:
+              reg:
+                maxItems: 1
+
             required:
               - reg
-            additionalProperties: false
 
         required:
           - reg
-- 
cgit v1.2.3


From 12f86b9af96a8b09969e4392311602f787b40834 Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Thu, 23 May 2024 12:17:50 -0500
Subject: dt-bindings: net: pse-pd: ti,tps23881: Fix missing
 "additionalProperties" constraints

The child nodes are missing "additionalProperties" constraints which
means any undocumented properties or child nodes are allowed. Add the
constraints and all the undocumented properties exposed by the fix.

Fixes: f562202fedad ("dt-bindings: net: pse-pd: Add bindings for TPS23881 PSE controller")
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
Acked-by: Kory Maincent <kory.maincent@bootlin.com>
Link: https://lore.kernel.org/r/20240523171750.2837331-1-robh@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../devicetree/bindings/net/pse-pd/ti,tps23881.yaml    | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml b/Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml
index 4147adb11e10..6992d56832bf 100644
--- a/Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml
+++ b/Documentation/devicetree/bindings/net/pse-pd/ti,tps23881.yaml
@@ -29,13 +29,31 @@ properties:
       of the ports conversion matrix that establishes relationship between
       the logical ports and the physical channels.
     type: object
+    additionalProperties: false
+
+    properties:
+      "#address-cells":
+        const: 1
+
+      "#size-cells":
+        const: 0
 
     patternProperties:
       '^channel@[0-7]$':
         type: object
+        additionalProperties: false
+
+        properties:
+          reg:
+            maxItems: 1
+
         required:
           - reg
 
+    required:
+      - "#address-cells"
+      - "#size-cells"
+
 unevaluatedProperties: false
 
 required:
-- 
cgit v1.2.3


From bf0497f53c8535f99b72041529d3f7708a6e2c0d Mon Sep 17 00:00:00 2001
From: Xiaolei Wang <xiaolei.wang@windriver.com>
Date: Fri, 24 May 2024 13:05:28 +0800
Subject: net:fec: Add fec_enet_deinit()

When fec_probe() fails or fec_drv_remove() needs to release the
fec queue and remove a NAPI context, therefore add a function
corresponding to fec_enet_init() and call fec_enet_deinit() which
does the opposite to release memory and remove a NAPI context.

Fixes: 59d0f7465644 ("net: fec: init multi queue date structure")
Signed-off-by: Xiaolei Wang <xiaolei.wang@windriver.com>
Reviewed-by: Wei Fang <wei.fang@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20240524050528.4115581-1-xiaolei.wang@windriver.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/fec_main.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index a72d8a2eb0b3..881ece735dcf 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -4130,6 +4130,14 @@ free_queue_mem:
 	return ret;
 }
 
+static void fec_enet_deinit(struct net_device *ndev)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+
+	netif_napi_del(&fep->napi);
+	fec_enet_free_queue(ndev);
+}
+
 #ifdef CONFIG_OF
 static int fec_reset_phy(struct platform_device *pdev)
 {
@@ -4524,6 +4532,7 @@ failed_register:
 	fec_enet_mii_remove(fep);
 failed_mii_init:
 failed_irq:
+	fec_enet_deinit(ndev);
 failed_init:
 	fec_ptp_stop(pdev);
 failed_reset:
@@ -4587,6 +4596,7 @@ fec_drv_remove(struct platform_device *pdev)
 	pm_runtime_put_noidle(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
+	fec_enet_deinit(ndev);
 	free_netdev(ndev);
 }
 
-- 
cgit v1.2.3


From 4fb679040d9f758eeb3b4d01bbde6405bf20e64e Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Fri, 24 May 2024 10:53:50 +0200
Subject: net: micrel: Fix lan8841_config_intr after getting out of sleep mode

When the interrupt is enabled, the function lan8841_config_intr tries to
clear any pending interrupts by reading the interrupt status, then
checks the return value for errors and then continue to enable the
interrupt. It has been seen that once the system gets out of sleep mode,
the interrupt status has the value 0x400 meaning that the PHY detected
that the link was in low power. That is correct value but the problem is
that the check is wrong.  We try to check for errors but we return an
error also in this case which is not an error. Therefore fix this by
returning only when there is an error.

Fixes: a8f1a19d27ef ("net: micrel: Add support for lan8841 PHY")
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Reviewed-by: Suman Ghosh <sumang@marvell.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://lore.kernel.org/r/20240524085350.359812-1-horatiu.vultur@microchip.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/micrel.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 1d769322b059..2b8f8b7f1517 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -4029,7 +4029,7 @@ static int lan8841_config_intr(struct phy_device *phydev)
 
 	if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
 		err = phy_read(phydev, LAN8814_INTS);
-		if (err)
+		if (err < 0)
 			return err;
 
 		/* Enable / disable interrupts. It is OK to enable PTP interrupt
@@ -4045,6 +4045,14 @@ static int lan8841_config_intr(struct phy_device *phydev)
 			return err;
 
 		err = phy_read(phydev, LAN8814_INTS);
+		if (err < 0)
+			return err;
+
+		/* Getting a positive value doesn't mean that is an error, it
+		 * just indicates what was the status. Therefore make sure to
+		 * clear the value and say that there is no error.
+		 */
+		err = 0;
 	}
 
 	return err;
-- 
cgit v1.2.3


From 266aa3b4812e97942a8ce5c7aafa7da059f7b5b8 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <aleksander.lobakin@intel.com>
Date: Fri, 24 May 2024 13:28:59 +0200
Subject: page_pool: fix &page_pool_params kdoc issues

After the tagged commit, @netdev got documented twice and the kdoc
script didn't notice that. Remove the second description added later
and move the initial one according to the field position.

After merging commit 5f8e4007c10d ("kernel-doc: fix
struct_group_tagged() parsing"), kdoc requires to describe struct
groups as well. &page_pool_params has 2 struct groups which
generated new warnings, describe them to resolve this.

Fixes: 403f11ac9ab7 ("page_pool: don't use driver-set flags field directly")
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Link: https://lore.kernel.org/r/20240524112859.2757403-1-aleksander.lobakin@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/page_pool/types.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index b088d131aeb0..7e8477057f3d 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -45,16 +45,17 @@ struct pp_alloc_cache {
 
 /**
  * struct page_pool_params - page pool parameters
+ * @fast:	params accessed frequently on hotpath
  * @order:	2^order pages on allocation
  * @pool_size:	size of the ptr_ring
  * @nid:	NUMA node id to allocate from pages from
  * @dev:	device, for DMA pre-mapping purposes
- * @netdev:	netdev this pool will serve (leave as NULL if none or multiple)
  * @napi:	NAPI which is the sole consumer of pages, otherwise NULL
  * @dma_dir:	DMA mapping direction
  * @max_len:	max DMA sync memory size for PP_FLAG_DMA_SYNC_DEV
  * @offset:	DMA sync address offset for PP_FLAG_DMA_SYNC_DEV
- * @netdev:	corresponding &net_device for Netlink introspection
+ * @slow:	params with slowpath access only (initialization and Netlink)
+ * @netdev:	netdev this pool will serve (leave as NULL if none or multiple)
  * @flags:	PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_SYSTEM_POOL
  */
 struct page_pool_params {
-- 
cgit v1.2.3


From d514c8b54209de7a95ab37259fe32c7406976bd9 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <aleksander.lobakin@intel.com>
Date: Thu, 23 May 2024 10:45:29 -0700
Subject: idpf: don't enable NAPI and interrupts prior to allocating Rx buffers

Currently, idpf enables NAPI and interrupts prior to allocating Rx
buffers.
This may lead to frame loss (there are no buffers to place incoming
frames) and even crashes on quick ifup-ifdown. Interrupts must be
enabled only after all the resources are here and available.
Split interrupt init into two phases: initialization and enabling,
and perform the second only after the queues are fully initialized.
Note that we can't just move interrupt initialization down the init
process, as the queues must have correct a ::q_vector pointer set
and NAPI already added in order to allocate buffers correctly.
Also, during the deinit process, disable HW interrupts first and
only then disable NAPI. Otherwise, there can be a HW event leading
to napi_schedule(), but the NAPI will already be unavailable.

Fixes: d4d558718266 ("idpf: initialize interrupts and enable vport")
Reported-by: Michal Kubiak <michal.kubiak@intel.com>
Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Krishneil Singh <krishneil.k.singh@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20240523-net-2024-05-23-intel-net-fixes-v1-1-17a923e0bb5f@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/idpf/idpf_lib.c  |  1 +
 drivers/net/ethernet/intel/idpf/idpf_txrx.c | 12 +++++++-----
 drivers/net/ethernet/intel/idpf/idpf_txrx.h |  1 +
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index 52ceda6306a3..f1ee5584e8fa 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -1394,6 +1394,7 @@ static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res)
 	}
 
 	idpf_rx_init_buf_tail(vport);
+	idpf_vport_intr_ena(vport);
 
 	err = idpf_send_config_queues_msg(vport);
 	if (err) {
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
index 285da2177ee4..b023704bbbda 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
@@ -3746,9 +3746,9 @@ static void idpf_vport_intr_ena_irq_all(struct idpf_vport *vport)
  */
 void idpf_vport_intr_deinit(struct idpf_vport *vport)
 {
+	idpf_vport_intr_dis_irq_all(vport);
 	idpf_vport_intr_napi_dis_all(vport);
 	idpf_vport_intr_napi_del_all(vport);
-	idpf_vport_intr_dis_irq_all(vport);
 	idpf_vport_intr_rel_irq(vport);
 }
 
@@ -4179,7 +4179,6 @@ int idpf_vport_intr_init(struct idpf_vport *vport)
 
 	idpf_vport_intr_map_vector_to_qs(vport);
 	idpf_vport_intr_napi_add_all(vport);
-	idpf_vport_intr_napi_ena_all(vport);
 
 	err = vport->adapter->dev_ops.reg_ops.intr_reg_init(vport);
 	if (err)
@@ -4193,17 +4192,20 @@ int idpf_vport_intr_init(struct idpf_vport *vport)
 	if (err)
 		goto unroll_vectors_alloc;
 
-	idpf_vport_intr_ena_irq_all(vport);
-
 	return 0;
 
 unroll_vectors_alloc:
-	idpf_vport_intr_napi_dis_all(vport);
 	idpf_vport_intr_napi_del_all(vport);
 
 	return err;
 }
 
+void idpf_vport_intr_ena(struct idpf_vport *vport)
+{
+	idpf_vport_intr_napi_ena_all(vport);
+	idpf_vport_intr_ena_irq_all(vport);
+}
+
 /**
  * idpf_config_rss - Send virtchnl messages to configure RSS
  * @vport: virtual port
diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
index 3d046b81e507..551391e20464 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -990,6 +990,7 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport);
 void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector);
 void idpf_vport_intr_deinit(struct idpf_vport *vport);
 int idpf_vport_intr_init(struct idpf_vport *vport);
+void idpf_vport_intr_ena(struct idpf_vport *vport);
 enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded);
 int idpf_config_rss(struct idpf_vport *vport);
 int idpf_init_rss(struct idpf_vport *vport);
-- 
cgit v1.2.3


From 82617b9a04649e83ee8731918aeadbb6e6d7cbc7 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Thu, 23 May 2024 10:45:30 -0700
Subject: ice: fix accounting if a VLAN already exists

The ice_vsi_add_vlan() function is used to add a VLAN filter for the target
VSI. This function prepares a filter in the switch table for the given VSI.
If it succeeds, the vsi->num_vlan counter is incremented.

It is not considered an error to add a VLAN which already exists in the
switch table, so the function explicitly checks and ignores -EEXIST. The
vsi->num_vlan counter is still incremented.

This seems incorrect, as it means we can double-count in the case where the
same VLAN is added twice by the caller. The actual table will have one less
filter than the count.

The ice_vsi_del_vlan() function similarly checks and handles the -ENOENT
condition for when deleting a filter that doesn't exist. This flow only
decrements the vsi->num_vlan if it actually deleted a filter.

The vsi->num_vlan counter is used only in a few places, primarily related
to tracking the number of non-zero VLANs. If the vsi->num_vlans gets out of
sync, then ice_vsi_num_non_zero_vlans() will incorrectly report more VLANs
than are present, and ice_vsi_has_non_zero_vlans() could return true
potentially in cases where there are only VLAN 0 filters left.

Fix this by only incrementing the vsi->num_vlan in the case where we
actually added an entry, and not in the case where the entry already
existed.

Fixes: a1ffafb0b4a4 ("ice: Support configuring the device to Double VLAN Mode")
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20240523-net-2024-05-23-intel-net-fixes-v1-2-17a923e0bb5f@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
index 2e9ad27cb9d1..6e8f2aab6080 100644
--- a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
@@ -45,14 +45,15 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
 		return -EINVAL;
 
 	err = ice_fltr_add_vlan(vsi, vlan);
-	if (err && err != -EEXIST) {
+	if (!err)
+		vsi->num_vlan++;
+	else if (err == -EEXIST)
+		err = 0;
+	else
 		dev_err(ice_pf_to_dev(vsi->back), "Failure Adding VLAN %d on VSI %i, status %d\n",
 			vlan->vid, vsi->vsi_num, err);
-		return err;
-	}
 
-	vsi->num_vlan++;
-	return 0;
+	return err;
 }
 
 /**
-- 
cgit v1.2.3


From 5597613fb3cf0e36d26cfd8fb2a63196da249333 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
Date: Fri, 24 May 2024 18:30:56 +0200
Subject: selftests: mptcp: lib: support flaky subtests

Some subtests can be unstable, failing once every X runs. Fixing them
can take time: there could be an issue in the kernel or in the subtest,
and it is then important to do a proper analysis, not to hide real bugs.

To avoid creating noises on the different CIs, it is important to have a
simple way to mark subtests as flaky, and ignore the errors. This is
what this patch introduces: subtests can be marked as flaky by setting
MPTCP_LIB_SUBTEST_FLAKY env var to 1, e.g.

  MPTCP_LIB_SUBTEST_FLAKY=1 <run flaky subtest>

The subtest will be executed, and errors (if any) will be ignored. It is
still good to run these subtests, as it exercises code, and the results
can still be useful for the on-going investigations.

Note that the MPTCP CI will continue to track these flaky subtests by
setting SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY env var to 1, and a ticket
has to be created before marking subtests as flaky.

Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://lore.kernel.org/r/20240524-upstream-net-20240524-selftests-mptcp-flaky-v1-1-a352362f3f8e@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_lib.sh | 30 ++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index ad2ebda5cb64..6ffa9b7a3260 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -21,6 +21,7 @@ declare -rx MPTCP_LIB_AF_INET6=10
 
 MPTCP_LIB_SUBTESTS=()
 MPTCP_LIB_SUBTESTS_DUPLICATED=0
+MPTCP_LIB_SUBTEST_FLAKY=0
 MPTCP_LIB_TEST_COUNTER=0
 MPTCP_LIB_TEST_FORMAT="%02u %-50s"
 MPTCP_LIB_IP_MPTCP=0
@@ -41,6 +42,16 @@ else
 	readonly MPTCP_LIB_COLOR_RESET=
 fi
 
+# SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY env var can be set not to ignore errors
+# from subtests marked as flaky
+mptcp_lib_override_flaky() {
+	[ "${SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY:-}" = 1 ]
+}
+
+mptcp_lib_subtest_is_flaky() {
+	[ "${MPTCP_LIB_SUBTEST_FLAKY}" = 1 ] && ! mptcp_lib_override_flaky
+}
+
 # $1: color, $2: text
 mptcp_lib_print_color() {
 	echo -e "${MPTCP_LIB_START_PRINT:-}${*}${MPTCP_LIB_COLOR_RESET}"
@@ -72,7 +83,16 @@ mptcp_lib_pr_skip() {
 }
 
 mptcp_lib_pr_fail() {
-	mptcp_lib_print_err "[FAIL]${1:+ ${*}}"
+	local title cmt
+
+	if mptcp_lib_subtest_is_flaky; then
+		title="IGNO"
+		cmt=" (flaky)"
+	else
+		title="FAIL"
+	fi
+
+	mptcp_lib_print_err "[${title}]${cmt}${1:+ ${*}}"
 }
 
 mptcp_lib_pr_info() {
@@ -208,7 +228,13 @@ mptcp_lib_result_pass() {
 
 # $1: test name
 mptcp_lib_result_fail() {
-	__mptcp_lib_result_add "not ok" "${1}"
+	if mptcp_lib_subtest_is_flaky; then
+		# It might sound better to use 'not ok # TODO' or 'ok # SKIP',
+		# but some CIs don't understand 'TODO' and treat SKIP as errors.
+		__mptcp_lib_result_add "ok" "${1} # IGNORE Flaky"
+	else
+		__mptcp_lib_result_add "not ok" "${1}"
+	fi
 }
 
 # $1: test name
-- 
cgit v1.2.3


From cc73a6577ae64247898269d138dee6b73ff710cc Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
Date: Fri, 24 May 2024 18:30:57 +0200
Subject: selftests: mptcp: simult flows: mark 'unbalanced' tests as flaky

These tests are flaky since their introduction. This might be less or
not visible depending on the CI running the tests, especially if it is
also busy doing other tasks in parallel.

A first analysis shown that the transfer can be slowed down when there
are some re-injections at the MPTCP level. Such re-injections can of
course happen, and disturb the transfer, but it looks strange to have
them in this lab. That could be caused by the kernel having access to
less CPU cycles -- e.g. when other activities are executed in parallel
-- or by a misinterpretation on the MPTCP packet scheduler side.

While this is being investigated, the tests are marked as flaky not to
create noises in other CIs.

Fixes: 219d04992b68 ("mptcp: push pending frames when subflow has free space")
Link: https://github.com/multipath-tcp/mptcp_net-next/issues/475
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://lore.kernel.org/r/20240524-upstream-net-20240524-selftests-mptcp-flaky-v1-2-a352362f3f8e@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/simult_flows.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 4b14b4412166..f74e1c3c126d 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -244,7 +244,7 @@ run_test()
 	do_transfer $small $large $time
 	lret=$?
 	mptcp_lib_result_code "${lret}" "${msg}"
-	if [ $lret -ne 0 ]; then
+	if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then
 		ret=$lret
 		[ $bail -eq 0 ] || exit $ret
 	fi
@@ -254,7 +254,7 @@ run_test()
 	do_transfer $large $small $time
 	lret=$?
 	mptcp_lib_result_code "${lret}" "${msg}"
-	if [ $lret -ne 0 ]; then
+	if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then
 		ret=$lret
 		[ $bail -eq 0 ] || exit $ret
 	fi
@@ -290,7 +290,7 @@ run_test 10 10 0 0 "balanced bwidth"
 run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
 
 # we still need some additional infrastructure to pass the following test-cases
-run_test 10 3 0 0 "unbalanced bwidth"
+MPTCP_LIB_SUBTEST_FLAKY=1 run_test 10 3 0 0 "unbalanced bwidth"
 run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay"
 run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay"
 
-- 
cgit v1.2.3


From 8c06ac2178a9dee887929232226e35a5cdda1793 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
Date: Fri, 24 May 2024 18:30:58 +0200
Subject: selftests: mptcp: join: mark 'fastclose' tests as flaky

These tests are flaky since their introduction. This might be less or
not visible depending on the CI running the tests, especially if it is
also busy doing other tasks in parallel, and if a debug kernel config is
being used.

It looks like this issue is often present with the NetDev CI. While this
is being investigated, the tests are marked as flaky not to create
noises on such CIs.

Fixes: 01542c9bf9ab ("selftests: mptcp: add fastclose testcase")
Link: https://github.com/multipath-tcp/mptcp_net-next/issues/324
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://lore.kernel.org/r/20240524-upstream-net-20240524-selftests-mptcp-flaky-v1-3-a352362f3f8e@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index fefa9173bdaa..b869b46823d7 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -261,6 +261,8 @@ reset()
 
 	TEST_NAME="${1}"
 
+	MPTCP_LIB_SUBTEST_FLAKY=0 # reset if modified
+
 	if skip_test; then
 		MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1))
 		last_test_ignored=1
@@ -448,7 +450,9 @@ reset_with_tcp_filter()
 # $1: err msg
 fail_test()
 {
-	ret=${KSFT_FAIL}
+	if ! mptcp_lib_subtest_is_flaky; then
+		ret=${KSFT_FAIL}
+	fi
 
 	if [ ${#} -gt 0 ]; then
 		print_fail "${@}"
@@ -3069,6 +3073,7 @@ fullmesh_tests()
 fastclose_tests()
 {
 	if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
+		MPTCP_LIB_SUBTEST_FLAKY=1
 		test_linkfail=1024 fastclose=client \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 0 0 0
@@ -3077,6 +3082,7 @@ fastclose_tests()
 	fi
 
 	if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
+		MPTCP_LIB_SUBTEST_FLAKY=1
 		test_linkfail=1024 fastclose=server \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 0 0 0 0 0 0 1
-- 
cgit v1.2.3


From 38af56e6668b455f7dd0a8e2d9afe74100068e17 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
Date: Fri, 24 May 2024 18:30:59 +0200
Subject: selftests: mptcp: join: mark 'fail' tests as flaky

These tests are rarely unstable. It depends on the CI running the tests,
especially if it is also busy doing other tasks in parallel, and if a
debug kernel config is being used.

It looks like this issue is sometimes present with the NetDev CI. While
this is being investigated, the tests are marked as flaky not to create
noises on such CIs.

Fixes: b6e074e171bc ("selftests: mptcp: add infinite map testcase")
Link: https://github.com/multipath-tcp/mptcp_net-next/issues/491
Reviewed-by: Mat Martineau <martineau@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://lore.kernel.org/r/20240524-upstream-net-20240524-selftests-mptcp-flaky-v1-4-a352362f3f8e@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index b869b46823d7..2b66c5fa71eb 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -3101,6 +3101,7 @@ fail_tests()
 {
 	# single subflow
 	if reset_with_fail "Infinite map" 1; then
+		MPTCP_LIB_SUBTEST_FLAKY=1
 		test_linkfail=128 \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
@@ -3109,6 +3110,7 @@ fail_tests()
 
 	# multiple subflows
 	if reset_with_fail "MP_FAIL MP_RST" 2; then
+		MPTCP_LIB_SUBTEST_FLAKY=1
 		tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5ms
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
-- 
cgit v1.2.3


From c519cf9b7434183bb56ed1e200ac577a5fd34d9b Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@toblux.com>
Date: Mon, 27 May 2024 12:36:19 +0200
Subject: docs: netdev: Fix typo in Signed-off-by tag

s/of/off/

Signed-off-by: Thorsten Blum <thorsten.blum@toblux.com>
Fixes: e110ba659271 ("docs: netdev: add note about Changes Requested and revising commit messages")
Link: https://lore.kernel.org/r/20240527103618.265801-2-thorsten.blum@toblux.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/process/maintainer-netdev.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst
index fd96e4a3cef9..5e1fcfad1c4c 100644
--- a/Documentation/process/maintainer-netdev.rst
+++ b/Documentation/process/maintainer-netdev.rst
@@ -227,7 +227,7 @@ preferably including links to previous postings, for example::
   The amount of mooing will depend on packet rate so should match
   the diurnal cycle quite well.
 
-  Signed-of-by: Joe Defarmer <joe@barn.org>
+  Signed-off-by: Joe Defarmer <joe@barn.org>
   ---
   v3:
     - add a note about time-of-day mooing fluctuation to the commit message
-- 
cgit v1.2.3


From fd7ccfb112302c6941f03095544c10879ced98be Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Fri, 17 May 2024 09:42:04 +0300
Subject: media: Documentation: v4l: Fix ACTIVE route flag

The documentation in one occasion mentions the
VIDIOC_SUBDEV_STREAM_FL_ACTIVE flag. This was meant to be
V4L2_SUBDEV_STREAM_FL_ACTIVE as it's a flag, not an IOCTL. Fix it.

Fixes: cd2c75454d74 ("media: Documentation: Document S_ROUTING behaviour")
Reported-by: Samuel Wein PhD <sam@samwein.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 Documentation/userspace-api/media/v4l/dev-subdev.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/userspace-api/media/v4l/dev-subdev.rst b/Documentation/userspace-api/media/v4l/dev-subdev.rst
index 0f9eda3187f3..161b43f1ce66 100644
--- a/Documentation/userspace-api/media/v4l/dev-subdev.rst
+++ b/Documentation/userspace-api/media/v4l/dev-subdev.rst
@@ -582,7 +582,7 @@ depending on the hardware. In all cases, however, only routes that have the
 Devices generating the streams may allow enabling and disabling some of the
 routes or have a fixed routing configuration. If the routes can be disabled, not
 declaring the routes (or declaring them without
-``VIDIOC_SUBDEV_STREAM_FL_ACTIVE`` flag set) in ``VIDIOC_SUBDEV_S_ROUTING`` will
+``V4L2_SUBDEV_STREAM_FL_ACTIVE`` flag set) in ``VIDIOC_SUBDEV_S_ROUTING`` will
 disable the routes. ``VIDIOC_SUBDEV_S_ROUTING`` will still return such routes
 back to the user in the routes array, with the ``V4L2_SUBDEV_STREAM_FL_ACTIVE``
 flag unset.
-- 
cgit v1.2.3


From fe61b2906bd046535f4ef7dfcd69562f531ccd38 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 20 May 2024 11:55:54 +0200
Subject: media: intel/ipu6: Fix some redundant resources freeing in
 ipu6_pci_remove()

pcim_iomap_regions() and pcim_enable_device() are used in the probe. So
the corresponding managed resources don't need to be freed explicitly in
the remove function.

Remove the incorrect pci_release_regions() and pci_disable_device() calls.

Fixes: 25fedc021985 ("media: intel/ipu6: add Intel IPU6 PCI device driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Bingbu Cao <bingbu.cao@intel.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 drivers/media/pci/intel/ipu6/ipu6.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/media/pci/intel/ipu6/ipu6.c b/drivers/media/pci/intel/ipu6/ipu6.c
index d2bebd208461..f587f609259d 100644
--- a/drivers/media/pci/intel/ipu6/ipu6.c
+++ b/drivers/media/pci/intel/ipu6/ipu6.c
@@ -727,9 +727,6 @@ static void ipu6_pci_remove(struct pci_dev *pdev)
 	pm_runtime_forbid(&pdev->dev);
 	pm_runtime_get_noresume(&pdev->dev);
 
-	pci_release_regions(pdev);
-	pci_disable_device(pdev);
-
 	release_firmware(isp->cpd_fw);
 
 	ipu6_mmu_cleanup(psys_mmu);
-- 
cgit v1.2.3


From 266b44ec9a26dd2ad5d1dfcbb2f7f4e2c10da3fb Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 26 May 2024 09:59:56 +0200
Subject: media: intel/ipu6: Move isys_remove() close to isys_probe()

In preparation to fixing a leak in isys_probe(), move isys_remove().
The fix will introduce a new function that will also be called from
isys_remove(). The code needs to be rearranged to avoid a forward
declaration.

Having the .remove function close to the .probe function is also more
standard.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 drivers/media/pci/intel/ipu6/ipu6-isys.c | 70 ++++++++++++++++----------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/drivers/media/pci/intel/ipu6/ipu6-isys.c b/drivers/media/pci/intel/ipu6/ipu6-isys.c
index 5992138c7290..7ce2047a09b5 100644
--- a/drivers/media/pci/intel/ipu6/ipu6-isys.c
+++ b/drivers/media/pci/intel/ipu6/ipu6-isys.c
@@ -925,41 +925,6 @@ static const struct dev_pm_ops isys_pm_ops = {
 	.resume = isys_resume,
 };
 
-static void isys_remove(struct auxiliary_device *auxdev)
-{
-	struct ipu6_bus_device *adev = auxdev_to_adev(auxdev);
-	struct ipu6_isys *isys = dev_get_drvdata(&auxdev->dev);
-	struct ipu6_device *isp = adev->isp;
-	struct isys_fw_msgs *fwmsg, *safe;
-	unsigned int i;
-
-	list_for_each_entry_safe(fwmsg, safe, &isys->framebuflist, head)
-		dma_free_attrs(&auxdev->dev, sizeof(struct isys_fw_msgs),
-			       fwmsg, fwmsg->dma_addr, 0);
-
-	list_for_each_entry_safe(fwmsg, safe, &isys->framebuflist_fw, head)
-		dma_free_attrs(&auxdev->dev, sizeof(struct isys_fw_msgs),
-			       fwmsg, fwmsg->dma_addr, 0);
-
-	isys_unregister_devices(isys);
-	isys_notifier_cleanup(isys);
-
-	cpu_latency_qos_remove_request(&isys->pm_qos);
-
-	if (!isp->secure_mode) {
-		ipu6_cpd_free_pkg_dir(adev);
-		ipu6_buttress_unmap_fw_image(adev, &adev->fw_sgt);
-		release_firmware(adev->fw);
-	}
-
-	for (i = 0; i < IPU6_ISYS_MAX_STREAMS; i++)
-		mutex_destroy(&isys->streams[i].mutex);
-
-	isys_iwake_watermark_cleanup(isys);
-	mutex_destroy(&isys->stream_mutex);
-	mutex_destroy(&isys->mutex);
-}
-
 static int alloc_fw_msg_bufs(struct ipu6_isys *isys, int amount)
 {
 	struct device *dev = &isys->adev->auxdev.dev;
@@ -1167,6 +1132,41 @@ release_firmware:
 	return ret;
 }
 
+static void isys_remove(struct auxiliary_device *auxdev)
+{
+	struct ipu6_bus_device *adev = auxdev_to_adev(auxdev);
+	struct ipu6_isys *isys = dev_get_drvdata(&auxdev->dev);
+	struct ipu6_device *isp = adev->isp;
+	struct isys_fw_msgs *fwmsg, *safe;
+	unsigned int i;
+
+	list_for_each_entry_safe(fwmsg, safe, &isys->framebuflist, head)
+		dma_free_attrs(&auxdev->dev, sizeof(struct isys_fw_msgs),
+			       fwmsg, fwmsg->dma_addr, 0);
+
+	list_for_each_entry_safe(fwmsg, safe, &isys->framebuflist_fw, head)
+		dma_free_attrs(&auxdev->dev, sizeof(struct isys_fw_msgs),
+			       fwmsg, fwmsg->dma_addr, 0);
+
+	isys_unregister_devices(isys);
+	isys_notifier_cleanup(isys);
+
+	cpu_latency_qos_remove_request(&isys->pm_qos);
+
+	if (!isp->secure_mode) {
+		ipu6_cpd_free_pkg_dir(adev);
+		ipu6_buttress_unmap_fw_image(adev, &adev->fw_sgt);
+		release_firmware(adev->fw);
+	}
+
+	for (i = 0; i < IPU6_ISYS_MAX_STREAMS; i++)
+		mutex_destroy(&isys->streams[i].mutex);
+
+	isys_iwake_watermark_cleanup(isys);
+	mutex_destroy(&isys->stream_mutex);
+	mutex_destroy(&isys->mutex);
+}
+
 struct fwmsg {
 	int type;
 	char *msg;
-- 
cgit v1.2.3


From ab0ed481012811fed052f609b364c118ce8a576e Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 26 May 2024 09:59:57 +0200
Subject: media: intel/ipu6: Fix an error handling path in isys_probe()

If an error occurs after a successful alloc_fw_msg_bufs() call, some
resources should be released as already done in the remove function.

Add a new free_fw_msg_bufs() function that releases what has been allocated
by alloc_fw_msg_bufs().

Also use this new function in isys_remove() to avoid some code duplication.

Fixes: f50c4ca0a820 ("media: intel/ipu6: add the main input system driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 drivers/media/pci/intel/ipu6/ipu6-isys.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/drivers/media/pci/intel/ipu6/ipu6-isys.c b/drivers/media/pci/intel/ipu6/ipu6-isys.c
index 7ce2047a09b5..1998b72ac07d 100644
--- a/drivers/media/pci/intel/ipu6/ipu6-isys.c
+++ b/drivers/media/pci/intel/ipu6/ipu6-isys.c
@@ -925,6 +925,20 @@ static const struct dev_pm_ops isys_pm_ops = {
 	.resume = isys_resume,
 };
 
+static void free_fw_msg_bufs(struct ipu6_isys *isys)
+{
+	struct device *dev = &isys->adev->auxdev.dev;
+	struct isys_fw_msgs *fwmsg, *safe;
+
+	list_for_each_entry_safe(fwmsg, safe, &isys->framebuflist, head)
+		dma_free_attrs(dev, sizeof(struct isys_fw_msgs), fwmsg,
+			       fwmsg->dma_addr, 0);
+
+	list_for_each_entry_safe(fwmsg, safe, &isys->framebuflist_fw, head)
+		dma_free_attrs(dev, sizeof(struct isys_fw_msgs), fwmsg,
+			       fwmsg->dma_addr, 0);
+}
+
 static int alloc_fw_msg_bufs(struct ipu6_isys *isys, int amount)
 {
 	struct device *dev = &isys->adev->auxdev.dev;
@@ -1105,12 +1119,14 @@ static int isys_probe(struct auxiliary_device *auxdev,
 
 	ret = isys_register_devices(isys);
 	if (ret)
-		goto out_remove_pkg_dir_shared_buffer;
+		goto free_fw_msg_bufs;
 
 	ipu6_mmu_hw_cleanup(adev->mmu);
 
 	return 0;
 
+free_fw_msg_bufs:
+	free_fw_msg_bufs(isys);
 out_remove_pkg_dir_shared_buffer:
 	if (!isp->secure_mode)
 		ipu6_cpd_free_pkg_dir(adev);
@@ -1137,16 +1153,9 @@ static void isys_remove(struct auxiliary_device *auxdev)
 	struct ipu6_bus_device *adev = auxdev_to_adev(auxdev);
 	struct ipu6_isys *isys = dev_get_drvdata(&auxdev->dev);
 	struct ipu6_device *isp = adev->isp;
-	struct isys_fw_msgs *fwmsg, *safe;
 	unsigned int i;
 
-	list_for_each_entry_safe(fwmsg, safe, &isys->framebuflist, head)
-		dma_free_attrs(&auxdev->dev, sizeof(struct isys_fw_msgs),
-			       fwmsg, fwmsg->dma_addr, 0);
-
-	list_for_each_entry_safe(fwmsg, safe, &isys->framebuflist_fw, head)
-		dma_free_attrs(&auxdev->dev, sizeof(struct isys_fw_msgs),
-			       fwmsg, fwmsg->dma_addr, 0);
+	free_fw_msg_bufs(isys);
 
 	isys_unregister_devices(isys);
 	isys_notifier_cleanup(isys);
-- 
cgit v1.2.3


From c19fa08c1414644b0d9275d336bdaff90af57d0b Mon Sep 17 00:00:00 2001
From: Bingbu Cao <bingbu.cao@intel.com>
Date: Mon, 27 May 2024 18:48:57 +0800
Subject: media: intel/ipu6: fix the buffer flags caused by wrong parentheses

The buffer flags is set by wrong due to wrong parentheses, the
FL_INCOMING flag is never taken an account.
Fix it by wrapping the ternary conditional operation with parentheses.

Fixes: 3c1dfb5a69cf ("media: intel/ipu6: input system video nodes and buffer queues")
Signed-off-by: Bingbu Cao <bingbu.cao@intel.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 drivers/media/pci/intel/ipu6/ipu6-isys-queue.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/pci/intel/ipu6/ipu6-isys-queue.c b/drivers/media/pci/intel/ipu6/ipu6-isys-queue.c
index 40a8ebfcfce2..4bd4e324abc9 100644
--- a/drivers/media/pci/intel/ipu6/ipu6-isys-queue.c
+++ b/drivers/media/pci/intel/ipu6/ipu6-isys-queue.c
@@ -301,10 +301,10 @@ static int ipu6_isys_stream_start(struct ipu6_isys_video *av,
 out_requeue:
 	if (bl && bl->nbufs)
 		ipu6_isys_buffer_list_queue(bl,
-					    (IPU6_ISYS_BUFFER_LIST_FL_INCOMING |
-					     error) ?
+					    IPU6_ISYS_BUFFER_LIST_FL_INCOMING |
+					    (error ?
 					    IPU6_ISYS_BUFFER_LIST_FL_SET_STATE :
-					    0, error ? VB2_BUF_STATE_ERROR :
+					     0), error ? VB2_BUF_STATE_ERROR :
 					    VB2_BUF_STATE_QUEUED);
 	flush_firmware_streamon_fail(stream);
 
-- 
cgit v1.2.3


From 77b79df0268bee3ef38fd5e76e86a076ce02995d Mon Sep 17 00:00:00 2001
From: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Date: Wed, 8 May 2024 20:52:15 +0530
Subject: drm/xe: Change pcode timeout to 50msec while polling again
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Polling is initially attempted with timeout_base_ms enabled for
preemption, and if it exceeds this timeframe, another attempt is made
without preemption, allowing an additional 50 ms before timing out.

v2
- Rebase

v3
- Move warnings to separate patch (Lucas)

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Fixes: 7dc9b92dcfef ("drm/xe: Remove i915_utils dependency from xe_pcode.")
Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240508152216.3263109-2-himal.prasad.ghimiray@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
(cherry picked from commit c81858eb52266b3d6ba28ca4f62a198231a10cdc)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_pcode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index c010ef16fbf5..a5e7da8cf944 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -191,7 +191,7 @@ int xe_pcode_request(struct xe_gt *gt, u32 mbox, u32 request,
 	drm_WARN_ON_ONCE(&gt_to_xe(gt)->drm, timeout_base_ms > 1);
 	preempt_disable();
 	ret = pcode_try_request(gt, mbox, request, reply_mask, reply, &status,
-				true, timeout_base_ms * 1000, true);
+				true, 50 * 1000, true);
 	preempt_enable();
 
 out:
-- 
cgit v1.2.3


From c8ea2c31f5ea437199b239d76ad5db27343edb0c Mon Sep 17 00:00:00 2001
From: Matthew Brost <matthew.brost@intel.com>
Date: Mon, 15 Apr 2024 12:04:53 -0700
Subject: drm/xe: Only use reserved BCS instances for usm migrate exec queue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The GuC context scheduling queue is 2 entires deep, thus it is possible
for a migration job to be stuck behind a fault if migration exec queue
shares engines with user jobs. This can deadlock as the migrate exec
queue is required to service page faults. Avoid deadlock by only using
reserved BCS instances for usm migrate exec queue.

Fixes: a043fbab7af5 ("drm/xe/pvc: Use fast copy engines as migrate engine on PVC")
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240415190453.696553-2-matthew.brost@intel.com
Reviewed-by: Brian Welty <brian.welty@intel.com>
(cherry picked from commit 04f4a70a183a688a60fe3882d6e4236ea02cfc67)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_migrate.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 9f6e9b7f11c8..65e5a3f4c340 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -34,7 +34,6 @@
 #include "xe_sync.h"
 #include "xe_trace.h"
 #include "xe_vm.h"
-#include "xe_wa.h"
 
 /**
  * struct xe_migrate - migrate context.
@@ -300,10 +299,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 }
 
 /*
- * Due to workaround 16017236439, odd instance hardware copy engines are
- * faster than even instance ones.
- * This function returns the mask involving all fast copy engines and the
- * reserved copy engine to be used as logical mask for migrate engine.
  * Including the reserved copy engine is required to avoid deadlocks due to
  * migrate jobs servicing the faults gets stuck behind the job that faulted.
  */
@@ -317,8 +312,7 @@ static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
 		if (hwe->class != XE_ENGINE_CLASS_COPY)
 			continue;
 
-		if (!XE_WA(gt, 16017236439) ||
-		    xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1)
+		if (xe_gt_is_usm_hwe(gt, hwe))
 			logical_mask |= BIT(hwe->logical_instance);
 	}
 
@@ -369,6 +363,10 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
 		if (!hwe || !logical_mask)
 			return ERR_PTR(-EINVAL);
 
+		/*
+		 * XXX: Currently only reserving 1 (likely slow) BCS instance on
+		 * PVC, may want to revisit if performance is needed.
+		 */
 		m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
 					    EXEC_QUEUE_FLAG_KERNEL |
 					    EXEC_QUEUE_FLAG_PERMANENT |
-- 
cgit v1.2.3


From 6c5cd0807c79eb4c0cda70b48f6be668a241d584 Mon Sep 17 00:00:00 2001
From: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Date: Tue, 21 May 2024 13:17:11 -0700
Subject: drm/xe: Properly handle alloc_guc_id() failure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Release the submission_state lock if alloc_guc_id() fails.

v2: Add Fixes tag and CC stable kernel

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Cc: <stable@vger.kernel.org> # v6.8+
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240521201711.4934-1-niranjana.vishwanathapura@intel.com
(cherry picked from commit 40672b792a36894aff3a337b695f6136ee6ac5d4)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_guc_submit.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index c7d38469fb46..e4e3658e6a13 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -1240,6 +1240,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
 	return 0;
 
 err_entity:
+	mutex_unlock(&guc->submission_state.lock);
 	xe_sched_entity_fini(&ge->entity);
 err_sched:
 	xe_sched_fini(&ge->sched);
-- 
cgit v1.2.3


From 195aba96b854dd664768f382cd1db375d8181f88 Mon Sep 17 00:00:00 2001
From: "Matthew R. Ochs" <mochs@nvidia.com>
Date: Wed, 22 May 2024 15:06:40 +0300
Subject: tpm_tis_spi: Account for SPI header when allocating TPM SPI xfer
 buffer

The TPM SPI transfer mechanism uses MAX_SPI_FRAMESIZE for computing the
maximum transfer length and the size of the transfer buffer. As such, it
does not account for the 4 bytes of header that prepends the SPI data
frame. This can result in out-of-bounds accesses and was confirmed with
KASAN.

Introduce SPI_HDRSIZE to account for the header and use to allocate the
transfer buffer.

Fixes: a86a42ac2bd6 ("tpm_tis_spi: Add hardware wait polling")
Signed-off-by: Matthew R. Ochs <mochs@nvidia.com>
Tested-by: Carol Soto <csoto@nvidia.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_tis_spi_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/char/tpm/tpm_tis_spi_main.c b/drivers/char/tpm/tpm_tis_spi_main.c
index 3f9eaf27b41b..c9eca24bbad4 100644
--- a/drivers/char/tpm/tpm_tis_spi_main.c
+++ b/drivers/char/tpm/tpm_tis_spi_main.c
@@ -37,6 +37,7 @@
 #include "tpm_tis_spi.h"
 
 #define MAX_SPI_FRAMESIZE 64
+#define SPI_HDRSIZE 4
 
 /*
  * TCG SPI flow control is documented in section 6.4 of the spec[1]. In short,
@@ -247,7 +248,7 @@ static int tpm_tis_spi_write_bytes(struct tpm_tis_data *data, u32 addr,
 int tpm_tis_spi_init(struct spi_device *spi, struct tpm_tis_spi_phy *phy,
 		     int irq, const struct tpm_tis_phy_ops *phy_ops)
 {
-	phy->iobuf = devm_kmalloc(&spi->dev, MAX_SPI_FRAMESIZE, GFP_KERNEL);
+	phy->iobuf = devm_kmalloc(&spi->dev, SPI_HDRSIZE + MAX_SPI_FRAMESIZE, GFP_KERNEL);
 	if (!phy->iobuf)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From f3d7ba9e1bc0c9080834f263d4887bd9c9ea491f Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Mon, 27 May 2024 13:56:27 +0300
Subject: tpm: Open code tpm_buf_parameters()

With only single call site, this makes no sense (slipped out of the
radar during the review). Open code and document the action directly
to the site, to make it more readable.

Fixes: 1b6d7f9eb150 ("tpm: add session encryption protection to tpm2_get_random()")
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm-buf.c  | 26 --------------------------
 drivers/char/tpm/tpm2-cmd.c | 10 +++++++++-
 include/linux/tpm.h         |  2 --
 3 files changed, 9 insertions(+), 29 deletions(-)

diff --git a/drivers/char/tpm/tpm-buf.c b/drivers/char/tpm/tpm-buf.c
index 647c6ca92ac3..cad0048bcc3c 100644
--- a/drivers/char/tpm/tpm-buf.c
+++ b/drivers/char/tpm/tpm-buf.c
@@ -223,30 +223,4 @@ u32 tpm_buf_read_u32(struct tpm_buf *buf, off_t *offset)
 }
 EXPORT_SYMBOL_GPL(tpm_buf_read_u32);
 
-static u16 tpm_buf_tag(struct tpm_buf *buf)
-{
-	struct tpm_header *head = (struct tpm_header *)buf->data;
-
-	return be16_to_cpu(head->tag);
-}
-
-/**
- * tpm_buf_parameters - return the TPM response parameters area of the tpm_buf
- * @buf: tpm_buf to use
- *
- * Where the parameters are located depends on the tag of a TPM
- * command (it's immediately after the header for TPM_ST_NO_SESSIONS
- * or 4 bytes after for TPM_ST_SESSIONS). Evaluate this and return a
- * pointer to the first byte of the parameters area.
- *
- * @return: pointer to parameters area
- */
-u8 *tpm_buf_parameters(struct tpm_buf *buf)
-{
-	int offset = TPM_HEADER_SIZE;
-
-	if (tpm_buf_tag(buf) == TPM2_ST_SESSIONS)
-		offset += 4;
 
-	return &buf->data[offset];
-}
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index 0cdf892ec2a7..1e856259219e 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -281,6 +281,7 @@ struct tpm2_get_random_out {
 int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max)
 {
 	struct tpm2_get_random_out *out;
+	struct tpm_header *head;
 	struct tpm_buf buf;
 	u32 recd;
 	u32 num_bytes = max;
@@ -288,6 +289,7 @@ int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max)
 	int total = 0;
 	int retries = 5;
 	u8 *dest_ptr = dest;
+	off_t offset;
 
 	if (!num_bytes || max > TPM_MAX_RNG_DATA)
 		return -EINVAL;
@@ -320,7 +322,13 @@ int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max)
 			goto out;
 		}
 
-		out = (struct tpm2_get_random_out *)tpm_buf_parameters(&buf);
+		head = (struct tpm_header *)buf.data;
+		offset = TPM_HEADER_SIZE;
+		/* Skip the parameter size field: */
+		if (be16_to_cpu(head->tag) == TPM2_ST_SESSIONS)
+			offset += 4;
+
+		out = (struct tpm2_get_random_out *)&buf.data[offset];
 		recd = min_t(u32, be16_to_cpu(out->size), num_bytes);
 		if (tpm_buf_length(&buf) <
 		    TPM_HEADER_SIZE +
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index c17e4efbb2e5..b3217200df28 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -437,8 +437,6 @@ u8 tpm_buf_read_u8(struct tpm_buf *buf, off_t *offset);
 u16 tpm_buf_read_u16(struct tpm_buf *buf, off_t *offset);
 u32 tpm_buf_read_u32(struct tpm_buf *buf, off_t *offset);
 
-u8 *tpm_buf_parameters(struct tpm_buf *buf);
-
 /*
  * Check if TPM device is in the firmware upgrade mode.
  */
-- 
cgit v1.2.3


From 4b4647add7d3c8530493f7247d11e257ee425bf0 Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
Date: Fri, 24 May 2024 11:47:02 -0300
Subject: sock_map: avoid race between sock_map_close and sk_psock_put

sk_psock_get will return NULL if the refcount of psock has gone to 0, which
will happen when the last call of sk_psock_put is done. However,
sk_psock_drop may not have finished yet, so the close callback will still
point to sock_map_close despite psock being NULL.

This can be reproduced with a thread deleting an element from the sock map,
while the second one creates a socket, adds it to the map and closes it.

That will trigger the WARN_ON_ONCE:

------------[ cut here ]------------
WARNING: CPU: 1 PID: 7220 at net/core/sock_map.c:1701 sock_map_close+0x2a2/0x2d0 net/core/sock_map.c:1701
Modules linked in:
CPU: 1 PID: 7220 Comm: syz-executor380 Not tainted 6.9.0-syzkaller-07726-g3c999d1ae3c7 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/02/2024
RIP: 0010:sock_map_close+0x2a2/0x2d0 net/core/sock_map.c:1701
Code: df e8 92 29 88 f8 48 8b 1b 48 89 d8 48 c1 e8 03 42 80 3c 20 00 74 08 48 89 df e8 79 29 88 f8 4c 8b 23 eb 89 e8 4f 15 23 f8 90 <0f> 0b 90 48 83 c4 08 5b 41 5c 41 5d 41 5e 41 5f 5d e9 13 26 3d 02
RSP: 0018:ffffc9000441fda8 EFLAGS: 00010293
RAX: ffffffff89731ae1 RBX: ffffffff94b87540 RCX: ffff888029470000
RDX: 0000000000000000 RSI: ffffffff8bcab5c0 RDI: ffffffff8c1faba0
RBP: 0000000000000000 R08: ffffffff92f9b61f R09: 1ffffffff25f36c3
R10: dffffc0000000000 R11: fffffbfff25f36c4 R12: ffffffff89731840
R13: ffff88804b587000 R14: ffff88804b587000 R15: ffffffff89731870
FS:  000055555e080380(0000) GS:ffff8880b9500000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 00000000207d4000 CR4: 0000000000350ef0
Call Trace:
 <TASK>
 unix_release+0x87/0xc0 net/unix/af_unix.c:1048
 __sock_release net/socket.c:659 [inline]
 sock_close+0xbe/0x240 net/socket.c:1421
 __fput+0x42b/0x8a0 fs/file_table.c:422
 __do_sys_close fs/open.c:1556 [inline]
 __se_sys_close fs/open.c:1541 [inline]
 __x64_sys_close+0x7f/0x110 fs/open.c:1541
 do_syscall_x64 arch/x86/entry/common.c:52 [inline]
 do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7fb37d618070
Code: 00 00 48 c7 c2 b8 ff ff ff f7 d8 64 89 02 b8 ff ff ff ff eb d4 e8 10 2c 00 00 80 3d 31 f0 07 00 00 74 17 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 48 c3 0f 1f 80 00 00 00 00 48 83 ec 18 89 7c
RSP: 002b:00007ffcd4a525d8 EFLAGS: 00000202 ORIG_RAX: 0000000000000003
RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00007fb37d618070
RDX: 0000000000000010 RSI: 00000000200001c0 RDI: 0000000000000004
RBP: 0000000000000000 R08: 0000000100000000 R09: 0000000100000000
R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
 </TASK>

Use sk_psock, which will only check that the pointer is not been set to
NULL yet, which should only happen after the callbacks are restored. If,
then, a reference can still be gotten, we may call sk_psock_stop and cancel
psock->work.

As suggested by Paolo Abeni, reorder the condition so the control flow is
less convoluted.

After that change, the reproducer does not trigger the WARN_ON_ONCE
anymore.

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Reported-by: syzbot+07a2e4a1a57118ef7355@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=07a2e4a1a57118ef7355
Fixes: aadb2bb83ff7 ("sock_map: Fix a potential use-after-free in sock_map_close()")
Fixes: 5b4a79ba65a1 ("bpf, sockmap: Don't let sock_map_{close,destroy,unhash} call itself")
Cc: stable@vger.kernel.org
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/r/20240524144702.1178377-1-cascardo@igalia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/core/sock_map.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 63c016b4c169..d3dbb92153f2 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1674,19 +1674,23 @@ void sock_map_close(struct sock *sk, long timeout)
 
 	lock_sock(sk);
 	rcu_read_lock();
-	psock = sk_psock_get(sk);
-	if (unlikely(!psock)) {
-		rcu_read_unlock();
-		release_sock(sk);
-		saved_close = READ_ONCE(sk->sk_prot)->close;
-	} else {
+	psock = sk_psock(sk);
+	if (likely(psock)) {
 		saved_close = psock->saved_close;
 		sock_map_remove_links(sk, psock);
+		psock = sk_psock_get(sk);
+		if (unlikely(!psock))
+			goto no_psock;
 		rcu_read_unlock();
 		sk_psock_stop(psock);
 		release_sock(sk);
 		cancel_delayed_work_sync(&psock->work);
 		sk_psock_put(sk, psock);
+	} else {
+		saved_close = READ_ONCE(sk->sk_prot)->close;
+no_psock:
+		rcu_read_unlock();
+		release_sock(sk);
 	}
 
 	/* Make sure we do not recurse. This is a bug.
-- 
cgit v1.2.3


From f09fc6cee0dcfc38148ee6b6dd04f93e353d22f2 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Tue, 28 May 2024 12:52:21 +0300
Subject: tpm: Rename TPM2_OA_TMPL to TPM2_OA_NULL_KEY and make it local

Rename and document TPM2_OA_TMPL, as originally requested in the patch
set review, but left unaddressed without any appropriate reasoning. The
new name is TPM2_OA_NULL_KEY, has a documentation and is local only to
tpm2-sessions.c.

Link: https://lore.kernel.org/linux-integrity/ddbeb8111f48a8ddb0b8fca248dff6cc9d7079b2.camel@HansenPartnership.com/
Link: https://lore.kernel.org/linux-integrity/CZCKTWU6ZCC9.2UTEQPEVICYHL@suppilovahvero/
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm2-sessions.c | 21 +++++++++++++++++++--
 include/linux/tpm.h              | 15 ---------------
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c
index ea8860661876..907ac9956a78 100644
--- a/drivers/char/tpm/tpm2-sessions.c
+++ b/drivers/char/tpm/tpm2-sessions.c
@@ -80,6 +80,9 @@
 /* maximum number of names the TPM must remember for authorization */
 #define AUTH_MAX_NAMES	3
 
+#define AES_KEY_BYTES	AES_KEYSIZE_128
+#define AES_KEY_BITS	(AES_KEY_BYTES*8)
+
 static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy,
 			       u32 *handle, u8 *name);
 
@@ -954,6 +957,20 @@ int tpm2_start_auth_session(struct tpm_chip *chip)
 }
 EXPORT_SYMBOL(tpm2_start_auth_session);
 
+/*
+ * A mask containing the object attributes for the kernel held null primary key
+ * used in HMAC encryption. For more information on specific attributes look up
+ * to "8.3 TPMA_OBJECT (Object Attributes)".
+ */
+#define TPM2_OA_NULL_KEY ( \
+	TPM2_OA_NO_DA | \
+	TPM2_OA_FIXED_TPM | \
+	TPM2_OA_FIXED_PARENT | \
+	TPM2_OA_SENSITIVE_DATA_ORIGIN |	\
+	TPM2_OA_USER_WITH_AUTH | \
+	TPM2_OA_DECRYPT | \
+	TPM2_OA_RESTRICTED)
+
 /**
  * tpm2_parse_create_primary() - parse the data returned from TPM_CC_CREATE_PRIMARY
  *
@@ -1018,7 +1035,7 @@ static int tpm2_parse_create_primary(struct tpm_chip *chip, struct tpm_buf *buf,
 	val = tpm_buf_read_u32(buf, &offset_t);
 
 	/* object properties */
-	if (val != TPM2_OA_TMPL)
+	if (val != TPM2_OA_NULL_KEY)
 		return -EINVAL;
 
 	/* auth policy (empty) */
@@ -1178,7 +1195,7 @@ static int tpm2_create_primary(struct tpm_chip *chip, u32 hierarchy,
 	tpm_buf_append_u16(&template, TPM_ALG_SHA256);
 
 	/* object properties */
-	tpm_buf_append_u32(&template, TPM2_OA_TMPL);
+	tpm_buf_append_u32(&template, TPM2_OA_NULL_KEY);
 
 	/* sauth policy (empty) */
 	tpm_buf_append_u16(&template, 0);
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index b3217200df28..21a67dc9efe8 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -394,21 +394,6 @@ enum tpm2_object_attributes {
 	TPM2_OA_SIGN			= BIT(18),
 };
 
-/*
- * definitions for the canonical template.  These are mandated
- * by the TCG key template documents
- */
-
-#define AES_KEY_BYTES	AES_KEYSIZE_128
-#define AES_KEY_BITS	(AES_KEY_BYTES*8)
-#define TPM2_OA_TMPL	(TPM2_OA_NO_DA |			\
-			 TPM2_OA_FIXED_TPM |			\
-			 TPM2_OA_FIXED_PARENT |			\
-			 TPM2_OA_SENSITIVE_DATA_ORIGIN |	\
-			 TPM2_OA_USER_WITH_AUTH |		\
-			 TPM2_OA_DECRYPT |			\
-			 TPM2_OA_RESTRICTED)
-
 enum tpm2_session_attributes {
 	TPM2_SA_CONTINUE_SESSION	= BIT(0),
 	TPM2_SA_AUDIT_EXCLUSIVE		= BIT(1),
-- 
cgit v1.2.3


From d3e43a8fa43effdbb62c7edc206df7ac67772205 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Tue, 28 May 2024 12:58:41 +0300
Subject: tpm: Enable TCG_TPM2_HMAC by default only for X86_64

Given the not fully root caused performance issues on non-x86 platforms,
enable the feature by default only for x86-64. That is the platform it
brings the most value and has gone most of the QA. Can be reconsidered
later and can be obviously opt-in enabled too on any arch.

Link: https://lore.kernel.org/linux-integrity/bf67346ef623ff3c452c4f968b7d900911e250c3.camel@gmail.com/#t
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index e63a6a17793c..cf0be8a7939d 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -29,7 +29,7 @@ if TCG_TPM
 
 config TCG_TPM2_HMAC
 	bool "Use HMAC and encrypted transactions on the TPM bus"
-	default y
+	default X86_64
 	select CRYPTO_ECDH
 	select CRYPTO_LIB_AESCFB
 	select CRYPTO_LIB_SHA256
-- 
cgit v1.2.3


From 8a42886cae307663f3f999846926bd6e64392000 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 27 May 2024 17:18:49 +0200
Subject: ALSA: seq: Fix missing bank setup between MIDI1/MIDI2 UMP conversion

When a UMP packet is converted between MIDI1 and MIDI2 protocols, the
bank selection may be lost.  The conversion from MIDI1 to MIDI2 needs
the encoding of the bank into UMP_MSG_STATUS_PROGRAM bits, while the
conversion from MIDI2 to MIDI1 needs the extraction from that
instead.

This patch implements the missing bank selection mechanism in those
conversions.

Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events")
Link: https://lore.kernel.org/r/20240527151852.29036-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/seq/seq_ump_convert.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c
index ee6ac649df83..c21be87f5da9 100644
--- a/sound/core/seq/seq_ump_convert.c
+++ b/sound/core/seq/seq_ump_convert.c
@@ -368,6 +368,7 @@ static int cvt_ump_midi1_to_midi2(struct snd_seq_client *dest,
 	struct snd_seq_ump_event ev_cvt;
 	const union snd_ump_midi1_msg *midi1 = (const union snd_ump_midi1_msg *)event->ump;
 	union snd_ump_midi2_msg *midi2 = (union snd_ump_midi2_msg *)ev_cvt.ump;
+	struct snd_seq_ump_midi2_bank *cc;
 
 	ev_cvt = *event;
 	memset(&ev_cvt.ump, 0, sizeof(ev_cvt.ump));
@@ -387,11 +388,29 @@ static int cvt_ump_midi1_to_midi2(struct snd_seq_client *dest,
 		midi2->paf.data = upscale_7_to_32bit(midi1->paf.data);
 		break;
 	case UMP_MSG_STATUS_CC:
+		cc = &dest_port->midi2_bank[midi1->note.channel];
+		switch (midi1->cc.index) {
+		case UMP_CC_BANK_SELECT:
+			cc->bank_set = 1;
+			cc->cc_bank_msb = midi1->cc.data;
+			return 0; // skip
+		case UMP_CC_BANK_SELECT_LSB:
+			cc->bank_set = 1;
+			cc->cc_bank_lsb = midi1->cc.data;
+			return 0; // skip
+		}
 		midi2->cc.index = midi1->cc.index;
 		midi2->cc.data = upscale_7_to_32bit(midi1->cc.data);
 		break;
 	case UMP_MSG_STATUS_PROGRAM:
 		midi2->pg.program = midi1->pg.program;
+		cc = &dest_port->midi2_bank[midi1->note.channel];
+		if (cc->bank_set) {
+			midi2->pg.bank_valid = 1;
+			midi2->pg.bank_msb = cc->cc_bank_msb;
+			midi2->pg.bank_lsb = cc->cc_bank_lsb;
+			cc->bank_set = 0;
+		}
 		break;
 	case UMP_MSG_STATUS_CHANNEL_PRESSURE:
 		midi2->caf.data = upscale_7_to_32bit(midi1->caf.data);
@@ -419,6 +438,7 @@ static int cvt_ump_midi2_to_midi1(struct snd_seq_client *dest,
 	struct snd_seq_ump_event ev_cvt;
 	union snd_ump_midi1_msg *midi1 = (union snd_ump_midi1_msg *)ev_cvt.ump;
 	const union snd_ump_midi2_msg *midi2 = (const union snd_ump_midi2_msg *)event->ump;
+	int err;
 	u16 v;
 
 	ev_cvt = *event;
@@ -443,6 +463,24 @@ static int cvt_ump_midi2_to_midi1(struct snd_seq_client *dest,
 		midi1->cc.data = downscale_32_to_7bit(midi2->cc.data);
 		break;
 	case UMP_MSG_STATUS_PROGRAM:
+		if (midi2->pg.bank_valid) {
+			midi1->cc.status = UMP_MSG_STATUS_CC;
+			midi1->cc.index = UMP_CC_BANK_SELECT;
+			midi1->cc.data = midi2->pg.bank_msb;
+			err = __snd_seq_deliver_single_event(dest, dest_port,
+							     (struct snd_seq_event *)&ev_cvt,
+							     atomic, hop);
+			if (err < 0)
+				return err;
+			midi1->cc.index = UMP_CC_BANK_SELECT_LSB;
+			midi1->cc.data = midi2->pg.bank_lsb;
+			err = __snd_seq_deliver_single_event(dest, dest_port,
+							     (struct snd_seq_event *)&ev_cvt,
+							     atomic, hop);
+			if (err < 0)
+				return err;
+			midi1->note.status = midi2->note.status;
+		}
 		midi1->pg.program = midi2->pg.program;
 		break;
 	case UMP_MSG_STATUS_CHANNEL_PRESSURE:
-- 
cgit v1.2.3


From a200df7deb3186cd7b55abb77ab96dfefb8a4f09 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 27 May 2024 17:18:50 +0200
Subject: ALSA: seq: Don't clear bank selection at event -> UMP MIDI2
 conversion

The current code to convert from a legacy sequencer event to UMP MIDI2
clears the bank selection at each time the program change is
submitted.  This is confusing and may lead to incorrect bank values
tranmitted to the destination in the end.

Drop the line to clear the bank info and keep the provided values.

Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events")
Link: https://lore.kernel.org/r/20240527151852.29036-2-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/seq/seq_ump_convert.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c
index c21be87f5da9..f5d22dd00842 100644
--- a/sound/core/seq/seq_ump_convert.c
+++ b/sound/core/seq/seq_ump_convert.c
@@ -892,7 +892,6 @@ static int pgm_ev_to_ump_midi2(const struct snd_seq_event *event,
 		data->pg.bank_msb = cc->cc_bank_msb;
 		data->pg.bank_lsb = cc->cc_bank_lsb;
 		cc->bank_set = 0;
-		cc->cc_bank_msb = cc->cc_bank_lsb = 0;
 	}
 	return 1;
 }
-- 
cgit v1.2.3


From e662c90a6debc3bd8d8eff916ad21d9ec458dfcd Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 27 May 2024 14:38:08 -0500
Subject: ALSA/hda: intel-dsp-config: reduce log verbosity

The information on PCI class/subclass was interesting in the Skylake
timeframe, since the DSP was only enabled on a limited number of
platforms. Now most Intel platforms do enable the DSP, so the
information is less interesting to log.

When a DSP driver is used, the common helper may be called multiple
times due to deferred probes, but there's no reason to print the same
information multiple times. Using dev_info_once() covers all the
existing usages for internal cards with DSPs. External cards don't
rely on DSPs so far.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20240527193808.165652-1-pierre-louis.bossart@linux.intel.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/hda/intel-dsp-config.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c
index cfdb1b73c88c..537863447358 100644
--- a/sound/hda/intel-dsp-config.c
+++ b/sound/hda/intel-dsp-config.c
@@ -668,7 +668,7 @@ int snd_intel_dsp_driver_probe(struct pci_dev *pci)
 		return SND_INTEL_DSP_DRIVER_LEGACY;
 	}
 
-	dev_info(&pci->dev, "DSP detected with PCI class/subclass/prog-if info 0x%06x\n", pci->class);
+	dev_dbg(&pci->dev, "DSP detected with PCI class/subclass/prog-if info 0x%06x\n", pci->class);
 
 	/* find the configuration for the specific device */
 	cfg = snd_intel_dsp_find_config(pci, config_table, ARRAY_SIZE(config_table));
@@ -678,12 +678,12 @@ int snd_intel_dsp_driver_probe(struct pci_dev *pci)
 	if (cfg->flags & FLAG_SOF) {
 		if (cfg->flags & FLAG_SOF_ONLY_IF_SOUNDWIRE &&
 		    snd_intel_dsp_check_soundwire(pci) > 0) {
-			dev_info(&pci->dev, "SoundWire enabled on CannonLake+ platform, using SOF driver\n");
+			dev_info_once(&pci->dev, "SoundWire enabled on CannonLake+ platform, using SOF driver\n");
 			return SND_INTEL_DSP_DRIVER_SOF;
 		}
 		if (cfg->flags & FLAG_SOF_ONLY_IF_DMIC &&
 		    snd_intel_dsp_check_dmic(pci)) {
-			dev_info(&pci->dev, "Digital mics found on Skylake+ platform, using SOF driver\n");
+			dev_info_once(&pci->dev, "Digital mics found on Skylake+ platform, using SOF driver\n");
 			return SND_INTEL_DSP_DRIVER_SOF;
 		}
 		if (!(cfg->flags & FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE))
@@ -694,7 +694,7 @@ int snd_intel_dsp_driver_probe(struct pci_dev *pci)
 	if (cfg->flags & FLAG_SST) {
 		if (cfg->flags & FLAG_SST_ONLY_IF_DMIC) {
 			if (snd_intel_dsp_check_dmic(pci)) {
-				dev_info(&pci->dev, "Digital mics found on Skylake+ platform, using SST driver\n");
+				dev_info_once(&pci->dev, "Digital mics found on Skylake+ platform, using SST driver\n");
 				return SND_INTEL_DSP_DRIVER_SST;
 			}
 		} else {
-- 
cgit v1.2.3


From 72b6a2d6506843375c7b91197f49ef38ca0c6d0f Mon Sep 17 00:00:00 2001
From: Alina Yu <alina_yu@richtek.com>
Date: Tue, 28 May 2024 14:01:13 +0800
Subject: regulator: rtq2208: Fix invalid memory access when
 devm_of_regulator_put_matches is called

In this patch, a software bug has been fixed.
rtq2208_ldo_match is no longer a local variable.
It prevents invalid memory access when devm_of_regulator_put_matches
 is called.

Signed-off-by: Alina Yu <alina_yu@richtek.com>
Link: https://msgid.link/r/4ce8c4f16f1cf3aa4e5f36c0694dd3c5ccf3cd1c.1716870419.git.alina_yu@richtek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rtq2208-regulator.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/regulator/rtq2208-regulator.c b/drivers/regulator/rtq2208-regulator.c
index b90e53d922d6..c31b6dc3229c 100644
--- a/drivers/regulator/rtq2208-regulator.c
+++ b/drivers/regulator/rtq2208-regulator.c
@@ -228,6 +228,11 @@ static const struct regulator_ops rtq2208_regulator_ldo_ops = {
 	.set_suspend_disable = rtq2208_set_suspend_disable,
 };
 
+static struct of_regulator_match rtq2208_ldo_match[] = {
+	{.name = "ldo2", },
+	{.name = "ldo1", },
+};
+
 static unsigned int rtq2208_of_map_mode(unsigned int mode)
 {
 	switch (mode) {
@@ -322,8 +327,7 @@ static irqreturn_t rtq2208_irq_handler(int irqno, void *devid)
 	return IRQ_HANDLED;
 }
 
-static int rtq2208_of_get_fixed_voltage(struct device *dev,
-					struct of_regulator_match *rtq2208_ldo_match, int n_fixed)
+static int rtq2208_of_get_ldo_dvs_ability(struct device *dev)
 {
 	struct device_node *np;
 	struct of_regulator_match *match;
@@ -338,14 +342,14 @@ static int rtq2208_of_get_fixed_voltage(struct device *dev,
 	if (!np)
 		np = dev->of_node;
 
-	ret = of_regulator_match(dev, np, rtq2208_ldo_match, n_fixed);
+	ret = of_regulator_match(dev, np, rtq2208_ldo_match, ARRAY_SIZE(rtq2208_ldo_match));
 
 	of_node_put(np);
 
 	if (ret < 0)
 		return ret;
 
-	for (i = 0; i < n_fixed; i++) {
+	for (i = 0; i < ARRAY_SIZE(rtq2208_ldo_match); i++) {
 		match = rtq2208_ldo_match + i;
 		init_data = match->init_data;
 		rdesc = (struct rtq2208_regulator_desc *)match->driver_data;
@@ -388,8 +392,7 @@ static const struct linear_range rtq2208_vout_range[] = {
 	REGULATOR_LINEAR_RANGE(1310000, 181, 255, 10000),
 };
 
-static void rtq2208_init_regulator_desc(struct rtq2208_regulator_desc *rdesc, int mtp_sel,
-					int idx, struct of_regulator_match *rtq2208_ldo_match, int *ldo_idx)
+static void rtq2208_init_regulator_desc(struct rtq2208_regulator_desc *rdesc, int mtp_sel, int idx)
 {
 	struct regulator_desc *desc;
 	static const struct {
@@ -461,8 +464,7 @@ static void rtq2208_init_regulator_desc(struct rtq2208_regulator_desc *rdesc, in
 static int rtq2208_parse_regulator_dt_data(int n_regulator, const unsigned int *regulator_idx_table,
 		struct rtq2208_regulator_desc *rdesc[RTQ2208_LDO_MAX], struct device *dev)
 {
-	struct of_regulator_match rtq2208_ldo_match[2];
-	int mtp_sel, ret, i, idx, ldo_idx = 0;
+	int mtp_sel, i, idx, ret;
 
 	/* get mtp_sel0 or mtp_sel1 */
 	mtp_sel = device_property_read_bool(dev, "richtek,mtp-sel-high");
@@ -474,7 +476,7 @@ static int rtq2208_parse_regulator_dt_data(int n_regulator, const unsigned int *
 		if (!rdesc[i])
 			return -ENOMEM;
 
-		rtq2208_init_regulator_desc(rdesc[i], mtp_sel, idx, rtq2208_ldo_match, &ldo_idx);
+		rtq2208_init_regulator_desc(rdesc[i], mtp_sel, idx);
 
 		/* init ldo dvs ability */
 		if (idx >= RTQ2208_LDO2)
@@ -482,7 +484,7 @@ static int rtq2208_parse_regulator_dt_data(int n_regulator, const unsigned int *
 	}
 
 	/* init ldo fixed_uV */
-	ret = rtq2208_of_get_fixed_voltage(dev, rtq2208_ldo_match, ldo_idx);
+	ret = rtq2208_of_get_ldo_dvs_ability(dev);
 	if (ret)
 		return dev_err_probe(dev, ret, "Failed to get ldo fixed_uV\n");
 
-- 
cgit v1.2.3


From 233e27b4d21c3e44eb863f03e566d3a22e81a7ae Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Tue, 28 May 2024 15:28:52 +0900
Subject: null_blk: Print correct max open zones limit in null_init_zoned_dev()

When changing the maximum number of open zones, print that number
instead of the total number of zones.

Fixes: dc4d137ee3b7 ("null_blk: add support for max open/active zone limit for zoned devices")
Cc: stable@vger.kernel.org
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Niklas Cassel <cassel@kernel.org>
Link: https://lore.kernel.org/r/20240528062852.437599-1-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk/zoned.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 5b5a63adacc1..79c8e5e99f7f 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -108,7 +108,7 @@ int null_init_zoned_dev(struct nullb_device *dev,
 	if (dev->zone_max_active && dev->zone_max_open > dev->zone_max_active) {
 		dev->zone_max_open = dev->zone_max_active;
 		pr_info("changed the maximum number of open zones to %u\n",
-			dev->nr_zones);
+			dev->zone_max_open);
 	} else if (dev->zone_max_open >= dev->nr_zones - dev->zone_nr_conv) {
 		dev->zone_max_open = 0;
 		pr_info("zone_max_open limit disabled, limit >= zone count\n");
-- 
cgit v1.2.3


From bafea1c58b24be594d97841ced1b7ae0347bf6e3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 May 2024 20:26:13 +0200
Subject: sd: also set max_user_sectors when setting max_sectors

sd can set a max_sectors value that is lower than the max_hw_sectors
limit based on the block limits VPD page.   While this is rather unusual,
it used to work until the max_user_sectors field was split out to cleanly
deal with conflicting hardware and user limits when the hardware limit
changes.  Also set max_user_sectors to ensure the limit can properly be
stacked.

Fixes: 4f563a64732d ("block: add a max_user_discard_sectors queue limit")
Reported-by: Mike Snitzer <snitzer@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Mike Snitzer <snitzer@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240523182618.602003-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/scsi/sd.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 332eb9dac22d..f6c822c9cbd2 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3700,8 +3700,10 @@ static int sd_revalidate_disk(struct gendisk *disk)
 	 */
 	if (sdkp->first_scan ||
 	    q->limits.max_sectors > q->limits.max_dev_sectors ||
-	    q->limits.max_sectors > q->limits.max_hw_sectors)
+	    q->limits.max_sectors > q->limits.max_hw_sectors) {
 		q->limits.max_sectors = rw_max;
+		q->limits.max_user_sectors = rw_max;
+	}
 
 	sdkp->first_scan = 0;
 
-- 
cgit v1.2.3


From e528bede6f4e6822afdf0fa80be46ea9199f0911 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 May 2024 20:26:14 +0200
Subject: block: stack max_user_sectors

The max_user_sectors is one of the three factors determining the actual
max_sectors limit for READ/WRITE requests.  Because of that it needs to
be stacked at least for the device mapper multi-path case where requests
are directly inserted on the lower device.  For SCSI disks this is
important because the sd driver actually sets it's own advisory limit
that is lower than max_hw_sectors based on the block limits VPD page.
While this is a bit odd an unusual, the same effect can happen if a
user or udev script tweaks the value manually.

Fixes: 4f563a64732d ("block: add a max_user_discard_sectors queue limit")
Reported-by: Mike Snitzer <snitzer@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Mike Snitzer <snitzer@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240523182618.602003-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-settings.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index a7fe8e90240a..7a672021daee 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -611,6 +611,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	unsigned int top, bottom, alignment, ret = 0;
 
 	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
+	t->max_user_sectors = min_not_zero(t->max_user_sectors,
+			b->max_user_sectors);
 	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
 	t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
 	t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
-- 
cgit v1.2.3


From e993db2d6e5207f1ae061c2ac554ab1f714c741d Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@kernel.org>
Date: Fri, 24 May 2024 12:46:51 +0200
Subject: block: check for max_hw_sectors underflow

The logical block size need to be smaller than the max_hw_sector
setting, otherwise we can't even transfer a single LBA.

Signed-off-by: Hannes Reinecke <hare@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-settings.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 7a672021daee..effeb9a639bb 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -104,6 +104,7 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
 static int blk_validate_limits(struct queue_limits *lim)
 {
 	unsigned int max_hw_sectors;
+	unsigned int logical_block_sectors;
 
 	/*
 	 * Unless otherwise specified, default to 512 byte logical blocks and a
@@ -134,8 +135,11 @@ static int blk_validate_limits(struct queue_limits *lim)
 		lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
 	if (WARN_ON_ONCE(lim->max_hw_sectors < PAGE_SECTORS))
 		return -EINVAL;
+	logical_block_sectors = lim->logical_block_size >> SECTOR_SHIFT;
+	if (WARN_ON_ONCE(logical_block_sectors > lim->max_hw_sectors))
+		return -EINVAL;
 	lim->max_hw_sectors = round_down(lim->max_hw_sectors,
-			lim->logical_block_size >> SECTOR_SHIFT);
+			logical_block_sectors);
 
 	/*
 	 * The actual max_sectors value is a complex beast and also takes the
@@ -153,7 +157,7 @@ static int blk_validate_limits(struct queue_limits *lim)
 		lim->max_sectors = min(max_hw_sectors, BLK_DEF_MAX_SECTORS_CAP);
 	}
 	lim->max_sectors = round_down(lim->max_sectors,
-			lim->logical_block_size >> SECTOR_SHIFT);
+			logical_block_sectors);
 
 	/*
 	 * Random default for the maximum number of segments.  Driver should not
-- 
cgit v1.2.3


From a14a68b76954e73031ca6399abace17dcb77c17a Mon Sep 17 00:00:00 2001
From: Dongsheng Yang <dongsheng.yang@easystack.cn>
Date: Tue, 28 May 2024 20:09:12 +0800
Subject: bcache: allow allocator to invalidate bucket in gc

Currently, if the gc is running, when the allocator found free_inc
is empty, allocator has to wait the gc finish. Before that, the
IO is blocked.

But actually, there would be some buckets is reclaimable before gc,
and gc will never mark this kind of bucket to be unreclaimable.

So we can put these buckets into free_inc in gc running to avoid
IO being blocked.

Signed-off-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
Signed-off-by: Mingzhe Zou <mingzhe.zou@easystack.cn>
Signed-off-by: Coly Li <colyli@suse.de>
Link: https://lore.kernel.org/r/20240528120914.28705-2-colyli@suse.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/alloc.c  | 13 +++++--------
 drivers/md/bcache/bcache.h |  1 +
 drivers/md/bcache/btree.c  |  7 ++++++-
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index ce13c272c387..32a46343097d 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -129,12 +129,9 @@ static inline bool can_inc_bucket_gen(struct bucket *b)
 
 bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b)
 {
-	BUG_ON(!ca->set->gc_mark_valid);
-
-	return (!GC_MARK(b) ||
-		GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
-		!atomic_read(&b->pin) &&
-		can_inc_bucket_gen(b);
+	return (ca->set->gc_mark_valid || b->reclaimable_in_gc) &&
+	       ((!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE) &&
+	       !atomic_read(&b->pin) && can_inc_bucket_gen(b));
 }
 
 void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
@@ -148,6 +145,7 @@ void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
 	bch_inc_gen(ca, b);
 	b->prio = INITIAL_PRIO;
 	atomic_inc(&b->pin);
+	b->reclaimable_in_gc = 0;
 }
 
 static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
@@ -352,8 +350,7 @@ static int bch_allocator_thread(void *arg)
 		 */
 
 retry_invalidate:
-		allocator_wait(ca, ca->set->gc_mark_valid &&
-			       !ca->invalidate_needs_gc);
+		allocator_wait(ca, !ca->invalidate_needs_gc);
 		invalidate_buckets(ca);
 
 		/*
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 4e6afa89921f..1d33e40d26ea 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -200,6 +200,7 @@ struct bucket {
 	uint8_t		gen;
 	uint8_t		last_gc; /* Most out of date gen in the btree */
 	uint16_t	gc_mark; /* Bitfield used by GC. See below for field */
+	uint16_t	reclaimable_in_gc:1;
 };
 
 /*
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index d011a7154d33..4e6ccf2c8a0b 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1741,18 +1741,20 @@ static void btree_gc_start(struct cache_set *c)
 
 	mutex_lock(&c->bucket_lock);
 
-	c->gc_mark_valid = 0;
 	c->gc_done = ZERO_KEY;
 
 	ca = c->cache;
 	for_each_bucket(b, ca) {
 		b->last_gc = b->gen;
+		if (bch_can_invalidate_bucket(ca, b))
+			b->reclaimable_in_gc = 1;
 		if (!atomic_read(&b->pin)) {
 			SET_GC_MARK(b, 0);
 			SET_GC_SECTORS_USED(b, 0);
 		}
 	}
 
+	c->gc_mark_valid = 0;
 	mutex_unlock(&c->bucket_lock);
 }
 
@@ -1809,6 +1811,9 @@ static void bch_btree_gc_finish(struct cache_set *c)
 	for_each_bucket(b, ca) {
 		c->need_gc	= max(c->need_gc, bucket_gc_gen(b));
 
+		if (b->reclaimable_in_gc)
+			b->reclaimable_in_gc = 0;
+
 		if (atomic_read(&b->pin))
 			continue;
 
-- 
cgit v1.2.3


From 05356938a4be356adde4eab4425c6822f3c7d706 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 28 May 2024 20:09:13 +0800
Subject: bcache: call force_wake_up_gc() if necessary in check_should_bypass()

If there are extreme heavy write I/O continuously hit on relative small
cache device (512GB in my testing), it is possible to make counter
c->gc_stats.in_use continue to increase and exceed CUTOFF_CACHE_ADD.

If 'c->gc_stats.in_use > CUTOFF_CACHE_ADD' happens, all following write
requests will bypass the cache device because check_should_bypass()
returns 'true'. Because all writes bypass the cache device, counter
c->sectors_to_gc has no chance to be negative value, and garbage
collection thread won't be waken up even the whole cache becomes clean
after writeback accomplished. The aftermath is that all write I/Os go
directly into backing device even the cache device is clean.

To avoid the above situation, this patch uses a quite conservative way
to fix: if 'c->gc_stats.in_use > CUTOFF_CACHE_ADD' happens, only wakes
up garbage collection thread when the whole cache device is clean.

Before the fix, the writes-always-bypass situation happens after 10+
hours write I/O pressure on 512GB Intel optane memory which acts as
cache device. After this fix, such situation doesn't happen after 36+
hours testing.

Signed-off-by: Coly Li <colyli@suse.de>
Link: https://lore.kernel.org/r/20240528120914.28705-3-colyli@suse.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/request.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 83d112bd2b1c..af345dc6fde1 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -369,10 +369,24 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
 	struct io *i;
 
 	if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
-	    c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
 	    (bio_op(bio) == REQ_OP_DISCARD))
 		goto skip;
 
+	if (c->gc_stats.in_use > CUTOFF_CACHE_ADD) {
+		/*
+		 * If cached buckets are all clean now, 'true' will be
+		 * returned and all requests will bypass the cache device.
+		 * Then c->sectors_to_gc has no chance to be negative, and
+		 * gc thread won't wake up and caching won't work forever.
+		 * Here call force_wake_up_gc() to avoid such aftermath.
+		 */
+		if (BDEV_STATE(&dc->sb) == BDEV_STATE_CLEAN &&
+		    c->gc_mark_valid)
+			force_wake_up_gc(c);
+
+		goto skip;
+	}
+
 	if (mode == CACHE_MODE_NONE ||
 	    (mode == CACHE_MODE_WRITEAROUND &&
 	     op_is_write(bio_op(bio))))
-- 
cgit v1.2.3


From 74d4ce92e08d5669d66fd890403724faa4286c21 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Tue, 28 May 2024 20:09:14 +0800
Subject: bcache: code cleanup in __bch_bucket_alloc_set()

In __bch_bucket_alloc_set() the lines after lable 'err:' indeed do
nothing useful after multiple cache devices are removed from bcache
code. This cleanup patch drops the useless code to save a bit CPU
cycles.

Signed-off-by: Coly Li <colyli@suse.de>
Link: https://lore.kernel.org/r/20240528120914.28705-4-colyli@suse.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/alloc.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 32a46343097d..48ce750bf70a 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -498,8 +498,8 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
 
 	ca = c->cache;
 	b = bch_bucket_alloc(ca, reserve, wait);
-	if (b == -1)
-		goto err;
+	if (b < 0)
+		return -1;
 
 	k->ptr[0] = MAKE_PTR(ca->buckets[b].gen,
 			     bucket_to_sector(c, b),
@@ -508,10 +508,6 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
 	SET_KEY_PTRS(k, 1);
 
 	return 0;
-err:
-	bch_bucket_free(c, k);
-	bkey_put(c, k);
-	return -1;
 }
 
 int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
-- 
cgit v1.2.3


From 56a5cf538c3f2d935b0d81040a8303b6e7fc5fd8 Mon Sep 17 00:00:00 2001
From: MD Danish Anwar <danishanwar@ti.com>
Date: Mon, 27 May 2024 12:00:15 +0530
Subject: net: ti: icssg-prueth: Fix start counter for ft1 filter

The start counter for FT1 filter is wrongly set to 0 in the driver.
FT1 is used for source address violation (SAV) check and source address
starts at Byte 6 not Byte 0. Fix this by changing start counter to
ETH_ALEN in icssg_ft1_set_mac_addr().

Fixes: e9b4ece7d74b ("net: ti: icssg-prueth: Add Firmware config and classification APIs.")
Signed-off-by: MD Danish Anwar <danishanwar@ti.com>
Link: https://lore.kernel.org/r/20240527063015.263748-1-danishanwar@ti.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/ti/icssg/icssg_classifier.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/icssg/icssg_classifier.c b/drivers/net/ethernet/ti/icssg/icssg_classifier.c
index 79ba47bb3602..f7d21da1a0fb 100644
--- a/drivers/net/ethernet/ti/icssg/icssg_classifier.c
+++ b/drivers/net/ethernet/ti/icssg/icssg_classifier.c
@@ -455,7 +455,7 @@ void icssg_ft1_set_mac_addr(struct regmap *miig_rt, int slice, u8 *mac_addr)
 {
 	const u8 mask_addr[] = { 0, 0, 0, 0, 0, 0, };
 
-	rx_class_ft1_set_start_len(miig_rt, slice, 0, 6);
+	rx_class_ft1_set_start_len(miig_rt, slice, ETH_ALEN, ETH_ALEN);
 	rx_class_ft1_set_da(miig_rt, slice, 0, mac_addr);
 	rx_class_ft1_set_da_mask(miig_rt, slice, 0, mask_addr);
 	rx_class_ft1_cfg_set_type(miig_rt, slice, 0, FT1_CFG_TYPE_EQ);
-- 
cgit v1.2.3


From da42b5229b27bb5c0eff3408c92f025e6041dad3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 28 May 2024 11:01:20 -0300
Subject: tools headers: Update the syscall tables and unistd.h, mostly to
 support the new 'mseal' syscall

But also to wire up shadow stacks on 32-bit x86, picking up those
changes from these csets:

  ff388fe5c481d39c ("mseal: wire up mseal syscall")
  2883f01ec37dd866 ("x86/shstk: Enable shadow stacks for x32")

This makes 'perf trace' support it, now its possible, for instance to
do:

  # perf trace -e mseal --max-stack=16

Here is an example with the 'sendmmsg' syscall:

  root@x1:~# perf trace -e sendmmsg --max-stack 16 --max-events=1
       0.000 ( 0.062 ms): dbus-broker/1012 sendmmsg(fd: 150, mmsg: 0x7ffef57cca50, vlen: 1, flags: DONTWAIT|NOSIGNAL) = 1
                                         syscall_exit_to_user_mode_prepare ([kernel.kallsyms])
                                         syscall_exit_to_user_mode_prepare ([kernel.kallsyms])
                                         syscall_exit_to_user_mode ([kernel.kallsyms])
                                         do_syscall_64 ([kernel.kallsyms])
                                         entry_SYSCALL_64 ([kernel.kallsyms])
                                         [0x117ce7] (/usr/lib64/libc.so.6 (deleted))
  root@x1:~#

To do a system wide tracing of the new 'mseal' syscall with a backtrace
of at most 16 entries.

This addresses these perf tools build warnings:

  Warning: Kernel ABI header differences:
    diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h
    diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
    diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl
    diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl
    diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H J Lu <hjl.tools@gmail.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jeff Xu <jeffxu@chromium.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/lkml/ZlXlo4TNcba4wnVZ@x1
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/asm-generic/unistd.h             | 5 ++++-
 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 1 +
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl  | 1 +
 tools/perf/arch/s390/entry/syscalls/syscall.tbl     | 1 +
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl   | 3 ++-
 5 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 75f00965ab15..d983c48a3b6a 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -842,8 +842,11 @@ __SYSCALL(__NR_lsm_set_self_attr, sys_lsm_set_self_attr)
 #define __NR_lsm_list_modules 461
 __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules)
 
+#define __NR_mseal 462
+__SYSCALL(__NR_mseal, sys_mseal)
+
 #undef __NR_syscalls
-#define __NR_syscalls 462
+#define __NR_syscalls 463
 
 /*
  * 32 bit systems traditionally used different
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index 532b855df589..1464c6be6eb3 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -376,3 +376,4 @@
 459	n64	lsm_get_self_attr		sys_lsm_get_self_attr
 460	n64	lsm_set_self_attr		sys_lsm_set_self_attr
 461	n64	lsm_list_modules		sys_lsm_list_modules
+462	n64	mseal				sys_mseal
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 17173b82ca21..3656f1ca7a21 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -548,3 +548,4 @@
 459	common	lsm_get_self_attr		sys_lsm_get_self_attr
 460	common	lsm_set_self_attr		sys_lsm_set_self_attr
 461	common	lsm_list_modules		sys_lsm_list_modules
+462	common	mseal				sys_mseal
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 095bb86339a7..bd0fee24ad10 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -464,3 +464,4 @@
 459  common	lsm_get_self_attr	sys_lsm_get_self_attr		sys_lsm_get_self_attr
 460  common	lsm_set_self_attr	sys_lsm_set_self_attr		sys_lsm_set_self_attr
 461  common	lsm_list_modules	sys_lsm_list_modules		sys_lsm_list_modules
+462  common	mseal			sys_mseal			sys_mseal
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 7e8d46f4147f..a396f6e6ab5b 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -374,7 +374,7 @@
 450	common	set_mempolicy_home_node	sys_set_mempolicy_home_node
 451	common	cachestat		sys_cachestat
 452	common	fchmodat2		sys_fchmodat2
-453	64	map_shadow_stack	sys_map_shadow_stack
+453	common	map_shadow_stack	sys_map_shadow_stack
 454	common	futex_wake		sys_futex_wake
 455	common	futex_wait		sys_futex_wait
 456	common	futex_requeue		sys_futex_requeue
@@ -383,6 +383,7 @@
 459	common	lsm_get_self_attr	sys_lsm_get_self_attr
 460	common	lsm_set_self_attr	sys_lsm_set_self_attr
 461	common	lsm_list_modules	sys_lsm_list_modules
+462 	common  mseal			sys_mseal
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
-- 
cgit v1.2.3


From f13e01b89daf42330a4a722f451e48c3e2edfc8d Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 17 May 2024 12:48:25 +0100
Subject: btrfs: ensure fast fsync waits for ordered extents after a write
 failure

If a write path in COW mode fails, either before submitting a bio for the
new extents or an actual IO error happens, we can end up allowing a fast
fsync to log file extent items that point to unwritten extents.

This is because dropping the extent maps happens when completing ordered
extents, at btrfs_finish_one_ordered(), and the completion of an ordered
extent is executed in a work queue.

This can result in a fast fsync to start logging file extent items based
on existing extent maps before the ordered extents complete, therefore
resulting in a log that has file extent items that point to unwritten
extents, resulting in a corrupt file if a crash happens after and the log
tree is replayed the next time the fs is mounted.

This can happen for both direct IO writes and buffered writes.

For example consider a direct IO write, in COW mode, that fails at
btrfs_dio_submit_io() because btrfs_extract_ordered_extent() returned an
error:

1) We call btrfs_finish_ordered_extent() with the 'uptodate' parameter
   set to false, meaning an error happened;

2) That results in marking the ordered extent with the BTRFS_ORDERED_IOERR
   flag;

3) btrfs_finish_ordered_extent() queues the completion of the ordered
   extent - so that btrfs_finish_one_ordered() will be executed later in
   a work queue. That function will drop extent maps in the range when
   it's executed, since the extent maps point to unwritten locations
   (signaled by the BTRFS_ORDERED_IOERR flag);

4) After calling btrfs_finish_ordered_extent() we keep going down the
   write path and unlock the inode;

5) After that a fast fsync starts and locks the inode;

6) Before the work queue executes btrfs_finish_one_ordered(), the fsync
   task sees the extent maps that point to the unwritten locations and
   logs file extent items based on them - it does not know they are
   unwritten, and the fast fsync path does not wait for ordered extents
   to complete, which is an intentional behaviour in order to reduce
   latency.

For the buffered write case, here's one example:

1) A fast fsync begins, and it starts by flushing delalloc and waiting for
   the writeback to complete by calling filemap_fdatawait_range();

2) Flushing the dellaloc created a new extent map X;

3) During the writeback some IO error happened, and at the end io callback
   (end_bbio_data_write()) we call btrfs_finish_ordered_extent(), which
   sets the BTRFS_ORDERED_IOERR flag in the ordered extent and queues its
   completion;

4) After queuing the ordered extent completion, the end io callback clears
   the writeback flag from all pages (or folios), and from that moment the
   fast fsync can proceed;

5) The fast fsync proceeds sees extent map X and logs a file extent item
   based on extent map X, resulting in a log that points to an unwritten
   data extent - because the ordered extent completion hasn't run yet, it
   happens only after the logging.

To fix this make btrfs_finish_ordered_extent() set the inode flag
BTRFS_INODE_NEEDS_FULL_SYNC in case an error happened for a COW write,
so that a fast fsync will wait for ordered extent completion.

Note that this issues of using extent maps that point to unwritten
locations can not happen for reads, because in read paths we start by
locking the extent range and wait for any ordered extents in the range
to complete before looking for extent maps.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/btrfs_inode.h  | 10 ++++++++++
 fs/btrfs/file.c         | 16 ++++++++++++++++
 fs/btrfs/ordered-data.c | 31 +++++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+)

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 91c994b569f3..6ed495ca7a31 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -89,6 +89,16 @@ enum {
 	BTRFS_INODE_FREE_SPACE_INODE,
 	/* Set when there are no capabilities in XATTs for the inode. */
 	BTRFS_INODE_NO_CAP_XATTR,
+	/*
+	 * Set if an error happened when doing a COW write before submitting a
+	 * bio or during writeback. Used for both buffered writes and direct IO
+	 * writes. This is to signal a fast fsync that it has to wait for
+	 * ordered extents to complete and therefore not log extent maps that
+	 * point to unwritten extents (when an ordered extent completes and it
+	 * has the BTRFS_ORDERED_IOERR flag set, it drops extent maps in its
+	 * range).
+	 */
+	BTRFS_INODE_COW_WRITE_ERROR,
 };
 
 /* in memory btrfs inode */
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0c7c1b42028e..00670596bf06 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1885,6 +1885,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 */
 	if (full_sync || btrfs_is_zoned(fs_info)) {
 		ret = btrfs_wait_ordered_range(inode, start, len);
+		clear_bit(BTRFS_INODE_COW_WRITE_ERROR, &BTRFS_I(inode)->runtime_flags);
 	} else {
 		/*
 		 * Get our ordered extents as soon as possible to avoid doing
@@ -1894,6 +1895,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 		btrfs_get_ordered_extents_for_logging(BTRFS_I(inode),
 						      &ctx.ordered_extents);
 		ret = filemap_fdatawait_range(inode->i_mapping, start, end);
+		if (ret)
+			goto out_release_extents;
+
+		/*
+		 * Check and clear the BTRFS_INODE_COW_WRITE_ERROR now after
+		 * starting and waiting for writeback, because for buffered IO
+		 * it may have been set during the end IO callback
+		 * (end_bbio_data_write() -> btrfs_finish_ordered_extent()) in
+		 * case an error happened and we need to wait for ordered
+		 * extents to complete so that any extent maps that point to
+		 * unwritten locations are dropped and we don't log them.
+		 */
+		if (test_and_clear_bit(BTRFS_INODE_COW_WRITE_ERROR,
+				       &BTRFS_I(inode)->runtime_flags))
+			ret = btrfs_wait_ordered_range(inode, start, len);
 	}
 
 	if (ret)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index c5bdd674f55c..35a413ce935d 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -388,6 +388,37 @@ bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
 	ret = can_finish_ordered_extent(ordered, page, file_offset, len, uptodate);
 	spin_unlock_irqrestore(&inode->ordered_tree_lock, flags);
 
+	/*
+	 * If this is a COW write it means we created new extent maps for the
+	 * range and they point to unwritten locations if we got an error either
+	 * before submitting a bio or during IO.
+	 *
+	 * We have marked the ordered extent with BTRFS_ORDERED_IOERR, and we
+	 * are queuing its completion below. During completion, at
+	 * btrfs_finish_one_ordered(), we will drop the extent maps for the
+	 * unwritten extents.
+	 *
+	 * However because completion runs in a work queue we can end up having
+	 * a fast fsync running before that. In the case of direct IO, once we
+	 * unlock the inode the fsync might start, and we queue the completion
+	 * before unlocking the inode. In the case of buffered IO when writeback
+	 * finishes (end_bbio_data_write()) we queue the completion, so if the
+	 * writeback was triggered by a fast fsync, the fsync might start
+	 * logging before ordered extent completion runs in the work queue.
+	 *
+	 * The fast fsync will log file extent items based on the extent maps it
+	 * finds, so if by the time it collects extent maps the ordered extent
+	 * completion didn't happen yet, it will log file extent items that
+	 * point to unwritten extents, resulting in a corruption if a crash
+	 * happens and the log tree is replayed. Note that a fast fsync does not
+	 * wait for completion of ordered extents in order to reduce latency.
+	 *
+	 * Set a flag in the inode so that the next fast fsync will wait for
+	 * ordered extents to complete before starting to log.
+	 */
+	if (!uptodate && !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
+		set_bit(BTRFS_INODE_COW_WRITE_ERROR, &inode->runtime_flags);
+
 	if (ret)
 		btrfs_queue_ordered_fn(ordered);
 	return ret;
-- 
cgit v1.2.3


From 43cad521c6d228ea0c51e248f8e5b3a6295a2849 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Tue, 30 Apr 2024 14:07:06 +0530
Subject: tools/power/cpupower: Fix Pstate frequency reporting on AMD Family
 1Ah CPUs

Update cpupower's P-State frequency calculation and reporting with AMD
Family 1Ah+ processors, when using the acpi-cpufreq driver. This is due
to a change in the PStateDef MSR layout in AMD Family 1Ah+.

Tested on 4th and 5th Gen AMD EPYC system

Signed-off-by: Ananth Narayan <Ananth.Narayan@amd.com>
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/power/cpupower/utils/helpers/amd.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c
index c519cc89c97f..0a56e22240fc 100644
--- a/tools/power/cpupower/utils/helpers/amd.c
+++ b/tools/power/cpupower/utils/helpers/amd.c
@@ -41,6 +41,16 @@ union core_pstate {
 		unsigned res1:31;
 		unsigned en:1;
 	} pstatedef;
+	/* since fam 1Ah: */
+	struct {
+		unsigned fid:12;
+		unsigned res1:2;
+		unsigned vid:8;
+		unsigned iddval:8;
+		unsigned idddiv:2;
+		unsigned res2:31;
+		unsigned en:1;
+	} pstatedef2;
 	unsigned long long val;
 };
 
@@ -48,6 +58,10 @@ static int get_did(union core_pstate pstate)
 {
 	int t;
 
+	/* Fam 1Ah onward do not use did */
+	if (cpupower_cpu_info.family >= 0x1A)
+		return 0;
+
 	if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATEDEF)
 		t = pstate.pstatedef.did;
 	else if (cpupower_cpu_info.family == 0x12)
@@ -61,12 +75,18 @@ static int get_did(union core_pstate pstate)
 static int get_cof(union core_pstate pstate)
 {
 	int t;
-	int fid, did, cof;
+	int fid, did, cof = 0;
 
 	did = get_did(pstate);
 	if (cpupower_cpu_info.caps & CPUPOWER_CAP_AMD_PSTATEDEF) {
-		fid = pstate.pstatedef.fid;
-		cof = 200 * fid / did;
+		if (cpupower_cpu_info.family >= 0x1A) {
+			fid = pstate.pstatedef2.fid;
+			if (fid > 0x0f)
+				cof = (fid * 5);
+		} else {
+			fid = pstate.pstatedef.fid;
+			cof = 200 * fid / did;
+		}
 	} else {
 		t = 0x10;
 		fid = pstate.pstate.fid;
-- 
cgit v1.2.3


From 1292bc2ebf63e705ae18bbaaf9cea21b68d37ee6 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Fri, 24 May 2024 13:59:34 -0400
Subject: bcachefs: Plumb bkey into __btree_err()

It can be useful to know the exact byte offset within a btree node where
an error occured.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/btree_io.c | 85 ++++++++++++++++++++++++++------------------------
 1 file changed, 45 insertions(+), 40 deletions(-)

diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index cbf8f5d90602..829c1b91477d 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -519,7 +519,7 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
 
 static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
 			  struct bch_dev *ca,
-			  struct btree *b, struct bset *i,
+			  struct btree *b, struct bset *i, struct bkey_packed *k,
 			  unsigned offset, int write)
 {
 	prt_printf(out, bch2_log_msg(c, "%s"),
@@ -537,15 +537,20 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
 		   b->written, btree_ptr_sectors_written(&b->key));
 	if (i)
 		prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s));
+	if (k)
+		prt_printf(out, " bset byte offset %lu",
+			   (unsigned long)(void *)k -
+			   ((unsigned long)(void *)i & ~511UL));
 	prt_str(out, ": ");
 }
 
-__printf(9, 10)
+__printf(10, 11)
 static int __btree_err(int ret,
 		       struct bch_fs *c,
 		       struct bch_dev *ca,
 		       struct btree *b,
 		       struct bset *i,
+		       struct bkey_packed *k,
 		       int write,
 		       bool have_retry,
 		       enum bch_sb_error_id err_type,
@@ -555,7 +560,7 @@ static int __btree_err(int ret,
 	bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes;
 	va_list args;
 
-	btree_err_msg(&out, c, ca, b, i, b->written, write);
+	btree_err_msg(&out, c, ca, b, i, k, b->written, write);
 
 	va_start(args, fmt);
 	prt_vprintf(&out, fmt, args);
@@ -611,9 +616,9 @@ fsck_err:
 	return ret;
 }
 
-#define btree_err(type, c, ca, b, i, _err_type, msg, ...)		\
+#define btree_err(type, c, ca, b, i, k, _err_type, msg, ...)		\
 ({									\
-	int _ret = __btree_err(type, c, ca, b, i, write, have_retry,	\
+	int _ret = __btree_err(type, c, ca, b, i, k, write, have_retry,	\
 			       BCH_FSCK_ERR_##_err_type,		\
 			       msg, ##__VA_ARGS__);			\
 									\
@@ -690,7 +695,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 
 	btree_err_on(!bch2_version_compatible(version),
 		     -BCH_ERR_btree_node_read_err_incompatible,
-		     c, ca, b, i,
+		     c, ca, b, i, NULL,
 		     btree_node_unsupported_version,
 		     "unsupported bset version %u.%u",
 		     BCH_VERSION_MAJOR(version),
@@ -698,7 +703,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 
 	if (btree_err_on(version < c->sb.version_min,
 			 -BCH_ERR_btree_node_read_err_fixable,
-			 c, NULL, b, i,
+			 c, NULL, b, i, NULL,
 			 btree_node_bset_older_than_sb_min,
 			 "bset version %u older than superblock version_min %u",
 			 version, c->sb.version_min)) {
@@ -711,7 +716,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 	if (btree_err_on(BCH_VERSION_MAJOR(version) >
 			 BCH_VERSION_MAJOR(c->sb.version),
 			 -BCH_ERR_btree_node_read_err_fixable,
-			 c, NULL, b, i,
+			 c, NULL, b, i, NULL,
 			 btree_node_bset_newer_than_sb,
 			 "bset version %u newer than superblock version %u",
 			 version, c->sb.version)) {
@@ -723,13 +728,13 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 
 	btree_err_on(BSET_SEPARATE_WHITEOUTS(i),
 		     -BCH_ERR_btree_node_read_err_incompatible,
-		     c, ca, b, i,
+		     c, ca, b, i, NULL,
 		     btree_node_unsupported_version,
 		     "BSET_SEPARATE_WHITEOUTS no longer supported");
 
 	if (btree_err_on(offset + sectors > btree_sectors(c),
 			 -BCH_ERR_btree_node_read_err_fixable,
-			 c, ca, b, i,
+			 c, ca, b, i, NULL,
 			 bset_past_end_of_btree_node,
 			 "bset past end of btree node")) {
 		i->u64s = 0;
@@ -739,13 +744,13 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 
 	btree_err_on(offset && !i->u64s,
 		     -BCH_ERR_btree_node_read_err_fixable,
-		     c, ca, b, i,
+		     c, ca, b, i, NULL,
 		     bset_empty,
 		     "empty bset");
 
 	btree_err_on(BSET_OFFSET(i) && BSET_OFFSET(i) != offset,
 		     -BCH_ERR_btree_node_read_err_want_retry,
-		     c, ca, b, i,
+		     c, ca, b, i, NULL,
 		     bset_wrong_sector_offset,
 		     "bset at wrong sector offset");
 
@@ -761,20 +766,20 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 			/* XXX endianness */
 			btree_err_on(bp->seq != bn->keys.seq,
 				     -BCH_ERR_btree_node_read_err_must_retry,
-				     c, ca, b, NULL,
+				     c, ca, b, NULL, NULL,
 				     bset_bad_seq,
 				     "incorrect sequence number (wrong btree node)");
 		}
 
 		btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id,
 			     -BCH_ERR_btree_node_read_err_must_retry,
-			     c, ca, b, i,
+			     c, ca, b, i, NULL,
 			     btree_node_bad_btree,
 			     "incorrect btree id");
 
 		btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level,
 			     -BCH_ERR_btree_node_read_err_must_retry,
-			     c, ca, b, i,
+			     c, ca, b, i, NULL,
 			     btree_node_bad_level,
 			     "incorrect level");
 
@@ -793,7 +798,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 
 			btree_err_on(!bpos_eq(b->data->min_key, bp->min_key),
 				     -BCH_ERR_btree_node_read_err_must_retry,
-				     c, ca, b, NULL,
+				     c, ca, b, NULL, NULL,
 				     btree_node_bad_min_key,
 				     "incorrect min_key: got %s should be %s",
 				     (printbuf_reset(&buf1),
@@ -804,7 +809,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 
 		btree_err_on(!bpos_eq(bn->max_key, b->key.k.p),
 			     -BCH_ERR_btree_node_read_err_must_retry,
-			     c, ca, b, i,
+			     c, ca, b, i, NULL,
 			     btree_node_bad_max_key,
 			     "incorrect max key %s",
 			     (printbuf_reset(&buf1),
@@ -816,7 +821,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 
 		btree_err_on(bch2_bkey_format_invalid(c, &bn->format, write, &buf1),
 			     -BCH_ERR_btree_node_read_err_bad_node,
-			     c, ca, b, i,
+			     c, ca, b, i, NULL,
 			     btree_node_bad_format,
 			     "invalid bkey format: %s\n  %s", buf1.buf,
 			     (printbuf_reset(&buf2),
@@ -883,7 +888,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
 
 		if (btree_err_on(bkey_p_next(k) > vstruct_last(i),
 				 -BCH_ERR_btree_node_read_err_fixable,
-				 c, NULL, b, i,
+				 c, NULL, b, i, k,
 				 btree_node_bkey_past_bset_end,
 				 "key extends past end of bset")) {
 			i->u64s = cpu_to_le16((u64 *) k - i->_data);
@@ -892,14 +897,14 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
 
 		if (btree_err_on(k->format > KEY_FORMAT_CURRENT,
 				 -BCH_ERR_btree_node_read_err_fixable,
-				 c, NULL, b, i,
+				 c, NULL, b, i, k,
 				 btree_node_bkey_bad_format,
 				 "invalid bkey format %u", k->format))
 			goto drop_this_key;
 
 		if (btree_err_on(!bkeyp_u64s_valid(&b->format, k),
 				 -BCH_ERR_btree_node_read_err_fixable,
-				 c, NULL, b, i,
+				 c, NULL, b, i, k,
 				 btree_node_bkey_bad_u64s,
 				 "bad k->u64s %u (min %u max %zu)", k->u64s,
 				 bkeyp_key_u64s(&b->format, k),
@@ -921,7 +926,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
 			bch2_bkey_val_to_text(&buf, c, u.s_c);
 
 			btree_err(-BCH_ERR_btree_node_read_err_fixable,
-				  c, NULL, b, i,
+				  c, NULL, b, i, k,
 				  btree_node_bad_bkey,
 				  "invalid bkey: %s", buf.buf);
 			goto drop_this_key;
@@ -942,7 +947,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
 			bch2_bkey_to_text(&buf, u.k);
 
 			if (btree_err(-BCH_ERR_btree_node_read_err_fixable,
-				      c, NULL, b, i,
+				      c, NULL, b, i, k,
 				      btree_node_bkey_out_of_order,
 				      "%s", buf.buf))
 				goto drop_this_key;
@@ -1011,13 +1016,13 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 
 	if (bch2_meta_read_fault("btree"))
 		btree_err(-BCH_ERR_btree_node_read_err_must_retry,
-			  c, ca, b, NULL,
+			  c, ca, b, NULL, NULL,
 			  btree_node_fault_injected,
 			  "dynamic fault");
 
 	btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c),
 		     -BCH_ERR_btree_node_read_err_must_retry,
-		     c, ca, b, NULL,
+		     c, ca, b, NULL, NULL,
 		     btree_node_bad_magic,
 		     "bad magic: want %llx, got %llx",
 		     bset_magic(c), le64_to_cpu(b->data->magic));
@@ -1032,7 +1037,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 
 		btree_err_on(b->data->keys.seq != bp->seq,
 			     -BCH_ERR_btree_node_read_err_must_retry,
-			     c, ca, b, NULL,
+			     c, ca, b, NULL, NULL,
 			     btree_node_bad_seq,
 			     "got wrong btree node: got\n%s",
 			     (printbuf_reset(&buf),
@@ -1041,7 +1046,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 	} else {
 		btree_err_on(!b->data->keys.seq,
 			     -BCH_ERR_btree_node_read_err_must_retry,
-			     c, ca, b, NULL,
+			     c, ca, b, NULL, NULL,
 			     btree_node_bad_seq,
 			     "bad btree header: seq 0\n%s",
 			     (printbuf_reset(&buf),
@@ -1060,7 +1065,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 
 			btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
 				     -BCH_ERR_btree_node_read_err_want_retry,
-				     c, ca, b, i,
+				     c, ca, b, i, NULL,
 				     bset_unknown_csum,
 				     "unknown checksum type %llu", BSET_CSUM_TYPE(i));
 
@@ -1073,7 +1078,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 
 			btree_err_on(csum_bad,
 				     -BCH_ERR_btree_node_read_err_want_retry,
-				     c, ca, b, i,
+				     c, ca, b, i, NULL,
 				     bset_bad_csum,
 				     "%s",
 				     (printbuf_reset(&buf),
@@ -1088,7 +1093,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 			btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
 				     !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data),
 				     -BCH_ERR_btree_node_read_err_incompatible,
-				     c, NULL, b, NULL,
+				     c, NULL, b, NULL, NULL,
 				     btree_node_unsupported_version,
 				     "btree node does not have NEW_EXTENT_OVERWRITE set");
 
@@ -1102,7 +1107,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 
 			btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
 				     -BCH_ERR_btree_node_read_err_want_retry,
-				     c, ca, b, i,
+				     c, ca, b, i, NULL,
 				     bset_unknown_csum,
 				     "unknown checksum type %llu", BSET_CSUM_TYPE(i));
 
@@ -1114,7 +1119,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 
 			btree_err_on(csum_bad,
 				     -BCH_ERR_btree_node_read_err_want_retry,
-				     c, ca, b, i,
+				     c, ca, b, i, NULL,
 				     bset_bad_csum,
 				     "%s",
 				     (printbuf_reset(&buf),
@@ -1152,14 +1157,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 
 		btree_err_on(blacklisted && first,
 			     -BCH_ERR_btree_node_read_err_fixable,
-			     c, ca, b, i,
+			     c, ca, b, i, NULL,
 			     bset_blacklisted_journal_seq,
 			     "first btree node bset has blacklisted journal seq (%llu)",
 			     le64_to_cpu(i->journal_seq));
 
 		btree_err_on(blacklisted && ptr_written,
 			     -BCH_ERR_btree_node_read_err_fixable,
-			     c, ca, b, i,
+			     c, ca, b, i, NULL,
 			     first_bset_blacklisted_journal_seq,
 			     "found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u",
 			     le64_to_cpu(i->journal_seq),
@@ -1178,7 +1183,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 	if (ptr_written) {
 		btree_err_on(b->written < ptr_written,
 			     -BCH_ERR_btree_node_read_err_want_retry,
-			     c, ca, b, NULL,
+			     c, ca, b, NULL, NULL,
 			     btree_node_data_missing,
 			     "btree node data missing: expected %u sectors, found %u",
 			     ptr_written, b->written);
@@ -1191,7 +1196,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 								      le64_to_cpu(bne->keys.journal_seq),
 								      true),
 				     -BCH_ERR_btree_node_read_err_want_retry,
-				     c, ca, b, NULL,
+				     c, ca, b, NULL, NULL,
 				     btree_node_bset_after_end,
 				     "found bset signature after last bset");
 	}
@@ -1235,7 +1240,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
 			bch2_bkey_val_to_text(&buf, c, u.s_c);
 
 			btree_err(-BCH_ERR_btree_node_read_err_fixable,
-				  c, NULL, b, i,
+				  c, NULL, b, i, k,
 				  btree_node_bad_bkey,
 				  "%s", buf.buf);
 
@@ -1471,18 +1476,18 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
 
 		written2 = btree_node_sectors_written(c, ra->buf[i]);
 		if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable,
-				 c, NULL, b, NULL,
+				 c, NULL, b, NULL, NULL,
 				 btree_node_replicas_sectors_written_mismatch,
 				 "btree node sectors written mismatch: %u != %u",
 				 written, written2) ||
 		    btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]),
 				 -BCH_ERR_btree_node_read_err_fixable,
-				 c, NULL, b, NULL,
+				 c, NULL, b, NULL, NULL,
 				 btree_node_bset_after_end,
 				 "found bset signature after last bset") ||
 		    btree_err_on(memcmp(ra->buf[best], ra->buf[i], written << 9),
 				 -BCH_ERR_btree_node_read_err_fixable,
-				 c, NULL, b, NULL,
+				 c, NULL, b, NULL, NULL,
 				 btree_node_replicas_data_mismatch,
 				 "btree node replicas content mismatch"))
 			dump_bset_maps = true;
-- 
cgit v1.2.3


From 9e1a66e66870ebeebea9f674550118df3c12eaf6 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 26 May 2024 13:24:31 -0400
Subject: bcachefs: Fix lookup_first_inode() when inode_generations are present

This function is used for finding the hash seed (which is the same in
all versions of an inode in different snapshots): ff an inode has been
deleted in a child snapshot we need to iterate until we find a live
version.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/fsck.c | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index c8f57465131c..4cd28db9bad8 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -77,21 +77,17 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
 	struct bkey_s_c k;
 	int ret;
 
-	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
-			     POS(0, inode_nr),
-			     BTREE_ITER_all_snapshots);
-	k = bch2_btree_iter_peek(&iter);
-	ret = bkey_err(k);
-	if (ret)
-		goto err;
-
-	if (!k.k || !bkey_eq(k.k->p, POS(0, inode_nr))) {
-		ret = -BCH_ERR_ENOENT_inode;
-		goto err;
+	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr),
+				     BTREE_ITER_all_snapshots, k, ret) {
+		if (k.k->p.offset != inode_nr)
+			break;
+		if (!bkey_is_inode(k.k))
+			continue;
+		ret = bch2_inode_unpack(k, inode);
+		goto found;
 	}
-
-	ret = bch2_inode_unpack(k, inode);
-err:
+	ret = -BCH_ERR_ENOENT_inode;
+found:
 	bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
 	bch2_trans_iter_exit(trans, &iter);
 	return ret;
-- 
cgit v1.2.3


From 218e5e0c2a3acdb29ccbdfbfdd5e2def27d3aae2 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Mon, 27 May 2024 02:40:06 -0400
Subject: bcachefs: Fix locking assert

We now track whether a transaction is locked, and verify that we don't
have nodes locked when the transaction isn't locked; reorder relocks to
not pop the new assert.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/btree_key_cache.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 75f5e6fe4634..34056aaece00 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -424,16 +424,16 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 				goto err;
 			}
 
-			if (!bch2_btree_node_relock(trans, ck_path, 0)) {
+			ret = bch2_trans_relock(trans);
+			if (ret) {
 				kfree(new_k);
-				trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
-				ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
 				goto err;
 			}
 
-			ret = bch2_trans_relock(trans);
-			if (ret) {
+			if (!bch2_btree_node_relock(trans, ck_path, 0)) {
 				kfree(new_k);
+				trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
+				ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
 				goto err;
 			}
 		}
-- 
cgit v1.2.3


From 82af5ceb5d9d1f0613be3b9161ec1104b85f00b8 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 26 May 2024 22:22:30 -0400
Subject: bcachefs: Refactor delete_dead_snapshots()

Consolidate per-key work into delete_dead_snapshots_process_key(), so we
now walk all keys once, not twice.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/snapshot.c | 65 +++++++++++++++++++-------------------------------
 1 file changed, 25 insertions(+), 40 deletions(-)

diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 629900a5e641..466fa3e6a4b6 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -1351,7 +1351,7 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
  * that key to snapshot leaf nodes, where we can mutate it
  */
 
-static int snapshot_delete_key(struct btree_trans *trans,
+static int delete_dead_snapshots_process_key(struct btree_trans *trans,
 			       struct btree_iter *iter,
 			       struct bkey_s_c k,
 			       snapshot_id_list *deleted,
@@ -1360,26 +1360,26 @@ static int snapshot_delete_key(struct btree_trans *trans,
 {
 	struct bch_fs *c = trans->c;
 	u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
+	if (!equiv) /* key for invalid snapshot node, but we chose not to delete */
+		return 0;
 
 	if (!bkey_eq(k.k->p, *last_pos))
 		equiv_seen->nr = 0;
-	*last_pos = k.k->p;
 
-	if (snapshot_list_has_id(deleted, k.k->p.snapshot) ||
-	    snapshot_list_has_id(equiv_seen, equiv)) {
+	if (snapshot_list_has_id(deleted, k.k->p.snapshot))
 		return bch2_btree_delete_at(trans, iter,
 					    BTREE_UPDATE_internal_snapshot_node);
-	} else {
-		return snapshot_list_add(c, equiv_seen, equiv);
-	}
-}
 
-static int move_key_to_correct_snapshot(struct btree_trans *trans,
-			       struct btree_iter *iter,
-			       struct bkey_s_c k)
-{
-	struct bch_fs *c = trans->c;
-	u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
+	if (!bpos_eq(*last_pos, k.k->p) &&
+	    snapshot_list_has_id(equiv_seen, equiv))
+		return bch2_btree_delete_at(trans, iter,
+					    BTREE_UPDATE_internal_snapshot_node);
+
+	*last_pos = k.k->p;
+
+	int ret = snapshot_list_add_nodup(c, equiv_seen, equiv);
+	if (ret)
+		return ret;
 
 	/*
 	 * When we have a linear chain of snapshot nodes, we consider
@@ -1389,21 +1389,20 @@ static int move_key_to_correct_snapshot(struct btree_trans *trans,
 	 *
 	 * If there are multiple keys in different snapshots at the same
 	 * position, we're only going to keep the one in the newest
-	 * snapshot - the rest have been overwritten and are redundant,
-	 * and for the key we're going to keep we need to move it to the
-	 * equivalance class ID if it's not there already.
+	 * snapshot (we delete the others above) - the rest have been
+	 * overwritten and are redundant, and for the key we're going to keep we
+	 * need to move it to the equivalance class ID if it's not there
+	 * already.
 	 */
 	if (equiv != k.k->p.snapshot) {
 		struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
-		struct btree_iter new_iter;
-		int ret;
-
-		ret = PTR_ERR_OR_ZERO(new);
+		int ret = PTR_ERR_OR_ZERO(new);
 		if (ret)
 			return ret;
 
 		new->k.p.snapshot = equiv;
 
+		struct btree_iter new_iter;
 		bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p,
 				     BTREE_ITER_all_snapshots|
 				     BTREE_ITER_cached|
@@ -1538,7 +1537,6 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
 	struct btree_trans *trans;
 	snapshot_id_list deleted = { 0 };
 	snapshot_id_list deleted_interior = { 0 };
-	u32 id;
 	int ret = 0;
 
 	if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
@@ -1585,33 +1583,20 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
 	if (ret)
 		goto err;
 
-	for (id = 0; id < BTREE_ID_NR; id++) {
+	for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
 		struct bpos last_pos = POS_MIN;
 		snapshot_id_list equiv_seen = { 0 };
 		struct disk_reservation res = { 0 };
 
-		if (!btree_type_has_snapshots(id))
-			continue;
-
-		/*
-		 * deleted inodes btree is maintained by a trigger on the inodes
-		 * btree - no work for us to do here, and it's not safe to scan
-		 * it because we'll see out of date keys due to the btree write
-		 * buffer:
-		 */
-		if (id == BTREE_ID_deleted_inodes)
+		if (!btree_type_has_snapshots(btree))
 			continue;
 
 		ret = for_each_btree_key_commit(trans, iter,
-				id, POS_MIN,
-				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
-				&res, NULL, BCH_TRANS_COMMIT_no_enospc,
-			snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?:
-		      for_each_btree_key_commit(trans, iter,
-				id, POS_MIN,
+				btree, POS_MIN,
 				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
 				&res, NULL, BCH_TRANS_COMMIT_no_enospc,
-			move_key_to_correct_snapshot(trans, &iter, k));
+			delete_dead_snapshots_process_key(trans, &iter, k, &deleted,
+							  &equiv_seen, &last_pos));
 
 		bch2_disk_reservation_put(c, &res);
 		darray_exit(&equiv_seen);
-- 
cgit v1.2.3


From 08f50005e09f3bf74a7cb5fd86335d3c4077df51 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 26 May 2024 12:38:30 -0400
Subject: bcachefs: Run check_key_has_snapshot in snapshot_delete_keys()

delete_dead_snapshots now runs before the main fsck.c passes which check
for keys for invalid snapshots; thus, it needs those checks as well.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/fsck.c     | 27 ++++-----------------------
 fs/bcachefs/snapshot.c | 25 ++++++++++++++++++++++++-
 fs/bcachefs/snapshot.h |  1 +
 3 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 4cd28db9bad8..fd277bd58ed3 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -766,25 +766,6 @@ static int get_visible_inodes(struct btree_trans *trans,
 	return ret;
 }
 
-static int check_key_has_snapshot(struct btree_trans *trans,
-				  struct btree_iter *iter,
-				  struct bkey_s_c k)
-{
-	struct bch_fs *c = trans->c;
-	struct printbuf buf = PRINTBUF;
-	int ret = 0;
-
-	if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c,
-				bkey_in_missing_snapshot,
-				"key in missing snapshot: %s",
-				(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
-		ret = bch2_btree_delete_at(trans, iter,
-					    BTREE_UPDATE_internal_snapshot_node) ?: 1;
-fsck_err:
-	printbuf_exit(&buf);
-	return ret;
-}
-
 static int hash_redo_key(struct btree_trans *trans,
 			 const struct bch_hash_desc desc,
 			 struct bch_hash_info *hash_info,
@@ -979,7 +960,7 @@ static int check_inode(struct btree_trans *trans,
 	bool do_update = false;
 	int ret;
 
-	ret = check_key_has_snapshot(trans, iter, k);
+	ret = bch2_check_key_has_snapshot(trans, iter, k);
 	if (ret < 0)
 		goto err;
 	if (ret)
@@ -1483,7 +1464,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 	struct printbuf buf = PRINTBUF;
 	int ret = 0;
 
-	ret = check_key_has_snapshot(trans, iter, k);
+	ret = bch2_check_key_has_snapshot(trans, iter, k);
 	if (ret) {
 		ret = ret < 0 ? ret : 0;
 		goto out;
@@ -2006,7 +1987,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
 	struct printbuf buf = PRINTBUF;
 	int ret = 0;
 
-	ret = check_key_has_snapshot(trans, iter, k);
+	ret = bch2_check_key_has_snapshot(trans, iter, k);
 	if (ret) {
 		ret = ret < 0 ? ret : 0;
 		goto out;
@@ -2161,7 +2142,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
 	struct inode_walker_entry *i;
 	int ret;
 
-	ret = check_key_has_snapshot(trans, iter, k);
+	ret = bch2_check_key_has_snapshot(trans, iter, k);
 	if (ret < 0)
 		return ret;
 	if (ret)
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 466fa3e6a4b6..51918acfd726 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -1042,6 +1042,25 @@ err:
 	return ret;
 }
 
+int bch2_check_key_has_snapshot(struct btree_trans *trans,
+				struct btree_iter *iter,
+				struct bkey_s_c k)
+{
+	struct bch_fs *c = trans->c;
+	struct printbuf buf = PRINTBUF;
+	int ret = 0;
+
+	if (fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c,
+			bkey_in_missing_snapshot,
+			"key in missing snapshot %s, delete?",
+			(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+		ret = bch2_btree_delete_at(trans, iter,
+					    BTREE_UPDATE_internal_snapshot_node) ?: 1;
+fsck_err:
+	printbuf_exit(&buf);
+	return ret;
+}
+
 /*
  * Mark a snapshot as deleted, for future cleanup:
  */
@@ -1358,6 +1377,10 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans,
 			       snapshot_id_list *equiv_seen,
 			       struct bpos *last_pos)
 {
+	int ret = bch2_check_key_has_snapshot(trans, iter, k);
+	if (ret)
+		return ret < 0 ? ret : 0;
+
 	struct bch_fs *c = trans->c;
 	u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
 	if (!equiv) /* key for invalid snapshot node, but we chose not to delete */
@@ -1377,7 +1400,7 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans,
 
 	*last_pos = k.k->p;
 
-	int ret = snapshot_list_add_nodup(c, equiv_seen, equiv);
+	ret = snapshot_list_add_nodup(c, equiv_seen, equiv);
 	if (ret)
 		return ret;
 
diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h
index ab13d8f4b41e..31b0ee03e962 100644
--- a/fs/bcachefs/snapshot.h
+++ b/fs/bcachefs/snapshot.h
@@ -242,6 +242,7 @@ int bch2_snapshot_node_create(struct btree_trans *, u32,
 int bch2_check_snapshot_trees(struct bch_fs *);
 int bch2_check_snapshots(struct bch_fs *);
 int bch2_reconstruct_snapshots(struct bch_fs *);
+int bch2_check_key_has_snapshot(struct btree_trans *, struct btree_iter *, struct bkey_s_c);
 
 int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
 void bch2_delete_dead_snapshots_work(struct work_struct *);
-- 
cgit v1.2.3


From 247c056bde2ebc9fad2fc62332dc7cc99b58d720 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Mon, 27 May 2024 16:30:19 -0400
Subject: bcachefs: Fix setting of downgrade recovery passes/errors

bch2_check_version_downgrade() was setting c->sb.version, which
bch2_sb_set_downgrade() expects to be at the previous version; and it
shouldn't even have been set directly because c->sb.version is updated
by write_super().

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/super-io.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index f1bee6c5222d..d73a0222f709 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -1132,18 +1132,12 @@ bool bch2_check_version_downgrade(struct bch_fs *c)
 	 * c->sb will be checked before we write the superblock, so update it as
 	 * well:
 	 */
-	if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) {
+	if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current)
 		SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
-		c->sb.version_upgrade_complete = bcachefs_metadata_version_current;
-	}
-	if (c->sb.version > bcachefs_metadata_version_current) {
+	if (c->sb.version > bcachefs_metadata_version_current)
 		c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
-		c->sb.version = bcachefs_metadata_version_current;
-	}
-	if (c->sb.version_min > bcachefs_metadata_version_current) {
+	if (c->sb.version_min > bcachefs_metadata_version_current)
 		c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
-		c->sb.version_min = bcachefs_metadata_version_current;
-	}
 	c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
 	return ret;
 }
-- 
cgit v1.2.3


From b4131076c16fdd2bc6cb09cfa7e0cfe278aa49a1 Mon Sep 17 00:00:00 2001
From: Jeff Johnson <quic_jjohnson@quicinc.com>
Date: Mon, 27 May 2024 12:01:18 -0700
Subject: bcachefs: add missing MODULE_DESCRIPTION()

Fix the 'make W=1' warning:
WARNING: modpost: missing MODULE_DESCRIPTION() in fs/bcachefs/mean_and_variance_test.o

Signed-off-by: Jeff Johnson <quic_jjohnson@quicinc.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/mean_and_variance_test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/bcachefs/mean_and_variance_test.c b/fs/bcachefs/mean_and_variance_test.c
index 4c298e74723d..e9d9c0212e44 100644
--- a/fs/bcachefs/mean_and_variance_test.c
+++ b/fs/bcachefs/mean_and_variance_test.c
@@ -217,4 +217,5 @@ static struct kunit_suite mean_and_variance_test_suite = {
 kunit_test_suite(mean_and_variance_test_suite);
 
 MODULE_AUTHOR("Daniel B. Hill");
+MODULE_DESCRIPTION("bcachefs filesystem mean and variance unit tests");
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 088d0de81220a74d7d553febb81656927f10bb16 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Mon, 27 May 2024 18:40:50 -0400
Subject: bcachefs: btree_gc can now handle unknown btrees

Compatibility fix - we no longer have a separate table for which order
gc walks btrees in, and special case the stripes btree directly.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/bcachefs.h       | 44 +-------------------------------------------
 fs/bcachefs/btree_gc.c       |  9 ++++-----
 fs/bcachefs/btree_gc.h       | 44 ++++++++++++++++++++------------------------
 fs/bcachefs/btree_gc_types.h | 29 +++++++++++++++++++++++++++++
 fs/bcachefs/ec.c             |  2 +-
 5 files changed, 55 insertions(+), 73 deletions(-)
 create mode 100644 fs/bcachefs/btree_gc_types.h

diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index bc0ea2c4efef..2a538eb2af11 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -457,6 +457,7 @@ enum bch_time_stats {
 };
 
 #include "alloc_types.h"
+#include "btree_gc_types.h"
 #include "btree_types.h"
 #include "btree_node_scan_types.h"
 #include "btree_write_buffer_types.h"
@@ -488,49 +489,6 @@ enum bch_time_stats {
 
 struct btree;
 
-enum gc_phase {
-	GC_PHASE_NOT_RUNNING,
-	GC_PHASE_START,
-	GC_PHASE_SB,
-
-	GC_PHASE_BTREE_stripes,
-	GC_PHASE_BTREE_extents,
-	GC_PHASE_BTREE_inodes,
-	GC_PHASE_BTREE_dirents,
-	GC_PHASE_BTREE_xattrs,
-	GC_PHASE_BTREE_alloc,
-	GC_PHASE_BTREE_quotas,
-	GC_PHASE_BTREE_reflink,
-	GC_PHASE_BTREE_subvolumes,
-	GC_PHASE_BTREE_snapshots,
-	GC_PHASE_BTREE_lru,
-	GC_PHASE_BTREE_freespace,
-	GC_PHASE_BTREE_need_discard,
-	GC_PHASE_BTREE_backpointers,
-	GC_PHASE_BTREE_bucket_gens,
-	GC_PHASE_BTREE_snapshot_trees,
-	GC_PHASE_BTREE_deleted_inodes,
-	GC_PHASE_BTREE_logged_ops,
-	GC_PHASE_BTREE_rebalance_work,
-	GC_PHASE_BTREE_subvolume_children,
-
-	GC_PHASE_PENDING_DELETE,
-};
-
-struct gc_pos {
-	enum gc_phase		phase;
-	u16			level;
-	struct bpos		pos;
-};
-
-struct reflink_gc {
-	u64		offset;
-	u32		size;
-	u32		refcount;
-};
-
-typedef GENRADIX(struct reflink_gc) reflink_gc_table;
-
 struct io_count {
 	u64			sectors[2][BCH_DATA_NR];
 };
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 8035c8b797ab..e9e901feda29 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -673,8 +673,7 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in
 
 static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
 {
-	return  (int) btree_id_to_gc_phase(l) -
-		(int) btree_id_to_gc_phase(r);
+	return cmp_int(gc_btree_order(l), gc_btree_order(r));
 }
 
 static int bch2_gc_btrees(struct bch_fs *c)
@@ -711,7 +710,7 @@ fsck_err:
 static int bch2_mark_superblocks(struct bch_fs *c)
 {
 	mutex_lock(&c->sb_lock);
-	gc_pos_set(c, gc_phase(GC_PHASE_SB));
+	gc_pos_set(c, gc_phase(GC_PHASE_sb));
 
 	int ret = bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc);
 	mutex_unlock(&c->sb_lock);
@@ -1209,7 +1208,7 @@ int bch2_check_allocations(struct bch_fs *c)
 	if (ret)
 		goto out;
 
-	gc_pos_set(c, gc_phase(GC_PHASE_START));
+	gc_pos_set(c, gc_phase(GC_PHASE_start));
 
 	ret = bch2_mark_superblocks(c);
 	BUG_ON(ret);
@@ -1231,7 +1230,7 @@ out:
 
 	percpu_down_write(&c->mark_lock);
 	/* Indicates that gc is no longer in progress: */
-	__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
+	__gc_pos_set(c, gc_phase(GC_PHASE_not_running));
 
 	bch2_gc_free(c);
 	percpu_up_write(&c->mark_lock);
diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h
index 1b6489d8e0f4..876d81e2017d 100644
--- a/fs/bcachefs/btree_gc.h
+++ b/fs/bcachefs/btree_gc.h
@@ -3,6 +3,7 @@
 #define _BCACHEFS_BTREE_GC_H
 
 #include "bkey.h"
+#include "btree_gc_types.h"
 #include "btree_types.h"
 
 int bch2_check_topology(struct bch_fs *);
@@ -32,36 +33,15 @@ int bch2_check_allocations(struct bch_fs *);
 /* Position of (the start of) a gc phase: */
 static inline struct gc_pos gc_phase(enum gc_phase phase)
 {
-	return (struct gc_pos) {
-		.phase	= phase,
-		.level	= 0,
-		.pos	= POS_MIN,
-	};
-}
-
-static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r)
-{
-	return   cmp_int(l.phase, r.phase) ?:
-		-cmp_int(l.level, r.level) ?:
-		 bpos_cmp(l.pos, r.pos);
-}
-
-static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id)
-{
-	switch (id) {
-#define x(name, v, ...) case BTREE_ID_##name: return GC_PHASE_BTREE_##name;
-	BCH_BTREE_IDS()
-#undef x
-	default:
-		BUG();
-	}
+	return (struct gc_pos) { .phase	= phase, };
 }
 
 static inline struct gc_pos gc_pos_btree(enum btree_id btree, unsigned level,
 					 struct bpos pos)
 {
 	return (struct gc_pos) {
-		.phase	= btree_id_to_gc_phase(btree),
+		.phase	= GC_PHASE_btree,
+		.btree	= btree,
 		.level	= level,
 		.pos	= pos,
 	};
@@ -76,6 +56,22 @@ static inline struct gc_pos gc_pos_btree_node(struct btree *b)
 	return gc_pos_btree(b->c.btree_id, b->c.level, b->key.k.p);
 }
 
+static inline int gc_btree_order(enum btree_id btree)
+{
+	if (btree == BTREE_ID_stripes)
+		return -1;
+	return btree;
+}
+
+static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r)
+{
+	return   cmp_int(l.phase, r.phase) ?:
+		 cmp_int(gc_btree_order(l.btree),
+			 gc_btree_order(r.btree)) ?:
+		-cmp_int(l.level, r.level) ?:
+		 bpos_cmp(l.pos, r.pos);
+}
+
 static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
 {
 	unsigned seq;
diff --git a/fs/bcachefs/btree_gc_types.h b/fs/bcachefs/btree_gc_types.h
new file mode 100644
index 000000000000..b82c24bcc088
--- /dev/null
+++ b/fs/bcachefs/btree_gc_types.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_GC_TYPES_H
+#define _BCACHEFS_BTREE_GC_TYPES_H
+
+#include <linux/generic-radix-tree.h>
+
+enum gc_phase {
+	GC_PHASE_not_running,
+	GC_PHASE_start,
+	GC_PHASE_sb,
+	GC_PHASE_btree,
+};
+
+struct gc_pos {
+	enum gc_phase		phase:8;
+	enum btree_id		btree:8;
+	u16			level;
+	struct bpos		pos;
+};
+
+struct reflink_gc {
+	u64		offset;
+	u32		size;
+	u32		refcount;
+};
+
+typedef GENRADIX(struct reflink_gc) reflink_gc_table;
+
+#endif /* _BCACHEFS_BTREE_GC_TYPES_H */
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index b26dc7424662..d8b9beca3776 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -908,7 +908,7 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
 	if (!genradix_ptr_alloc(&c->stripes, idx, gfp))
 		return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
 
-	if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING &&
+	if (c->gc_pos.phase != GC_PHASE_not_running &&
 	    !genradix_ptr_alloc(&c->gc_stripes, idx, gfp))
 		return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
 
-- 
cgit v1.2.3


From f1d4fed13fa0c0f8c99cdbbc27460f1e1f46fd4e Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Mon, 27 May 2024 19:17:09 -0400
Subject: bcachefs: Better fsck error message for key version

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/btree_gc.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index e9e901feda29..dc97991bcd6a 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -585,16 +585,17 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
 
 		if (fsck_err_on(k.k->version.lo > atomic64_read(&c->key_version), c,
 				bkey_version_in_future,
-				"key version number higher than recorded: %llu > %llu",
-				k.k->version.lo,
-				atomic64_read(&c->key_version)))
+				"key version number higher than recorded %llu\n  %s",
+				atomic64_read(&c->key_version),
+				(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
 			atomic64_set(&c->key_version, k.k->version.lo);
 	}
 
 	if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k),
 				c, btree_bitmap_not_marked,
 				"btree ptr not marked in member info btree allocated bitmap\n  %s",
-				(bch2_bkey_val_to_text(&buf, c, k),
+				(printbuf_reset(&buf),
+				 bch2_bkey_val_to_text(&buf, c, k),
 				 buf.buf))) {
 		mutex_lock(&c->sb_lock);
 		bch2_dev_btree_bitmap_mark(c, k);
-- 
cgit v1.2.3


From be647e2c76b27f409cdd520f66c95be888b553a3 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Tue, 21 May 2024 06:41:45 -0700
Subject: nvme: use srcu for iterating namespace list

The nvme pci driver synchronizes with all the namespace queues during a
reset to ensure that there's no pending timeout work.

Meanwhile the timeout work potentially iterates those same namespaces to
freeze their queues.

Each of those namespace iterations use the same read lock. If a write
lock should somehow get between the synchronize and freeze steps, then
forward progress is deadlocked.

We had been relying on the nvme controller state machine to ensure the
reset work wouldn't conflict with timeout work. That guarantee may be a
bit fragile to rely on, so iterate the namespace lists without taking
potentially circular locks, as reported by lockdep.

Link: https://lore.kernel.org/all/20220930001943.zdbvolc3gkekfmcv@shindev/
Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Tested-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/core.c      | 99 ++++++++++++++++++++++++++-----------------
 drivers/nvme/host/ioctl.c     | 15 ++++---
 drivers/nvme/host/multipath.c | 21 +++++----
 drivers/nvme/host/nvme.h      |  4 +-
 4 files changed, 83 insertions(+), 56 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7706df237349..f5d150c62955 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -678,7 +678,7 @@ static void nvme_free_ns(struct kref *kref)
 	kfree(ns);
 }
 
-static inline bool nvme_get_ns(struct nvme_ns *ns)
+bool nvme_get_ns(struct nvme_ns *ns)
 {
 	return kref_get_unless_zero(&ns->kref);
 }
@@ -3684,9 +3684,10 @@ out_unlock:
 struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
 	struct nvme_ns *ns, *ret = NULL;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
 		if (ns->head->ns_id == nsid) {
 			if (!nvme_get_ns(ns))
 				continue;
@@ -3696,7 +3697,7 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 		if (ns->head->ns_id > nsid)
 			break;
 	}
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 	return ret;
 }
 EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);
@@ -3710,7 +3711,7 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)
 
 	list_for_each_entry_reverse(tmp, &ns->ctrl->namespaces, list) {
 		if (tmp->head->ns_id < ns->head->ns_id) {
-			list_add(&ns->list, &tmp->list);
+			list_add_rcu(&ns->list, &tmp->list);
 			return;
 		}
 	}
@@ -3776,17 +3777,18 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
 	if (nvme_update_ns_info(ns, info))
 		goto out_unlink_ns;
 
-	down_write(&ctrl->namespaces_rwsem);
+	mutex_lock(&ctrl->namespaces_lock);
 	/*
 	 * Ensure that no namespaces are added to the ctrl list after the queues
 	 * are frozen, thereby avoiding a deadlock between scan and reset.
 	 */
 	if (test_bit(NVME_CTRL_FROZEN, &ctrl->flags)) {
-		up_write(&ctrl->namespaces_rwsem);
+		mutex_unlock(&ctrl->namespaces_lock);
 		goto out_unlink_ns;
 	}
 	nvme_ns_add_to_ctrl_list(ns);
-	up_write(&ctrl->namespaces_rwsem);
+	mutex_unlock(&ctrl->namespaces_lock);
+	synchronize_srcu(&ctrl->srcu);
 	nvme_get_ctrl(ctrl);
 
 	if (device_add_disk(ctrl->device, ns->disk, nvme_ns_attr_groups))
@@ -3809,9 +3811,10 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
 
  out_cleanup_ns_from_list:
 	nvme_put_ctrl(ctrl);
-	down_write(&ctrl->namespaces_rwsem);
-	list_del_init(&ns->list);
-	up_write(&ctrl->namespaces_rwsem);
+	mutex_lock(&ctrl->namespaces_lock);
+	list_del_rcu(&ns->list);
+	mutex_unlock(&ctrl->namespaces_lock);
+	synchronize_srcu(&ctrl->srcu);
  out_unlink_ns:
 	mutex_lock(&ctrl->subsys->lock);
 	list_del_rcu(&ns->siblings);
@@ -3861,9 +3864,10 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 		nvme_cdev_del(&ns->cdev, &ns->cdev_device);
 	del_gendisk(ns->disk);
 
-	down_write(&ns->ctrl->namespaces_rwsem);
-	list_del_init(&ns->list);
-	up_write(&ns->ctrl->namespaces_rwsem);
+	mutex_lock(&ns->ctrl->namespaces_lock);
+	list_del_rcu(&ns->list);
+	mutex_unlock(&ns->ctrl->namespaces_lock);
+	synchronize_srcu(&ns->ctrl->srcu);
 
 	if (last_path)
 		nvme_mpath_shutdown_disk(ns->head);
@@ -3953,16 +3957,17 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 	struct nvme_ns *ns, *next;
 	LIST_HEAD(rm_list);
 
-	down_write(&ctrl->namespaces_rwsem);
+	mutex_lock(&ctrl->namespaces_lock);
 	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
 		if (ns->head->ns_id > nsid)
-			list_move_tail(&ns->list, &rm_list);
+			list_splice_init_rcu(&ns->list, &rm_list,
+					     synchronize_rcu);
 	}
-	up_write(&ctrl->namespaces_rwsem);
+	mutex_unlock(&ctrl->namespaces_lock);
+	synchronize_srcu(&ctrl->srcu);
 
 	list_for_each_entry_safe(ns, next, &rm_list, list)
 		nvme_ns_remove(ns);
-
 }
 
 static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
@@ -4132,9 +4137,10 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 	/* this is a no-op when called from the controller reset handler */
 	nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);
 
-	down_write(&ctrl->namespaces_rwsem);
-	list_splice_init(&ctrl->namespaces, &ns_list);
-	up_write(&ctrl->namespaces_rwsem);
+	mutex_lock(&ctrl->namespaces_lock);
+	list_splice_init_rcu(&ctrl->namespaces, &ns_list, synchronize_rcu);
+	mutex_unlock(&ctrl->namespaces_lock);
+	synchronize_srcu(&ctrl->srcu);
 
 	list_for_each_entry_safe(ns, next, &ns_list, list)
 		nvme_ns_remove(ns);
@@ -4582,6 +4588,7 @@ static void nvme_free_ctrl(struct device *dev)
 	key_put(ctrl->tls_key);
 	nvme_free_cels(ctrl);
 	nvme_mpath_uninit(ctrl);
+	cleanup_srcu_struct(&ctrl->srcu);
 	nvme_auth_stop(ctrl);
 	nvme_auth_free(ctrl);
 	__free_page(ctrl->discard_page);
@@ -4614,10 +4621,15 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	ctrl->passthru_err_log_enabled = false;
 	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
 	spin_lock_init(&ctrl->lock);
+	mutex_init(&ctrl->namespaces_lock);
+
+	ret = init_srcu_struct(&ctrl->srcu);
+	if (ret)
+		return ret;
+
 	mutex_init(&ctrl->scan_lock);
 	INIT_LIST_HEAD(&ctrl->namespaces);
 	xa_init(&ctrl->cels);
-	init_rwsem(&ctrl->namespaces_rwsem);
 	ctrl->dev = dev;
 	ctrl->ops = ops;
 	ctrl->quirks = quirks;
@@ -4697,6 +4709,7 @@ out_release_instance:
 out:
 	if (ctrl->discard_page)
 		__free_page(ctrl->discard_page);
+	cleanup_srcu_struct(&ctrl->srcu);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_init_ctrl);
@@ -4705,22 +4718,24 @@ EXPORT_SYMBOL_GPL(nvme_init_ctrl);
 void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
 		blk_mark_disk_dead(ns->disk);
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 EXPORT_SYMBOL_GPL(nvme_mark_namespaces_dead);
 
 void nvme_unfreeze(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
 		blk_mq_unfreeze_queue(ns->queue);
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 	clear_bit(NVME_CTRL_FROZEN, &ctrl->flags);
 }
 EXPORT_SYMBOL_GPL(nvme_unfreeze);
@@ -4728,14 +4743,15 @@ EXPORT_SYMBOL_GPL(nvme_unfreeze);
 int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
 		timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
 		if (timeout <= 0)
 			break;
 	}
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 	return timeout;
 }
 EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
@@ -4743,23 +4759,25 @@ EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
 void nvme_wait_freeze(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
 		blk_mq_freeze_queue_wait(ns->queue);
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 EXPORT_SYMBOL_GPL(nvme_wait_freeze);
 
 void nvme_start_freeze(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
 	set_bit(NVME_CTRL_FROZEN, &ctrl->flags);
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
 		blk_freeze_queue_start(ns->queue);
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 EXPORT_SYMBOL_GPL(nvme_start_freeze);
 
@@ -4802,11 +4820,12 @@ EXPORT_SYMBOL_GPL(nvme_unquiesce_admin_queue);
 void nvme_sync_io_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
 		blk_sync_queue(ns->queue);
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 EXPORT_SYMBOL_GPL(nvme_sync_io_queues);
 
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 499a8bb7cac7..9d9d2a127c4e 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -789,15 +789,15 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
 		bool open_for_write)
 {
 	struct nvme_ns *ns;
-	int ret;
+	int ret, srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
 	if (list_empty(&ctrl->namespaces)) {
 		ret = -ENOTTY;
 		goto out_unlock;
 	}
 
-	ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
+	ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list);
 	if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
 		dev_warn(ctrl->device,
 			"NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
@@ -807,15 +807,18 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
 
 	dev_warn(ctrl->device,
 		"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
-	kref_get(&ns->kref);
-	up_read(&ctrl->namespaces_rwsem);
+	if (!nvme_get_ns(ns)) {
+		ret = -ENXIO;
+		goto out_unlock;
+	}
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 
 	ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write);
 	nvme_put_ns(ns);
 	return ret;
 
 out_unlock:
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 	return ret;
 }
 
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 1bee176fd850..d8b6b4648eaf 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -151,16 +151,17 @@ void nvme_mpath_end_request(struct request *rq)
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
 		if (!ns->head->disk)
 			continue;
 		kblockd_schedule_work(&ns->head->requeue_work);
 		if (nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE)
 			disk_uevent(ns->head->disk, KOBJ_CHANGE);
 	}
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 
 static const char *nvme_ana_state_names[] = {
@@ -194,13 +195,14 @@ out:
 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
 		nvme_mpath_clear_current_path(ns);
 		kblockd_schedule_work(&ns->head->requeue_work);
 	}
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 
 void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
@@ -681,6 +683,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
 	u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0;
 	unsigned *nr_change_groups = data;
 	struct nvme_ns *ns;
+	int srcu_idx;
 
 	dev_dbg(ctrl->device, "ANA group %d: %s.\n",
 			le32_to_cpu(desc->grpid),
@@ -692,8 +695,8 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
 	if (!nr_nsids)
 		return 0;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
 		unsigned nsid;
 again:
 		nsid = le32_to_cpu(desc->nsids[n]);
@@ -706,7 +709,7 @@ again:
 		if (ns->head->ns_id > nsid)
 			goto again;
 	}
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 	return 0;
 }
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index c43a30753d87..f3a41133ac3f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -282,7 +282,8 @@ struct nvme_ctrl {
 	struct blk_mq_tag_set *tagset;
 	struct blk_mq_tag_set *admin_tagset;
 	struct list_head namespaces;
-	struct rw_semaphore namespaces_rwsem;
+	struct mutex namespaces_lock;
+	struct srcu_struct srcu;
 	struct device ctrl_device;
 	struct device *device;	/* char device */
 #ifdef CONFIG_NVME_HWMON
@@ -1160,6 +1161,7 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
 		       struct nvme_command *cmd, int status);
 struct nvme_ctrl *nvme_ctrl_from_file(struct file *file);
 struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid);
+bool nvme_get_ns(struct nvme_ns *ns);
 void nvme_put_ns(struct nvme_ns *ns);
 
 static inline bool nvme_multi_css(struct nvme_ctrl *ctrl)
-- 
cgit v1.2.3


From c758b77d4a0a0ed3a1292b3fd7a2aeccd1a169a4 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 27 May 2024 22:38:52 +0300
Subject: nvmet: fix a possible leak when destroy a ctrl during qp
 establishment

In nvmet_sq_destroy we capture sq->ctrl early and if it is non-NULL we
know that a ctrl was allocated (in the admin connect request handler)
and we need to release pending AERs, clear ctrl->sqs and sq->ctrl
(for nvme-loop primarily), and drop the final reference on the ctrl.

However, a small window is possible where nvmet_sq_destroy starts (as
a result of the client giving up and disconnecting) concurrently with
the nvme admin connect cmd (which may be in an early stage). But *before*
kill_and_confirm of sq->ref (i.e. the admin connect managed to get an sq
live reference). In this case, sq->ctrl was allocated however after it was
captured in a local variable in nvmet_sq_destroy.
This prevented the final reference drop on the ctrl.

Solve this by re-capturing the sq->ctrl after all inflight request has
completed, where for sure sq->ctrl reference is final, and move forward
based on that.

This issue was observed in an environment with many hosts connecting
multiple ctrls simoutanuosly, creating a delay in allocating a ctrl
leading up to this race window.

Reported-by: Alex Turin <alex@vastdata.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/target/core.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 2fde22323622..06f0c587f343 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -818,6 +818,15 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
 	percpu_ref_exit(&sq->ref);
 	nvmet_auth_sq_free(sq);
 
+	/*
+	 * we must reference the ctrl again after waiting for inflight IO
+	 * to complete. Because admin connect may have sneaked in after we
+	 * store sq->ctrl locally, but before we killed the percpu_ref. the
+	 * admin connect allocates and assigns sq->ctrl, which now needs a
+	 * final ref put, as this ctrl is going away.
+	 */
+	ctrl = sq->ctrl;
+
 	if (ctrl) {
 		/*
 		 * The teardown flow may take some time, and the host may not
-- 
cgit v1.2.3


From a0fc1a053b7a212244ff109e44469b8deff280c5 Mon Sep 17 00:00:00 2001
From: Jeff Johnson <quic_jjohnson@quicinc.com>
Date: Fri, 24 May 2024 16:58:26 -0700
Subject: of: of_test: add MODULE_DESCRIPTION()

Fix the 'make W=1' warning:
WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/of/of_test.o

Signed-off-by: Jeff Johnson <quic_jjohnson@quicinc.com>
Link: https://lore.kernel.org/r/20240524-md-of-of_test-v1-1-6ebd078d620f@quicinc.com
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 drivers/of/of_test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/of/of_test.c b/drivers/of/of_test.c
index a9301d293f01..c85a258bc6ae 100644
--- a/drivers/of/of_test.c
+++ b/drivers/of/of_test.c
@@ -54,4 +54,5 @@ static struct kunit_suite of_dtb_suite = {
 kunit_test_suites(
 	&of_dtb_suite,
 );
+MODULE_DESCRIPTION("KUnit tests for OF APIs");
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From ac4b069035783f7a54b3ab841119f4b6bf435f98 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 28 May 2024 15:10:28 -0300
Subject: tools arch x86: Sync the msr-index.h copy with the kernel sources

To pick up the changes from these csets:

  53bc516ade85a764 ("x86/msr: Move ARCH_CAP_XAPIC_DISABLE bit definition to its rightful place")

That patch just move definitions around, so this just silences this perf
build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Link: https://lore.kernel.org/lkml/ZlYe8jOzd1_DyA7X@x1
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/msr-index.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index e72c2b872957..e022e6eb766c 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -170,6 +170,10 @@
 						 * CPU is not affected by Branch
 						 * History Injection.
 						 */
+#define ARCH_CAP_XAPIC_DISABLE		BIT(21)	/*
+						 * IA32_XAPIC_DISABLE_STATUS MSR
+						 * supported
+						 */
 #define ARCH_CAP_PBRSB_NO		BIT(24)	/*
 						 * Not susceptible to Post-Barrier
 						 * Return Stack Buffer Predictions.
@@ -192,11 +196,6 @@
 						 * File.
 						 */
 
-#define ARCH_CAP_XAPIC_DISABLE		BIT(21)	/*
-						 * IA32_XAPIC_DISABLE_STATUS MSR
-						 * supported
-						 */
-
 #define MSR_IA32_FLUSH_CMD		0x0000010b
 #define L1D_FLUSH			BIT(0)	/*
 						 * Writeback and invalidate the
-- 
cgit v1.2.3


From 88e520512a68b4e724cb0f4281c79ae28673ccb1 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 28 May 2024 16:24:15 -0300
Subject: tools headers UAPI: Sync kvm headers with the kernel sources

To pick the changes in:

  4af663c2f64a8d25 ("KVM: SEV: Allow per-guest configuration of GHCB protocol version")
  4f5defae708992dd ("KVM: SEV: introduce KVM_SEV_INIT2 operation")
  26c44aa9e076ed83 ("KVM: SEV: define VM types for SEV and SEV-ES")
  ac5c48027bacb1b5 ("KVM: SEV: publish supported VMSA features")
  651d61bc8b7d8bb6 ("KVM: PPC: Fix documentation for ppc mmu caps")

That don't change functionality in tools/perf, as no new ioctl is added
for the 'perf trace' scripts to harvest.

This addresses these perf build warnings:

  Warning: Kernel ABI header differences:
    diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h
    diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Joel Stanley <joel@jms.id.au>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michael Roth <michael.roth@amd.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Link: https://lore.kernel.org/lkml/ZlYxAdHjyAkvGtMW@x1
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/uapi/asm/kvm.h | 22 ++++++++++++++++++++--
 tools/include/uapi/linux/kvm.h        |  4 ++--
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index ef11aa4cab42..9fae1b73b529 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -457,8 +457,13 @@ struct kvm_sync_regs {
 
 #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE	0x00000001
 
-/* attributes for system fd (group 0) */
-#define KVM_X86_XCOMP_GUEST_SUPP	0
+/* vendor-independent attributes for system fd (group 0) */
+#define KVM_X86_GRP_SYSTEM		0
+#  define KVM_X86_XCOMP_GUEST_SUPP	0
+
+/* vendor-specific groups and attributes for system fd */
+#define KVM_X86_GRP_SEV			1
+#  define KVM_X86_SEV_VMSA_FEATURES	0
 
 struct kvm_vmx_nested_state_data {
 	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
@@ -689,6 +694,9 @@ enum sev_cmd_id {
 	/* Guest Migration Extension */
 	KVM_SEV_SEND_CANCEL,
 
+	/* Second time is the charm; improved versions of the above ioctls.  */
+	KVM_SEV_INIT2,
+
 	KVM_SEV_NR_MAX,
 };
 
@@ -700,6 +708,14 @@ struct kvm_sev_cmd {
 	__u32 sev_fd;
 };
 
+struct kvm_sev_init {
+	__u64 vmsa_features;
+	__u32 flags;
+	__u16 ghcb_version;
+	__u16 pad1;
+	__u32 pad2[8];
+};
+
 struct kvm_sev_launch_start {
 	__u32 handle;
 	__u32 policy;
@@ -856,5 +872,7 @@ struct kvm_hyperv_eventfd {
 
 #define KVM_X86_DEFAULT_VM	0
 #define KVM_X86_SW_PROTECTED_VM	1
+#define KVM_X86_SEV_VM		2
+#define KVM_X86_SEV_ES_VM	3
 
 #endif /* _ASM_X86_KVM_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index ea32b101b999..d03842abae57 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1221,9 +1221,9 @@ struct kvm_vfio_spapr_tce {
 /* Available with KVM_CAP_SPAPR_RESIZE_HPT */
 #define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt)
 #define KVM_PPC_RESIZE_HPT_COMMIT  _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt)
-/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_MMU_HASH_V3 */
+/* Available with KVM_CAP_PPC_MMU_RADIX or KVM_CAP_PPC_MMU_HASH_V3 */
 #define KVM_PPC_CONFIGURE_V3_MMU  _IOW(KVMIO,  0xaf, struct kvm_ppc_mmuv3_cfg)
-/* Available with KVM_CAP_PPC_RADIX_MMU */
+/* Available with KVM_CAP_PPC_MMU_RADIX */
 #define KVM_PPC_GET_RMMU_INFO	  _IOW(KVMIO,  0xb0, struct kvm_ppc_rmmu_info)
 /* Available with KVM_CAP_PPC_GET_CPU_CHAR */
 #define KVM_PPC_GET_CPU_CHAR	  _IOR(KVMIO,  0xb1, struct kvm_ppc_cpu_char)
-- 
cgit v1.2.3


From c7a5096781732e0f9784551309484f3e103f6750 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Tue, 28 May 2024 13:25:02 +0300
Subject: PNP: Make dev_is_pnp() to be a function and export it for modules

Since we have a dev_is_pnp() macro that utilises the address of the
pnp_bus_type variable, the users, which can be compiled as modules,
will fail to build. Convert the macro to be a function and export it
to the modules to prevent build breakage.

Reported-by: Woody Suwalski <terraluna977@gmail.com>
Closes: https://lore.kernel.org/r/cc8a93b2-2504-9754-e26c-5d5c3bd1265c@gmail.com
Fixes: 2a49b45cd0e7 ("PNP: Add dev_is_pnp() macro")
Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pnp/driver.c | 6 ++++++
 include/linux/pnp.h  | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c
index 0a5d0d8befa8..3483e52e3a81 100644
--- a/drivers/pnp/driver.c
+++ b/drivers/pnp/driver.c
@@ -266,6 +266,12 @@ const struct bus_type pnp_bus_type = {
 	.dev_groups = pnp_dev_groups,
 };
 
+bool dev_is_pnp(const struct device *dev)
+{
+	return dev->bus == &pnp_bus_type;
+}
+EXPORT_SYMBOL_GPL(dev_is_pnp);
+
 int pnp_register_driver(struct pnp_driver *drv)
 {
 	drv->driver.name = drv->name;
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index 82561242cda4..a8def1cea32c 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -469,7 +469,7 @@ int compare_pnp_id(struct pnp_id *pos, const char *id);
 int pnp_register_driver(struct pnp_driver *drv);
 void pnp_unregister_driver(struct pnp_driver *drv);
 
-#define dev_is_pnp(d) ((d)->bus == &pnp_bus_type)
+bool dev_is_pnp(const struct device *dev);
 
 #else
 
@@ -502,7 +502,7 @@ static inline int compare_pnp_id(struct pnp_id *pos, const char *id) { return -E
 static inline int pnp_register_driver(struct pnp_driver *drv) { return -ENODEV; }
 static inline void pnp_unregister_driver(struct pnp_driver *drv) { }
 
-#define dev_is_pnp(d) false
+static inline bool dev_is_pnp(const struct device *dev) { return false; }
 
 #endif /* CONFIG_PNP */
 
-- 
cgit v1.2.3


From edcde848c01eb071a91d479a6b3101d9cf48e905 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Tue, 28 May 2024 13:25:03 +0300
Subject: PNP: Hide pnp_bus_type from the non-PNP code

The pnp_bus_type is defined only when CONFIG_PNP=y, while being
not guarded by ifdeffery in the header. Moreover, it's not used
outside of the PNP code. Move it to the internal header to make
sure no-one will try to (ab)use it.

Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pnp/base.h  | 1 +
 include/linux/pnp.h | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h
index e74a0f6a3157..4e80273dfb1e 100644
--- a/drivers/pnp/base.h
+++ b/drivers/pnp/base.h
@@ -6,6 +6,7 @@
 
 extern struct mutex pnp_lock;
 extern const struct attribute_group *pnp_dev_groups[];
+extern const struct bus_type pnp_bus_type;
 
 int pnp_register_protocol(struct pnp_protocol *protocol);
 void pnp_unregister_protocol(struct pnp_protocol *protocol);
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index a8def1cea32c..7f2ff95d2deb 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -435,8 +435,6 @@ struct pnp_protocol {
 #define protocol_for_each_dev(protocol, dev)	\
 	list_for_each_entry(dev, &(protocol)->devices, protocol_list)
 
-extern const struct bus_type pnp_bus_type;
-
 #if defined(CONFIG_PNP)
 
 /* device management */
-- 
cgit v1.2.3


From 2f523f29d3b19a668b8d4ce6f768d8faff976b3a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 28 May 2024 16:54:31 -0300
Subject: tools headers UAPI: Update i915_drm.h with the kernel sources

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/lkml/
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/drm/i915_drm.h | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 2ee338860b7e..d4d86e566e07 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -806,6 +806,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_PXP_STATUS		 58
 
+/*
+ * Query if kernel allows marking a context to send a Freq hint to SLPC. This
+ * will enable use of the strategies allowed by the SLPC algorithm.
+ */
+#define I915_PARAM_HAS_CONTEXT_FREQ_HINT	59
+
 /* Must be kept compact -- no holes and well documented */
 
 /**
@@ -2148,6 +2154,15 @@ struct drm_i915_gem_context_param {
  * -EIO: The firmware did not succeed in creating the protected context.
  */
 #define I915_CONTEXT_PARAM_PROTECTED_CONTENT    0xd
+
+/*
+ * I915_CONTEXT_PARAM_LOW_LATENCY:
+ *
+ * Mark this context as a low latency workload which requires aggressive GT
+ * frequency scaling. Use I915_PARAM_HAS_CONTEXT_FREQ_HINT to check if the kernel
+ * supports this per context flag.
+ */
+#define I915_CONTEXT_PARAM_LOW_LATENCY		0xe
 /* Must be kept compact -- no holes and well documented */
 
 	/** @value: Context parameter value to be set or queried */
@@ -2623,19 +2638,29 @@ struct drm_i915_reg_read {
  *
  */
 
+/*
+ * struct drm_i915_reset_stats - Return global reset and other context stats
+ *
+ * Driver keeps few stats for each contexts and also global reset count.
+ * This struct can be used to query those stats.
+ */
 struct drm_i915_reset_stats {
+	/** @ctx_id: ID of the requested context */
 	__u32 ctx_id;
+
+	/** @flags: MBZ */
 	__u32 flags;
 
-	/* All resets since boot/module reload, for all contexts */
+	/** @reset_count: All resets since boot/module reload, for all contexts */
 	__u32 reset_count;
 
-	/* Number of batches lost when active in GPU, for this context */
+	/** @batch_active: Number of batches lost when active in GPU, for this context */
 	__u32 batch_active;
 
-	/* Number of batches lost pending for execution, for this context */
+	/** @batch_pending: Number of batches lost pending for execution, for this context */
 	__u32 batch_pending;
 
+	/** @pad: MBZ */
 	__u32 pad;
 };
 
-- 
cgit v1.2.3


From ac62f52138f752d6c74adc6321e4996d84caf5bb Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <linux@weissschuh.net>
Date: Sun, 26 May 2024 23:40:01 +0200
Subject: ACPI: AC: Properly notify powermanagement core about changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The powermanagement core does various actions when a powersupply changes.
It calls into notifiers, LED triggers, other power supplies and emits an uevent.

To make sure that all these actions happen properly call power_supply_changed().

Reported-by: Rajas Paranjpe <paranjperajas@gmail.com>
Closes: https://github.com/MrChromebox/firmware/issues/420#issuecomment-2132251318
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ac.c  | 4 ++--
 drivers/acpi/sbs.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c
index 2d4a35e6dd18..09a87fa222c7 100644
--- a/drivers/acpi/ac.c
+++ b/drivers/acpi/ac.c
@@ -145,7 +145,7 @@ static void acpi_ac_notify(acpi_handle handle, u32 event, void *data)
 						  dev_name(&adev->dev), event,
 						  (u32) ac->state);
 		acpi_notifier_call_chain(adev, event, (u32) ac->state);
-		kobject_uevent(&ac->charger->dev.kobj, KOBJ_CHANGE);
+		power_supply_changed(ac->charger);
 	}
 }
 
@@ -268,7 +268,7 @@ static int acpi_ac_resume(struct device *dev)
 	if (acpi_ac_get_state(ac))
 		return 0;
 	if (old_state != ac->state)
-		kobject_uevent(&ac->charger->dev.kobj, KOBJ_CHANGE);
+		power_supply_changed(ac->charger);
 
 	return 0;
 }
diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c
index 94e3c000df2e..dc8164b182dc 100644
--- a/drivers/acpi/sbs.c
+++ b/drivers/acpi/sbs.c
@@ -610,7 +610,7 @@ static void acpi_sbs_callback(void *context)
 	if (sbs->charger_exists) {
 		acpi_ac_get_present(sbs);
 		if (sbs->charger_present != saved_charger_state)
-			kobject_uevent(&sbs->charger->dev.kobj, KOBJ_CHANGE);
+			power_supply_changed(sbs->charger);
 	}
 
 	if (sbs->manager_present) {
@@ -622,7 +622,7 @@ static void acpi_sbs_callback(void *context)
 			acpi_battery_read(bat);
 			if (saved_battery_state == bat->present)
 				continue;
-			kobject_uevent(&bat->bat->dev.kobj, KOBJ_CHANGE);
+			power_supply_changed(bat->bat);
 		}
 	}
 }
-- 
cgit v1.2.3


From 779b8a14afde110dd3502566be907289eba72447 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 28 May 2024 14:09:23 +0200
Subject: cpufreq: amd-pstate: remove global header file

When extra warnings are enabled, gcc points out a global variable
definition in a header:

In file included from drivers/cpufreq/amd-pstate-ut.c:29:
include/linux/amd-pstate.h:123:27: error: 'amd_pstate_mode_string' defined but not used [-Werror=unused-const-variable=]
  123 | static const char * const amd_pstate_mode_string[] = {
      |                           ^~~~~~~~~~~~~~~~~~~~~~

This header is only included from two files in the same directory,
and one of them uses only a single definition from it, so clean it
up by moving most of the contents into the driver that uses them,
and making shared bits a local header file.

Fixes: 36c5014e5460 ("cpufreq: amd-pstate: optimize driver working mode selection in amd_pstate_param()")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 MAINTAINERS                     |   1 -
 drivers/cpufreq/amd-pstate-ut.c |   3 +-
 drivers/cpufreq/amd-pstate.c    |  34 +++++++++-
 drivers/cpufreq/amd-pstate.h    | 104 ++++++++++++++++++++++++++++++
 include/linux/amd-pstate.h      | 137 ----------------------------------------
 5 files changed, 139 insertions(+), 140 deletions(-)
 create mode 100644 drivers/cpufreq/amd-pstate.h
 delete mode 100644 include/linux/amd-pstate.h

diff --git a/MAINTAINERS b/MAINTAINERS
index d6c90161c7bf..fc31870379f8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1107,7 +1107,6 @@ L:	linux-pm@vger.kernel.org
 S:	Supported
 F:	Documentation/admin-guide/pm/amd-pstate.rst
 F:	drivers/cpufreq/amd-pstate*
-F:	include/linux/amd-pstate.h
 F:	tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py
 
 AMD PTDMA DRIVER
diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c
index f04ae67dda37..fc275d41d51e 100644
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -26,10 +26,11 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/fs.h>
-#include <linux/amd-pstate.h>
 
 #include <acpi/cppc_acpi.h>
 
+#include "amd-pstate.h"
+
 /*
  * Abbreviations:
  * amd_pstate_ut: used as a shortform for AMD P-State unit test.
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 1b7e82a0ad2e..91993647e09e 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -36,7 +36,6 @@
 #include <linux/delay.h>
 #include <linux/uaccess.h>
 #include <linux/static_call.h>
-#include <linux/amd-pstate.h>
 #include <linux/topology.h>
 
 #include <acpi/processor.h>
@@ -46,6 +45,8 @@
 #include <asm/processor.h>
 #include <asm/cpufeature.h>
 #include <asm/cpu_device_id.h>
+
+#include "amd-pstate.h"
 #include "amd-pstate-trace.h"
 
 #define AMD_PSTATE_TRANSITION_LATENCY	20000
@@ -53,6 +54,37 @@
 #define CPPC_HIGHEST_PERF_PERFORMANCE	196
 #define CPPC_HIGHEST_PERF_DEFAULT	166
 
+#define AMD_CPPC_EPP_PERFORMANCE		0x00
+#define AMD_CPPC_EPP_BALANCE_PERFORMANCE	0x80
+#define AMD_CPPC_EPP_BALANCE_POWERSAVE		0xBF
+#define AMD_CPPC_EPP_POWERSAVE			0xFF
+
+/*
+ * enum amd_pstate_mode - driver working mode of amd pstate
+ */
+enum amd_pstate_mode {
+	AMD_PSTATE_UNDEFINED = 0,
+	AMD_PSTATE_DISABLE,
+	AMD_PSTATE_PASSIVE,
+	AMD_PSTATE_ACTIVE,
+	AMD_PSTATE_GUIDED,
+	AMD_PSTATE_MAX,
+};
+
+static const char * const amd_pstate_mode_string[] = {
+	[AMD_PSTATE_UNDEFINED]   = "undefined",
+	[AMD_PSTATE_DISABLE]     = "disable",
+	[AMD_PSTATE_PASSIVE]     = "passive",
+	[AMD_PSTATE_ACTIVE]      = "active",
+	[AMD_PSTATE_GUIDED]      = "guided",
+	NULL,
+};
+
+struct quirk_entry {
+	u32 nominal_freq;
+	u32 lowest_freq;
+};
+
 /*
  * TODO: We need more time to fine tune processors with shared memory solution
  * with community together.
diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
new file mode 100644
index 000000000000..e6a28e7f4dbf
--- /dev/null
+++ b/drivers/cpufreq/amd-pstate.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ *
+ * Author: Meng Li <li.meng@amd.com>
+ */
+
+#ifndef _LINUX_AMD_PSTATE_H
+#define _LINUX_AMD_PSTATE_H
+
+#include <linux/pm_qos.h>
+
+/*********************************************************************
+ *                        AMD P-state INTERFACE                       *
+ *********************************************************************/
+/**
+ * struct  amd_aperf_mperf
+ * @aperf: actual performance frequency clock count
+ * @mperf: maximum performance frequency clock count
+ * @tsc:   time stamp counter
+ */
+struct amd_aperf_mperf {
+	u64 aperf;
+	u64 mperf;
+	u64 tsc;
+};
+
+/**
+ * struct amd_cpudata - private CPU data for AMD P-State
+ * @cpu: CPU number
+ * @req: constraint request to apply
+ * @cppc_req_cached: cached performance request hints
+ * @highest_perf: the maximum performance an individual processor may reach,
+ *		  assuming ideal conditions
+ *		  For platforms that do not support the preferred core feature, the
+ *		  highest_pef may be configured with 166 or 255, to avoid max frequency
+ *		  calculated wrongly. we take the fixed value as the highest_perf.
+ * @nominal_perf: the maximum sustained performance level of the processor,
+ *		  assuming ideal operating conditions
+ * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
+ *			   savings are achieved
+ * @lowest_perf: the absolute lowest performance level of the processor
+ * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher
+ * 		  priority.
+ * @min_limit_perf: Cached value of the performance corresponding to policy->min
+ * @max_limit_perf: Cached value of the performance corresponding to policy->max
+ * @min_limit_freq: Cached value of policy->min (in khz)
+ * @max_limit_freq: Cached value of policy->max (in khz)
+ * @max_freq: the frequency (in khz) that mapped to highest_perf
+ * @min_freq: the frequency (in khz) that mapped to lowest_perf
+ * @nominal_freq: the frequency (in khz) that mapped to nominal_perf
+ * @lowest_nonlinear_freq: the frequency (in khz) that mapped to lowest_nonlinear_perf
+ * @cur: Difference of Aperf/Mperf/tsc count between last and current sample
+ * @prev: Last Aperf/Mperf/tsc count value read from register
+ * @freq: current cpu frequency value (in khz)
+ * @boost_supported: check whether the Processor or SBIOS supports boost mode
+ * @hw_prefcore: check whether HW supports preferred core featue.
+ * 		  Only when hw_prefcore and early prefcore param are true,
+ * 		  AMD P-State driver supports preferred core featue.
+ * @epp_policy: Last saved policy used to set energy-performance preference
+ * @epp_cached: Cached CPPC energy-performance preference value
+ * @policy: Cpufreq policy value
+ * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value
+ *
+ * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
+ * represents all the attributes and goals that AMD P-State requests at runtime.
+ */
+struct amd_cpudata {
+	int	cpu;
+
+	struct	freq_qos_request req[2];
+	u64	cppc_req_cached;
+
+	u32	highest_perf;
+	u32	nominal_perf;
+	u32	lowest_nonlinear_perf;
+	u32	lowest_perf;
+	u32     prefcore_ranking;
+	u32     min_limit_perf;
+	u32     max_limit_perf;
+	u32     min_limit_freq;
+	u32     max_limit_freq;
+
+	u32	max_freq;
+	u32	min_freq;
+	u32	nominal_freq;
+	u32	lowest_nonlinear_freq;
+
+	struct amd_aperf_mperf cur;
+	struct amd_aperf_mperf prev;
+
+	u64	freq;
+	bool	boost_supported;
+	bool	hw_prefcore;
+
+	/* EPP feature related attributes*/
+	s16	epp_policy;
+	s16	epp_cached;
+	u32	policy;
+	u64	cppc_cap1_cached;
+	bool	suspended;
+};
+
+#endif /* _LINUX_AMD_PSTATE_H */
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
deleted file mode 100644
index d58fc022ec46..000000000000
--- a/include/linux/amd-pstate.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/include/linux/amd-pstate.h
- *
- * Copyright (C) 2022 Advanced Micro Devices, Inc.
- *
- * Author: Meng Li <li.meng@amd.com>
- */
-
-#ifndef _LINUX_AMD_PSTATE_H
-#define _LINUX_AMD_PSTATE_H
-
-#include <linux/pm_qos.h>
-
-#define AMD_CPPC_EPP_PERFORMANCE		0x00
-#define AMD_CPPC_EPP_BALANCE_PERFORMANCE	0x80
-#define AMD_CPPC_EPP_BALANCE_POWERSAVE		0xBF
-#define AMD_CPPC_EPP_POWERSAVE			0xFF
-
-/*********************************************************************
- *                        AMD P-state INTERFACE                       *
- *********************************************************************/
-/**
- * struct  amd_aperf_mperf
- * @aperf: actual performance frequency clock count
- * @mperf: maximum performance frequency clock count
- * @tsc:   time stamp counter
- */
-struct amd_aperf_mperf {
-	u64 aperf;
-	u64 mperf;
-	u64 tsc;
-};
-
-/**
- * struct amd_cpudata - private CPU data for AMD P-State
- * @cpu: CPU number
- * @req: constraint request to apply
- * @cppc_req_cached: cached performance request hints
- * @highest_perf: the maximum performance an individual processor may reach,
- *		  assuming ideal conditions
- *		  For platforms that do not support the preferred core feature, the
- *		  highest_pef may be configured with 166 or 255, to avoid max frequency
- *		  calculated wrongly. we take the fixed value as the highest_perf.
- * @nominal_perf: the maximum sustained performance level of the processor,
- *		  assuming ideal operating conditions
- * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
- *			   savings are achieved
- * @lowest_perf: the absolute lowest performance level of the processor
- * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher
- * 		  priority.
- * @min_limit_perf: Cached value of the performance corresponding to policy->min
- * @max_limit_perf: Cached value of the performance corresponding to policy->max
- * @min_limit_freq: Cached value of policy->min (in khz)
- * @max_limit_freq: Cached value of policy->max (in khz)
- * @max_freq: the frequency (in khz) that mapped to highest_perf
- * @min_freq: the frequency (in khz) that mapped to lowest_perf
- * @nominal_freq: the frequency (in khz) that mapped to nominal_perf
- * @lowest_nonlinear_freq: the frequency (in khz) that mapped to lowest_nonlinear_perf
- * @cur: Difference of Aperf/Mperf/tsc count between last and current sample
- * @prev: Last Aperf/Mperf/tsc count value read from register
- * @freq: current cpu frequency value (in khz)
- * @boost_supported: check whether the Processor or SBIOS supports boost mode
- * @hw_prefcore: check whether HW supports preferred core featue.
- * 		  Only when hw_prefcore and early prefcore param are true,
- * 		  AMD P-State driver supports preferred core featue.
- * @epp_policy: Last saved policy used to set energy-performance preference
- * @epp_cached: Cached CPPC energy-performance preference value
- * @policy: Cpufreq policy value
- * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value
- *
- * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
- * represents all the attributes and goals that AMD P-State requests at runtime.
- */
-struct amd_cpudata {
-	int	cpu;
-
-	struct	freq_qos_request req[2];
-	u64	cppc_req_cached;
-
-	u32	highest_perf;
-	u32	nominal_perf;
-	u32	lowest_nonlinear_perf;
-	u32	lowest_perf;
-	u32     prefcore_ranking;
-	u32     min_limit_perf;
-	u32     max_limit_perf;
-	u32     min_limit_freq;
-	u32     max_limit_freq;
-
-	u32	max_freq;
-	u32	min_freq;
-	u32	nominal_freq;
-	u32	lowest_nonlinear_freq;
-
-	struct amd_aperf_mperf cur;
-	struct amd_aperf_mperf prev;
-
-	u64	freq;
-	bool	boost_supported;
-	bool	hw_prefcore;
-
-	/* EPP feature related attributes*/
-	s16	epp_policy;
-	s16	epp_cached;
-	u32	policy;
-	u64	cppc_cap1_cached;
-	bool	suspended;
-};
-
-/*
- * enum amd_pstate_mode - driver working mode of amd pstate
- */
-enum amd_pstate_mode {
-	AMD_PSTATE_UNDEFINED = 0,
-	AMD_PSTATE_DISABLE,
-	AMD_PSTATE_PASSIVE,
-	AMD_PSTATE_ACTIVE,
-	AMD_PSTATE_GUIDED,
-	AMD_PSTATE_MAX,
-};
-
-static const char * const amd_pstate_mode_string[] = {
-	[AMD_PSTATE_UNDEFINED]   = "undefined",
-	[AMD_PSTATE_DISABLE]     = "disable",
-	[AMD_PSTATE_PASSIVE]     = "passive",
-	[AMD_PSTATE_ACTIVE]      = "active",
-	[AMD_PSTATE_GUIDED]      = "guided",
-	NULL,
-};
-
-struct quirk_entry {
-	u32 nominal_freq;
-	u32 lowest_freq;
-};
-
-#endif /* _LINUX_AMD_PSTATE_H */
-- 
cgit v1.2.3


From e4731baaf29438508197d3a8a6d4f5a8c51663f8 Mon Sep 17 00:00:00 2001
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Date: Mon, 27 May 2024 10:41:28 +0530
Subject: cpufreq: amd-pstate: Fix the inconsistency in max frequency units

The nominal frequency in cpudata is maintained in MHz whereas all other
frequencies are in KHz. This means we have to convert nominal frequency
value to KHz before we do any interaction with other frequency values.

In amd_pstate_set_boost(), this conversion from MHz to KHz is missed,
fix that.

Tested on a AMD Zen4 EPYC server

Before:
$ cat /sys/devices/system/cpu/cpufreq/policy*/scaling_max_freq | uniq
2151
$ cat /sys/devices/system/cpu/cpufreq/policy*/cpuinfo_min_freq | uniq
400000
$ cat /sys/devices/system/cpu/cpufreq/policy*/scaling_cur_freq | uniq
2151
409422

After:
$ cat /sys/devices/system/cpu/cpufreq/policy*/scaling_max_freq | uniq
2151000
$ cat /sys/devices/system/cpu/cpufreq/policy*/cpuinfo_min_freq | uniq
400000
$ cat /sys/devices/system/cpu/cpufreq/policy*/scaling_cur_freq | uniq
2151000
1799527

Fixes: ec437d71db77 ("cpufreq: amd-pstate: Introduce a new AMD P-State driver to support future processors")
Signed-off-by: Dhananjay Ugwekar <Dhananjay.Ugwekar@amd.com>
Acked-by: Mario Limonciello <mario.limonciello@amd.com>
Acked-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Tested-by: Peter Jung <ptr1337@cachyos.org>
Cc: 5.17+ <stable@vger.kernel.org> # 5.17+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/amd-pstate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 91993647e09e..9ad62dbe8bfb 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -701,7 +701,7 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
 	if (state)
 		policy->cpuinfo.max_freq = cpudata->max_freq;
 	else
-		policy->cpuinfo.max_freq = cpudata->nominal_freq;
+		policy->cpuinfo.max_freq = cpudata->nominal_freq * 1000;
 
 	policy->max = policy->cpuinfo.max_freq;
 
-- 
cgit v1.2.3


From 016c22e410c6eabfc5164f514d2a0ad06eddf3ba Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Tue, 28 May 2024 16:47:34 -0400
Subject: bcachefs: split out sb-members_format.h

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/bcachefs_format.h   | 102 +------------------------------------
 fs/bcachefs/sb-members_format.h | 110 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 111 insertions(+), 101 deletions(-)
 create mode 100644 fs/bcachefs/sb-members_format.h

diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index d801e19cb489..c4d10b528b54 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -513,6 +513,7 @@ struct bch_sb_field {
 #include "snapshot_format.h"
 #include "subvolume_format.h"
 #include "sb-counters_format.h"
+#include "sb-members_format.h"
 
 enum bch_sb_field_type {
 #define x(f, nr)	BCH_SB_FIELD_##f = nr,
@@ -545,106 +546,6 @@ struct bch_sb_field_journal_v2 {
 	}			d[];
 };
 
-/* BCH_SB_FIELD_members_v1: */
-
-#define BCH_MIN_NR_NBUCKETS	(1 << 6)
-
-#define BCH_IOPS_MEASUREMENTS()			\
-	x(seqread,	0)			\
-	x(seqwrite,	1)			\
-	x(randread,	2)			\
-	x(randwrite,	3)
-
-enum bch_iops_measurement {
-#define x(t, n) BCH_IOPS_##t = n,
-	BCH_IOPS_MEASUREMENTS()
-#undef x
-	BCH_IOPS_NR
-};
-
-#define BCH_MEMBER_ERROR_TYPES()		\
-	x(read,		0)			\
-	x(write,	1)			\
-	x(checksum,	2)
-
-enum bch_member_error_type {
-#define x(t, n) BCH_MEMBER_ERROR_##t = n,
-	BCH_MEMBER_ERROR_TYPES()
-#undef x
-	BCH_MEMBER_ERROR_NR
-};
-
-struct bch_member {
-	__uuid_t		uuid;
-	__le64			nbuckets;	/* device size */
-	__le16			first_bucket;   /* index of first bucket used */
-	__le16			bucket_size;	/* sectors */
-	__u8			btree_bitmap_shift;
-	__u8			pad[3];
-	__le64			last_mount;	/* time_t */
-
-	__le64			flags;
-	__le32			iops[4];
-	__le64			errors[BCH_MEMBER_ERROR_NR];
-	__le64			errors_at_reset[BCH_MEMBER_ERROR_NR];
-	__le64			errors_reset_time;
-	__le64			seq;
-	__le64			btree_allocated_bitmap;
-	/*
-	 * On recovery from a clean shutdown we don't normally read the journal,
-	 * but we still want to resume writing from where we left off so we
-	 * don't overwrite more than is necessary, for list journal debugging:
-	 */
-	__le32			last_journal_bucket;
-	__le32			last_journal_bucket_offset;
-};
-
-/*
- * This limit comes from the bucket_gens array - it's a single allocation, and
- * kernel allocation are limited to INT_MAX
- */
-#define BCH_MEMBER_NBUCKETS_MAX	(INT_MAX - 64)
-
-#define BCH_MEMBER_V1_BYTES	56
-
-LE64_BITMASK(BCH_MEMBER_STATE,		struct bch_member, flags,  0,  4)
-/* 4-14 unused, was TIER, HAS_(META)DATA, REPLACEMENT */
-LE64_BITMASK(BCH_MEMBER_DISCARD,	struct bch_member, flags, 14, 15)
-LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED,	struct bch_member, flags, 15, 20)
-LE64_BITMASK(BCH_MEMBER_GROUP,		struct bch_member, flags, 20, 28)
-LE64_BITMASK(BCH_MEMBER_DURABILITY,	struct bch_member, flags, 28, 30)
-LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED,
-					struct bch_member, flags, 30, 31)
-
-#if 0
-LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS,	struct bch_member, flags[1], 0,  20);
-LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40);
-#endif
-
-#define BCH_MEMBER_STATES()			\
-	x(rw,		0)			\
-	x(ro,		1)			\
-	x(failed,	2)			\
-	x(spare,	3)
-
-enum bch_member_state {
-#define x(t, n) BCH_MEMBER_STATE_##t = n,
-	BCH_MEMBER_STATES()
-#undef x
-	BCH_MEMBER_STATE_NR
-};
-
-struct bch_sb_field_members_v1 {
-	struct bch_sb_field	field;
-	struct bch_member	_members[]; //Members are now variable size
-};
-
-struct bch_sb_field_members_v2 {
-	struct bch_sb_field	field;
-	__le16			member_bytes; //size of single member entry
-	u8			pad[6];
-	struct bch_member	_members[];
-};
 
 /* BCH_SB_FIELD_crypt: */
 
@@ -909,7 +810,6 @@ unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_re
 #define bcachefs_metadata_version_current	(bcachefs_metadata_version_max - 1)
 
 #define BCH_SB_SECTOR			8
-#define BCH_SB_MEMBERS_MAX		64 /* XXX kill */
 
 #define BCH_SB_LAYOUT_SIZE_BITS_MAX	16 /* 32 MB */
 
diff --git a/fs/bcachefs/sb-members_format.h b/fs/bcachefs/sb-members_format.h
new file mode 100644
index 000000000000..e2630548c0f6
--- /dev/null
+++ b/fs/bcachefs/sb-members_format.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_MEMBERS_FORMAT_H
+#define _BCACHEFS_SB_MEMBERS_FORMAT_H
+
+/*
+ * We refer to members with bitmasks in various places - but we need to get rid
+ * of this limit:
+ */
+#define BCH_SB_MEMBERS_MAX		64
+
+#define BCH_MIN_NR_NBUCKETS	(1 << 6)
+
+#define BCH_IOPS_MEASUREMENTS()			\
+	x(seqread,	0)			\
+	x(seqwrite,	1)			\
+	x(randread,	2)			\
+	x(randwrite,	3)
+
+enum bch_iops_measurement {
+#define x(t, n) BCH_IOPS_##t = n,
+	BCH_IOPS_MEASUREMENTS()
+#undef x
+	BCH_IOPS_NR
+};
+
+#define BCH_MEMBER_ERROR_TYPES()		\
+	x(read,		0)			\
+	x(write,	1)			\
+	x(checksum,	2)
+
+enum bch_member_error_type {
+#define x(t, n) BCH_MEMBER_ERROR_##t = n,
+	BCH_MEMBER_ERROR_TYPES()
+#undef x
+	BCH_MEMBER_ERROR_NR
+};
+
+struct bch_member {
+	__uuid_t		uuid;
+	__le64			nbuckets;	/* device size */
+	__le16			first_bucket;   /* index of first bucket used */
+	__le16			bucket_size;	/* sectors */
+	__u8			btree_bitmap_shift;
+	__u8			pad[3];
+	__le64			last_mount;	/* time_t */
+
+	__le64			flags;
+	__le32			iops[4];
+	__le64			errors[BCH_MEMBER_ERROR_NR];
+	__le64			errors_at_reset[BCH_MEMBER_ERROR_NR];
+	__le64			errors_reset_time;
+	__le64			seq;
+	__le64			btree_allocated_bitmap;
+	/*
+	 * On recovery from a clean shutdown we don't normally read the journal,
+	 * but we still want to resume writing from where we left off so we
+	 * don't overwrite more than is necessary, for list journal debugging:
+	 */
+	__le32			last_journal_bucket;
+	__le32			last_journal_bucket_offset;
+};
+
+/*
+ * This limit comes from the bucket_gens array - it's a single allocation, and
+ * kernel allocation are limited to INT_MAX
+ */
+#define BCH_MEMBER_NBUCKETS_MAX	(INT_MAX - 64)
+
+#define BCH_MEMBER_V1_BYTES	56
+
+LE64_BITMASK(BCH_MEMBER_STATE,		struct bch_member, flags,  0,  4)
+/* 4-14 unused, was TIER, HAS_(META)DATA, REPLACEMENT */
+LE64_BITMASK(BCH_MEMBER_DISCARD,	struct bch_member, flags, 14, 15)
+LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED,	struct bch_member, flags, 15, 20)
+LE64_BITMASK(BCH_MEMBER_GROUP,		struct bch_member, flags, 20, 28)
+LE64_BITMASK(BCH_MEMBER_DURABILITY,	struct bch_member, flags, 28, 30)
+LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED,
+					struct bch_member, flags, 30, 31)
+
+#if 0
+LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS,	struct bch_member, flags[1], 0,  20);
+LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40);
+#endif
+
+#define BCH_MEMBER_STATES()			\
+	x(rw,		0)			\
+	x(ro,		1)			\
+	x(failed,	2)			\
+	x(spare,	3)
+
+enum bch_member_state {
+#define x(t, n) BCH_MEMBER_STATE_##t = n,
+	BCH_MEMBER_STATES()
+#undef x
+	BCH_MEMBER_STATE_NR
+};
+
+struct bch_sb_field_members_v1 {
+	struct bch_sb_field	field;
+	struct bch_member	_members[]; //Members are now variable size
+};
+
+struct bch_sb_field_members_v2 {
+	struct bch_sb_field	field;
+	__le16			member_bytes; //size of single member entry
+	u8			pad[6];
+	struct bch_member	_members[];
+};
+
+#endif /* _BCACHEFS_SB_MEMBERS_FORMAT_H */
-- 
cgit v1.2.3


From 4c5eef0c50ccc71e225f320835dc7cd51f64f961 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Tue, 28 May 2024 16:54:29 -0400
Subject: bcachefs: split out sb-downgrade_format.h

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/bcachefs_format.h     | 14 +-------------
 fs/bcachefs/sb-downgrade_format.h | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 13 deletions(-)
 create mode 100644 fs/bcachefs/sb-downgrade_format.h

diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index c4d10b528b54..c966eb7e37b8 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -513,6 +513,7 @@ struct bch_sb_field {
 #include "snapshot_format.h"
 #include "subvolume_format.h"
 #include "sb-counters_format.h"
+#include "sb-downgrade_format.h"
 #include "sb-members_format.h"
 
 enum bch_sb_field_type {
@@ -546,7 +547,6 @@ struct bch_sb_field_journal_v2 {
 	}			d[];
 };
 
-
 /* BCH_SB_FIELD_crypt: */
 
 struct nonce {
@@ -738,18 +738,6 @@ struct bch_sb_field_ext {
 	__le64			btrees_lost_data;
 };
 
-struct bch_sb_field_downgrade_entry {
-	__le16			version;
-	__le64			recovery_passes[2];
-	__le16			nr_errors;
-	__le16			errors[] __counted_by(nr_errors);
-} __packed __aligned(2);
-
-struct bch_sb_field_downgrade {
-	struct bch_sb_field	field;
-	struct bch_sb_field_downgrade_entry entries[];
-};
-
 /* Superblock: */
 
 /*
diff --git a/fs/bcachefs/sb-downgrade_format.h b/fs/bcachefs/sb-downgrade_format.h
new file mode 100644
index 000000000000..cffd932be3ec
--- /dev/null
+++ b/fs/bcachefs/sb-downgrade_format.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_DOWNGRADE_FORMAT_H
+#define _BCACHEFS_SB_DOWNGRADE_FORMAT_H
+
+struct bch_sb_field_downgrade_entry {
+	__le16			version;
+	__le64			recovery_passes[2];
+	__le16			nr_errors;
+	__le16			errors[] __counted_by(nr_errors);
+} __packed __aligned(2);
+
+struct bch_sb_field_downgrade {
+	struct bch_sb_field	field;
+	struct bch_sb_field_downgrade_entry entries[];
+};
+
+#endif /* _BCACHEFS_SB_DOWNGRADE_FORMAT_H */
-- 
cgit v1.2.3


From 1cdcc6e3c2cb69a235bd32eb0da51205e432f77f Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Tue, 28 May 2024 17:06:51 -0400
Subject: bcachefs: Split out disk_groups_format.h

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/bcachefs_format.h    | 19 +------------------
 fs/bcachefs/disk_groups_format.h | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+), 18 deletions(-)
 create mode 100644 fs/bcachefs/disk_groups_format.h

diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index c966eb7e37b8..b7690fc3ffc6 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -507,6 +507,7 @@ struct bch_sb_field {
 #include "ec_format.h"
 #include "inode_format.h"
 #include "dirent_format.h"
+#include "disk_groups_format.h"
 #include "xattr_format.h"
 #include "quota_format.h"
 #include "logged_ops_format.h"
@@ -665,24 +666,6 @@ struct bch_sb_field_replicas {
 	struct bch_replicas_entry_v1 entries[];
 } __packed __aligned(8);
 
-/* BCH_SB_FIELD_disk_groups: */
-
-#define BCH_SB_LABEL_SIZE		32
-
-struct bch_disk_group {
-	__u8			label[BCH_SB_LABEL_SIZE];
-	__le64			flags[2];
-} __packed __aligned(8);
-
-LE64_BITMASK(BCH_GROUP_DELETED,		struct bch_disk_group, flags[0], 0,  1)
-LE64_BITMASK(BCH_GROUP_DATA_ALLOWED,	struct bch_disk_group, flags[0], 1,  6)
-LE64_BITMASK(BCH_GROUP_PARENT,		struct bch_disk_group, flags[0], 6, 24)
-
-struct bch_sb_field_disk_groups {
-	struct bch_sb_field	field;
-	struct bch_disk_group	entries[];
-} __packed __aligned(8);
-
 /*
  * On clean shutdown, store btree roots and current journal sequence number in
  * the superblock:
diff --git a/fs/bcachefs/disk_groups_format.h b/fs/bcachefs/disk_groups_format.h
new file mode 100644
index 000000000000..698990bbf1d2
--- /dev/null
+++ b/fs/bcachefs/disk_groups_format.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_DISK_GROUPS_FORMAT_H
+#define _BCACHEFS_DISK_GROUPS_FORMAT_H
+
+#define BCH_SB_LABEL_SIZE		32
+
+struct bch_disk_group {
+	__u8			label[BCH_SB_LABEL_SIZE];
+	__le64			flags[2];
+} __packed __aligned(8);
+
+LE64_BITMASK(BCH_GROUP_DELETED,		struct bch_disk_group, flags[0], 0,  1)
+LE64_BITMASK(BCH_GROUP_DATA_ALLOWED,	struct bch_disk_group, flags[0], 1,  6)
+LE64_BITMASK(BCH_GROUP_PARENT,		struct bch_disk_group, flags[0], 6, 24)
+
+struct bch_sb_field_disk_groups {
+	struct bch_sb_field	field;
+	struct bch_disk_group	entries[];
+} __packed __aligned(8);
+
+#endif /* _BCACHEFS_DISK_GROUPS_FORMAT_H */
-- 
cgit v1.2.3


From 24998050b69ac1f4caa41b66dbd245f4de366b3c Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Tue, 28 May 2024 17:13:52 -0400
Subject: bcachefs: Split out replicas_format.h

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/bcachefs_format.h | 37 +++++--------------------------------
 fs/bcachefs/replicas_format.h | 31 +++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 32 deletions(-)
 create mode 100644 fs/bcachefs/replicas_format.h

diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index b7690fc3ffc6..66df1e5c2a87 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -503,19 +503,20 @@ struct bch_sb_field {
 
 #include "alloc_background_format.h"
 #include "extents_format.h"
-#include "reflink_format.h"
 #include "ec_format.h"
-#include "inode_format.h"
 #include "dirent_format.h"
 #include "disk_groups_format.h"
-#include "xattr_format.h"
-#include "quota_format.h"
+#include "inode_format.h"
 #include "logged_ops_format.h"
+#include "quota_format.h"
+#include "reflink_format.h"
+#include "replicas_format.h"
 #include "snapshot_format.h"
 #include "subvolume_format.h"
 #include "sb-counters_format.h"
 #include "sb-downgrade_format.h"
 #include "sb-members_format.h"
+#include "xattr_format.h"
 
 enum bch_sb_field_type {
 #define x(f, nr)	BCH_SB_FIELD_##f = nr,
@@ -596,8 +597,6 @@ LE64_BITMASK(BCH_KDF_SCRYPT_N,	struct bch_sb_field_crypt, kdf_flags,  0, 16);
 LE64_BITMASK(BCH_KDF_SCRYPT_R,	struct bch_sb_field_crypt, kdf_flags, 16, 32);
 LE64_BITMASK(BCH_KDF_SCRYPT_P,	struct bch_sb_field_crypt, kdf_flags, 32, 48);
 
-/* BCH_SB_FIELD_replicas: */
-
 #define BCH_DATA_TYPES()		\
 	x(free,		0)		\
 	x(sb,		1)		\
@@ -640,32 +639,6 @@ static inline bool data_type_is_hidden(enum bch_data_type type)
 	}
 }
 
-struct bch_replicas_entry_v0 {
-	__u8			data_type;
-	__u8			nr_devs;
-	__u8			devs[];
-} __packed;
-
-struct bch_sb_field_replicas_v0 {
-	struct bch_sb_field	field;
-	struct bch_replicas_entry_v0 entries[];
-} __packed __aligned(8);
-
-struct bch_replicas_entry_v1 {
-	__u8			data_type;
-	__u8			nr_devs;
-	__u8			nr_required;
-	__u8			devs[];
-} __packed;
-
-#define replicas_entry_bytes(_i)					\
-	(offsetof(typeof(*(_i)), devs) + (_i)->nr_devs)
-
-struct bch_sb_field_replicas {
-	struct bch_sb_field	field;
-	struct bch_replicas_entry_v1 entries[];
-} __packed __aligned(8);
-
 /*
  * On clean shutdown, store btree roots and current journal sequence number in
  * the superblock:
diff --git a/fs/bcachefs/replicas_format.h b/fs/bcachefs/replicas_format.h
new file mode 100644
index 000000000000..b97208195d06
--- /dev/null
+++ b/fs/bcachefs/replicas_format.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_REPLICAS_FORMAT_H
+#define _BCACHEFS_REPLICAS_FORMAT_H
+
+struct bch_replicas_entry_v0 {
+	__u8			data_type;
+	__u8			nr_devs;
+	__u8			devs[];
+} __packed;
+
+struct bch_sb_field_replicas_v0 {
+	struct bch_sb_field	field;
+	struct bch_replicas_entry_v0 entries[];
+} __packed __aligned(8);
+
+struct bch_replicas_entry_v1 {
+	__u8			data_type;
+	__u8			nr_devs;
+	__u8			nr_required;
+	__u8			devs[];
+} __packed;
+
+struct bch_sb_field_replicas {
+	struct bch_sb_field	field;
+	struct bch_replicas_entry_v1 entries[];
+} __packed __aligned(8);
+
+#define replicas_entry_bytes(_i)					\
+	(offsetof(typeof(*(_i)), devs) + (_i)->nr_devs)
+
+#endif /* _BCACHEFS_REPLICAS_FORMAT_H */
-- 
cgit v1.2.3


From 5c16c5748894652e8013dbcb27e54e8319c53b00 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Tue, 28 May 2024 17:20:40 -0400
Subject: bcachefs: Split out journal_seq_blacklist_format.h

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/bcachefs_format.h              | 11 +----------
 fs/bcachefs/journal_seq_blacklist_format.h | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 10 deletions(-)
 create mode 100644 fs/bcachefs/journal_seq_blacklist_format.h

diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 66df1e5c2a87..89ac286f17f0 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -507,6 +507,7 @@ struct bch_sb_field {
 #include "dirent_format.h"
 #include "disk_groups_format.h"
 #include "inode_format.h"
+#include "journal_seq_blacklist_format.h"
 #include "logged_ops_format.h"
 #include "quota_format.h"
 #include "reflink_format.h"
@@ -666,16 +667,6 @@ struct bch_sb_field_clean {
 	__u64			_data[];
 };
 
-struct journal_seq_blacklist_entry {
-	__le64			start;
-	__le64			end;
-};
-
-struct bch_sb_field_journal_seq_blacklist {
-	struct bch_sb_field	field;
-	struct journal_seq_blacklist_entry start[];
-};
-
 struct bch_sb_field_errors {
 	struct bch_sb_field	field;
 	struct bch_sb_field_error_entry {
diff --git a/fs/bcachefs/journal_seq_blacklist_format.h b/fs/bcachefs/journal_seq_blacklist_format.h
new file mode 100644
index 000000000000..2566b12dbc04
--- /dev/null
+++ b/fs/bcachefs/journal_seq_blacklist_format.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_FORMAT_H
+#define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_FORMAT_H
+
+struct journal_seq_blacklist_entry {
+	__le64			start;
+	__le64			end;
+};
+
+struct bch_sb_field_journal_seq_blacklist {
+	struct bch_sb_field	field;
+	struct journal_seq_blacklist_entry start[];
+};
+
+#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_FORMAT_H */
-- 
cgit v1.2.3


From 759bb4eabc727077145f3173f8ef6c2ac745c3d6 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Tue, 28 May 2024 17:23:01 -0400
Subject: bcachefs: Split out sb-errors_format.h

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/bcachefs_format.h  |  12 +-
 fs/bcachefs/sb-errors_format.h | 296 +++++++++++++++++++++++++++++++++++++++++
 fs/bcachefs/sb-errors_types.h  | 281 --------------------------------------
 3 files changed, 297 insertions(+), 292 deletions(-)
 create mode 100644 fs/bcachefs/sb-errors_format.h

diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 89ac286f17f0..90c12fe2a2cd 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -516,6 +516,7 @@ struct bch_sb_field {
 #include "subvolume_format.h"
 #include "sb-counters_format.h"
 #include "sb-downgrade_format.h"
+#include "sb-errors_format.h"
 #include "sb-members_format.h"
 #include "xattr_format.h"
 
@@ -667,17 +668,6 @@ struct bch_sb_field_clean {
 	__u64			_data[];
 };
 
-struct bch_sb_field_errors {
-	struct bch_sb_field	field;
-	struct bch_sb_field_error_entry {
-		__le64		v;
-		__le64		last_error_time;
-	}			entries[];
-};
-
-LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID,	struct bch_sb_field_error_entry, v,  0, 16);
-LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR,	struct bch_sb_field_error_entry, v, 16, 64);
-
 struct bch_sb_field_ext {
 	struct bch_sb_field	field;
 	__le64			recovery_passes_required[2];
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
new file mode 100644
index 000000000000..84d2763bd597
--- /dev/null
+++ b/fs/bcachefs/sb-errors_format.h
@@ -0,0 +1,296 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_SB_ERRORS_FORMAT_H
+#define _BCACHEFS_SB_ERRORS_FORMAT_H
+
+#define BCH_SB_ERRS()							\
+	x(clean_but_journal_not_empty,				0)	\
+	x(dirty_but_no_journal_entries,				1)	\
+	x(dirty_but_no_journal_entries_post_drop_nonflushes,	2)	\
+	x(sb_clean_journal_seq_mismatch,			3)	\
+	x(sb_clean_btree_root_mismatch,				4)	\
+	x(sb_clean_missing,					5)	\
+	x(jset_unsupported_version,				6)	\
+	x(jset_unknown_csum,					7)	\
+	x(jset_last_seq_newer_than_seq,				8)	\
+	x(jset_past_bucket_end,					9)	\
+	x(jset_seq_blacklisted,					10)	\
+	x(journal_entries_missing,				11)	\
+	x(journal_entry_replicas_not_marked,			12)	\
+	x(journal_entry_past_jset_end,				13)	\
+	x(journal_entry_replicas_data_mismatch,			14)	\
+	x(journal_entry_bkey_u64s_0,				15)	\
+	x(journal_entry_bkey_past_end,				16)	\
+	x(journal_entry_bkey_bad_format,			17)	\
+	x(journal_entry_bkey_invalid,				18)	\
+	x(journal_entry_btree_root_bad_size,			19)	\
+	x(journal_entry_blacklist_bad_size,			20)	\
+	x(journal_entry_blacklist_v2_bad_size,			21)	\
+	x(journal_entry_blacklist_v2_start_past_end,		22)	\
+	x(journal_entry_usage_bad_size,				23)	\
+	x(journal_entry_data_usage_bad_size,			24)	\
+	x(journal_entry_clock_bad_size,				25)	\
+	x(journal_entry_clock_bad_rw,				26)	\
+	x(journal_entry_dev_usage_bad_size,			27)	\
+	x(journal_entry_dev_usage_bad_dev,			28)	\
+	x(journal_entry_dev_usage_bad_pad,			29)	\
+	x(btree_node_unreadable,				30)	\
+	x(btree_node_fault_injected,				31)	\
+	x(btree_node_bad_magic,					32)	\
+	x(btree_node_bad_seq,					33)	\
+	x(btree_node_unsupported_version,			34)	\
+	x(btree_node_bset_older_than_sb_min,			35)	\
+	x(btree_node_bset_newer_than_sb,			36)	\
+	x(btree_node_data_missing,				37)	\
+	x(btree_node_bset_after_end,				38)	\
+	x(btree_node_replicas_sectors_written_mismatch,		39)	\
+	x(btree_node_replicas_data_mismatch,			40)	\
+	x(bset_unknown_csum,					41)	\
+	x(bset_bad_csum,					42)	\
+	x(bset_past_end_of_btree_node,				43)	\
+	x(bset_wrong_sector_offset,				44)	\
+	x(bset_empty,						45)	\
+	x(bset_bad_seq,						46)	\
+	x(bset_blacklisted_journal_seq,				47)	\
+	x(first_bset_blacklisted_journal_seq,			48)	\
+	x(btree_node_bad_btree,					49)	\
+	x(btree_node_bad_level,					50)	\
+	x(btree_node_bad_min_key,				51)	\
+	x(btree_node_bad_max_key,				52)	\
+	x(btree_node_bad_format,				53)	\
+	x(btree_node_bkey_past_bset_end,			54)	\
+	x(btree_node_bkey_bad_format,				55)	\
+	x(btree_node_bad_bkey,					56)	\
+	x(btree_node_bkey_out_of_order,				57)	\
+	x(btree_root_bkey_invalid,				58)	\
+	x(btree_root_read_error,				59)	\
+	x(btree_root_bad_min_key,				60)	\
+	x(btree_root_bad_max_key,				61)	\
+	x(btree_node_read_error,				62)	\
+	x(btree_node_topology_bad_min_key,			63)	\
+	x(btree_node_topology_bad_max_key,			64)	\
+	x(btree_node_topology_overwritten_by_prev_node,		65)	\
+	x(btree_node_topology_overwritten_by_next_node,		66)	\
+	x(btree_node_topology_interior_node_empty,		67)	\
+	x(fs_usage_hidden_wrong,				68)	\
+	x(fs_usage_btree_wrong,					69)	\
+	x(fs_usage_data_wrong,					70)	\
+	x(fs_usage_cached_wrong,				71)	\
+	x(fs_usage_reserved_wrong,				72)	\
+	x(fs_usage_persistent_reserved_wrong,			73)	\
+	x(fs_usage_nr_inodes_wrong,				74)	\
+	x(fs_usage_replicas_wrong,				75)	\
+	x(dev_usage_buckets_wrong,				76)	\
+	x(dev_usage_sectors_wrong,				77)	\
+	x(dev_usage_fragmented_wrong,				78)	\
+	x(dev_usage_buckets_ec_wrong,				79)	\
+	x(bkey_version_in_future,				80)	\
+	x(bkey_u64s_too_small,					81)	\
+	x(bkey_invalid_type_for_btree,				82)	\
+	x(bkey_extent_size_zero,				83)	\
+	x(bkey_extent_size_greater_than_offset,			84)	\
+	x(bkey_size_nonzero,					85)	\
+	x(bkey_snapshot_nonzero,				86)	\
+	x(bkey_snapshot_zero,					87)	\
+	x(bkey_at_pos_max,					88)	\
+	x(bkey_before_start_of_btree_node,			89)	\
+	x(bkey_after_end_of_btree_node,				90)	\
+	x(bkey_val_size_nonzero,				91)	\
+	x(bkey_val_size_too_small,				92)	\
+	x(alloc_v1_val_size_bad,				93)	\
+	x(alloc_v2_unpack_error,				94)	\
+	x(alloc_v3_unpack_error,				95)	\
+	x(alloc_v4_val_size_bad,				96)	\
+	x(alloc_v4_backpointers_start_bad,			97)	\
+	x(alloc_key_data_type_bad,				98)	\
+	x(alloc_key_empty_but_have_data,			99)	\
+	x(alloc_key_dirty_sectors_0,				100)	\
+	x(alloc_key_data_type_inconsistency,			101)	\
+	x(alloc_key_to_missing_dev_bucket,			102)	\
+	x(alloc_key_cached_inconsistency,			103)	\
+	x(alloc_key_cached_but_read_time_zero,			104)	\
+	x(alloc_key_to_missing_lru_entry,			105)	\
+	x(alloc_key_data_type_wrong,				106)	\
+	x(alloc_key_gen_wrong,					107)	\
+	x(alloc_key_dirty_sectors_wrong,			108)	\
+	x(alloc_key_cached_sectors_wrong,			109)	\
+	x(alloc_key_stripe_wrong,				110)	\
+	x(alloc_key_stripe_redundancy_wrong,			111)	\
+	x(bucket_sector_count_overflow,				112)	\
+	x(bucket_metadata_type_mismatch,			113)	\
+	x(need_discard_key_wrong,				114)	\
+	x(freespace_key_wrong,					115)	\
+	x(freespace_hole_missing,				116)	\
+	x(bucket_gens_val_size_bad,				117)	\
+	x(bucket_gens_key_wrong,				118)	\
+	x(bucket_gens_hole_wrong,				119)	\
+	x(bucket_gens_to_invalid_dev,				120)	\
+	x(bucket_gens_to_invalid_buckets,			121)	\
+	x(bucket_gens_nonzero_for_invalid_buckets,		122)	\
+	x(need_discard_freespace_key_to_invalid_dev_bucket,	123)	\
+	x(need_discard_freespace_key_bad,			124)	\
+	x(backpointer_bucket_offset_wrong,			125)	\
+	x(backpointer_to_missing_device,			126)	\
+	x(backpointer_to_missing_alloc,				127)	\
+	x(backpointer_to_missing_ptr,				128)	\
+	x(lru_entry_at_time_0,					129)	\
+	x(lru_entry_to_invalid_bucket,				130)	\
+	x(lru_entry_bad,					131)	\
+	x(btree_ptr_val_too_big,				132)	\
+	x(btree_ptr_v2_val_too_big,				133)	\
+	x(btree_ptr_has_non_ptr,				134)	\
+	x(extent_ptrs_invalid_entry,				135)	\
+	x(extent_ptrs_no_ptrs,					136)	\
+	x(extent_ptrs_too_many_ptrs,				137)	\
+	x(extent_ptrs_redundant_crc,				138)	\
+	x(extent_ptrs_redundant_stripe,				139)	\
+	x(extent_ptrs_unwritten,				140)	\
+	x(extent_ptrs_written_and_unwritten,			141)	\
+	x(ptr_to_invalid_device,				142)	\
+	x(ptr_to_duplicate_device,				143)	\
+	x(ptr_after_last_bucket,				144)	\
+	x(ptr_before_first_bucket,				145)	\
+	x(ptr_spans_multiple_buckets,				146)	\
+	x(ptr_to_missing_backpointer,				147)	\
+	x(ptr_to_missing_alloc_key,				148)	\
+	x(ptr_to_missing_replicas_entry,			149)	\
+	x(ptr_to_missing_stripe,				150)	\
+	x(ptr_to_incorrect_stripe,				151)	\
+	x(ptr_gen_newer_than_bucket_gen,			152)	\
+	x(ptr_too_stale,					153)	\
+	x(stale_dirty_ptr,					154)	\
+	x(ptr_bucket_data_type_mismatch,			155)	\
+	x(ptr_cached_and_erasure_coded,				156)	\
+	x(ptr_crc_uncompressed_size_too_small,			157)	\
+	x(ptr_crc_csum_type_unknown,				158)	\
+	x(ptr_crc_compression_type_unknown,			159)	\
+	x(ptr_crc_redundant,					160)	\
+	x(ptr_crc_uncompressed_size_too_big,			161)	\
+	x(ptr_crc_nonce_mismatch,				162)	\
+	x(ptr_stripe_redundant,					163)	\
+	x(reservation_key_nr_replicas_invalid,			164)	\
+	x(reflink_v_refcount_wrong,				165)	\
+	x(reflink_p_to_missing_reflink_v,			166)	\
+	x(stripe_pos_bad,					167)	\
+	x(stripe_val_size_bad,					168)	\
+	x(stripe_sector_count_wrong,				169)	\
+	x(snapshot_tree_pos_bad,				170)	\
+	x(snapshot_tree_to_missing_snapshot,			171)	\
+	x(snapshot_tree_to_missing_subvol,			172)	\
+	x(snapshot_tree_to_wrong_subvol,			173)	\
+	x(snapshot_tree_to_snapshot_subvol,			174)	\
+	x(snapshot_pos_bad,					175)	\
+	x(snapshot_parent_bad,					176)	\
+	x(snapshot_children_not_normalized,			177)	\
+	x(snapshot_child_duplicate,				178)	\
+	x(snapshot_child_bad,					179)	\
+	x(snapshot_skiplist_not_normalized,			180)	\
+	x(snapshot_skiplist_bad,				181)	\
+	x(snapshot_should_not_have_subvol,			182)	\
+	x(snapshot_to_bad_snapshot_tree,			183)	\
+	x(snapshot_bad_depth,					184)	\
+	x(snapshot_bad_skiplist,				185)	\
+	x(subvol_pos_bad,					186)	\
+	x(subvol_not_master_and_not_snapshot,			187)	\
+	x(subvol_to_missing_root,				188)	\
+	x(subvol_root_wrong_bi_subvol,				189)	\
+	x(bkey_in_missing_snapshot,				190)	\
+	x(inode_pos_inode_nonzero,				191)	\
+	x(inode_pos_blockdev_range,				192)	\
+	x(inode_unpack_error,					193)	\
+	x(inode_str_hash_invalid,				194)	\
+	x(inode_v3_fields_start_bad,				195)	\
+	x(inode_snapshot_mismatch,				196)	\
+	x(inode_unlinked_but_clean,				197)	\
+	x(inode_unlinked_but_nlink_nonzero,			198)	\
+	x(inode_checksum_type_invalid,				199)	\
+	x(inode_compression_type_invalid,			200)	\
+	x(inode_subvol_root_but_not_dir,			201)	\
+	x(inode_i_size_dirty_but_clean,				202)	\
+	x(inode_i_sectors_dirty_but_clean,			203)	\
+	x(inode_i_sectors_wrong,				204)	\
+	x(inode_dir_wrong_nlink,				205)	\
+	x(inode_dir_multiple_links,				206)	\
+	x(inode_multiple_links_but_nlink_0,			207)	\
+	x(inode_wrong_backpointer,				208)	\
+	x(inode_wrong_nlink,					209)	\
+	x(inode_unreachable,					210)	\
+	x(deleted_inode_but_clean,				211)	\
+	x(deleted_inode_missing,				212)	\
+	x(deleted_inode_is_dir,					213)	\
+	x(deleted_inode_not_unlinked,				214)	\
+	x(extent_overlapping,					215)	\
+	x(extent_in_missing_inode,				216)	\
+	x(extent_in_non_reg_inode,				217)	\
+	x(extent_past_end_of_inode,				218)	\
+	x(dirent_empty_name,					219)	\
+	x(dirent_val_too_big,					220)	\
+	x(dirent_name_too_long,					221)	\
+	x(dirent_name_embedded_nul,				222)	\
+	x(dirent_name_dot_or_dotdot,				223)	\
+	x(dirent_name_has_slash,				224)	\
+	x(dirent_d_type_wrong,					225)	\
+	x(inode_bi_parent_wrong,				226)	\
+	x(dirent_in_missing_dir_inode,				227)	\
+	x(dirent_in_non_dir_inode,				228)	\
+	x(dirent_to_missing_inode,				229)	\
+	x(dirent_to_missing_subvol,				230)	\
+	x(dirent_to_itself,					231)	\
+	x(quota_type_invalid,					232)	\
+	x(xattr_val_size_too_small,				233)	\
+	x(xattr_val_size_too_big,				234)	\
+	x(xattr_invalid_type,					235)	\
+	x(xattr_name_invalid_chars,				236)	\
+	x(xattr_in_missing_inode,				237)	\
+	x(root_subvol_missing,					238)	\
+	x(root_dir_missing,					239)	\
+	x(root_inode_not_dir,					240)	\
+	x(dir_loop,						241)	\
+	x(hash_table_key_duplicate,				242)	\
+	x(hash_table_key_wrong_offset,				243)	\
+	x(unlinked_inode_not_on_deleted_list,			244)	\
+	x(reflink_p_front_pad_bad,				245)	\
+	x(journal_entry_dup_same_device,			246)	\
+	x(inode_bi_subvol_missing,				247)	\
+	x(inode_bi_subvol_wrong,				248)	\
+	x(inode_points_to_missing_dirent,			249)	\
+	x(inode_points_to_wrong_dirent,				250)	\
+	x(inode_bi_parent_nonzero,				251)	\
+	x(dirent_to_missing_parent_subvol,			252)	\
+	x(dirent_not_visible_in_parent_subvol,			253)	\
+	x(subvol_fs_path_parent_wrong,				254)	\
+	x(subvol_root_fs_path_parent_nonzero,			255)	\
+	x(subvol_children_not_set,				256)	\
+	x(subvol_children_bad,					257)	\
+	x(subvol_loop,						258)	\
+	x(subvol_unreachable,					259)	\
+	x(btree_node_bkey_bad_u64s,				260)	\
+	x(btree_node_topology_empty_interior_node,		261)	\
+	x(btree_ptr_v2_min_key_bad,				262)	\
+	x(btree_root_unreadable_and_scan_found_nothing,		263)	\
+	x(snapshot_node_missing,				264)	\
+	x(dup_backpointer_to_bad_csum_extent,			265)	\
+	x(btree_bitmap_not_marked,				266)	\
+	x(sb_clean_entry_overrun,				267)	\
+	x(btree_ptr_v2_written_0,				268)	\
+	x(subvol_snapshot_bad,					269)	\
+	x(subvol_inode_bad,					270)
+
+enum bch_sb_error_id {
+#define x(t, n) BCH_FSCK_ERR_##t = n,
+	BCH_SB_ERRS()
+#undef x
+	BCH_SB_ERR_MAX
+};
+
+struct bch_sb_field_errors {
+	struct bch_sb_field	field;
+	struct bch_sb_field_error_entry {
+		__le64		v;
+		__le64		last_error_time;
+	}			entries[];
+};
+
+LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID,	struct bch_sb_field_error_entry, v,  0, 16);
+LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR,	struct bch_sb_field_error_entry, v, 16, 64);
+
+#endif /* _BCACHEFS_SB_ERRORS_FORMAT_H */
diff --git a/fs/bcachefs/sb-errors_types.h b/fs/bcachefs/sb-errors_types.h
index 666599d3fb9d..40325239c3b0 100644
--- a/fs/bcachefs/sb-errors_types.h
+++ b/fs/bcachefs/sb-errors_types.h
@@ -4,286 +4,6 @@
 
 #include "darray.h"
 
-#define BCH_SB_ERRS()							\
-	x(clean_but_journal_not_empty,				0)	\
-	x(dirty_but_no_journal_entries,				1)	\
-	x(dirty_but_no_journal_entries_post_drop_nonflushes,	2)	\
-	x(sb_clean_journal_seq_mismatch,			3)	\
-	x(sb_clean_btree_root_mismatch,				4)	\
-	x(sb_clean_missing,					5)	\
-	x(jset_unsupported_version,				6)	\
-	x(jset_unknown_csum,					7)	\
-	x(jset_last_seq_newer_than_seq,				8)	\
-	x(jset_past_bucket_end,					9)	\
-	x(jset_seq_blacklisted,					10)	\
-	x(journal_entries_missing,				11)	\
-	x(journal_entry_replicas_not_marked,			12)	\
-	x(journal_entry_past_jset_end,				13)	\
-	x(journal_entry_replicas_data_mismatch,			14)	\
-	x(journal_entry_bkey_u64s_0,				15)	\
-	x(journal_entry_bkey_past_end,				16)	\
-	x(journal_entry_bkey_bad_format,			17)	\
-	x(journal_entry_bkey_invalid,				18)	\
-	x(journal_entry_btree_root_bad_size,			19)	\
-	x(journal_entry_blacklist_bad_size,			20)	\
-	x(journal_entry_blacklist_v2_bad_size,			21)	\
-	x(journal_entry_blacklist_v2_start_past_end,		22)	\
-	x(journal_entry_usage_bad_size,				23)	\
-	x(journal_entry_data_usage_bad_size,			24)	\
-	x(journal_entry_clock_bad_size,				25)	\
-	x(journal_entry_clock_bad_rw,				26)	\
-	x(journal_entry_dev_usage_bad_size,			27)	\
-	x(journal_entry_dev_usage_bad_dev,			28)	\
-	x(journal_entry_dev_usage_bad_pad,			29)	\
-	x(btree_node_unreadable,				30)	\
-	x(btree_node_fault_injected,				31)	\
-	x(btree_node_bad_magic,					32)	\
-	x(btree_node_bad_seq,					33)	\
-	x(btree_node_unsupported_version,			34)	\
-	x(btree_node_bset_older_than_sb_min,			35)	\
-	x(btree_node_bset_newer_than_sb,			36)	\
-	x(btree_node_data_missing,				37)	\
-	x(btree_node_bset_after_end,				38)	\
-	x(btree_node_replicas_sectors_written_mismatch,		39)	\
-	x(btree_node_replicas_data_mismatch,			40)	\
-	x(bset_unknown_csum,					41)	\
-	x(bset_bad_csum,					42)	\
-	x(bset_past_end_of_btree_node,				43)	\
-	x(bset_wrong_sector_offset,				44)	\
-	x(bset_empty,						45)	\
-	x(bset_bad_seq,						46)	\
-	x(bset_blacklisted_journal_seq,				47)	\
-	x(first_bset_blacklisted_journal_seq,			48)	\
-	x(btree_node_bad_btree,					49)	\
-	x(btree_node_bad_level,					50)	\
-	x(btree_node_bad_min_key,				51)	\
-	x(btree_node_bad_max_key,				52)	\
-	x(btree_node_bad_format,				53)	\
-	x(btree_node_bkey_past_bset_end,			54)	\
-	x(btree_node_bkey_bad_format,				55)	\
-	x(btree_node_bad_bkey,					56)	\
-	x(btree_node_bkey_out_of_order,				57)	\
-	x(btree_root_bkey_invalid,				58)	\
-	x(btree_root_read_error,				59)	\
-	x(btree_root_bad_min_key,				60)	\
-	x(btree_root_bad_max_key,				61)	\
-	x(btree_node_read_error,				62)	\
-	x(btree_node_topology_bad_min_key,			63)	\
-	x(btree_node_topology_bad_max_key,			64)	\
-	x(btree_node_topology_overwritten_by_prev_node,		65)	\
-	x(btree_node_topology_overwritten_by_next_node,		66)	\
-	x(btree_node_topology_interior_node_empty,		67)	\
-	x(fs_usage_hidden_wrong,				68)	\
-	x(fs_usage_btree_wrong,					69)	\
-	x(fs_usage_data_wrong,					70)	\
-	x(fs_usage_cached_wrong,				71)	\
-	x(fs_usage_reserved_wrong,				72)	\
-	x(fs_usage_persistent_reserved_wrong,			73)	\
-	x(fs_usage_nr_inodes_wrong,				74)	\
-	x(fs_usage_replicas_wrong,				75)	\
-	x(dev_usage_buckets_wrong,				76)	\
-	x(dev_usage_sectors_wrong,				77)	\
-	x(dev_usage_fragmented_wrong,				78)	\
-	x(dev_usage_buckets_ec_wrong,				79)	\
-	x(bkey_version_in_future,				80)	\
-	x(bkey_u64s_too_small,					81)	\
-	x(bkey_invalid_type_for_btree,				82)	\
-	x(bkey_extent_size_zero,				83)	\
-	x(bkey_extent_size_greater_than_offset,			84)	\
-	x(bkey_size_nonzero,					85)	\
-	x(bkey_snapshot_nonzero,				86)	\
-	x(bkey_snapshot_zero,					87)	\
-	x(bkey_at_pos_max,					88)	\
-	x(bkey_before_start_of_btree_node,			89)	\
-	x(bkey_after_end_of_btree_node,				90)	\
-	x(bkey_val_size_nonzero,				91)	\
-	x(bkey_val_size_too_small,				92)	\
-	x(alloc_v1_val_size_bad,				93)	\
-	x(alloc_v2_unpack_error,				94)	\
-	x(alloc_v3_unpack_error,				95)	\
-	x(alloc_v4_val_size_bad,				96)	\
-	x(alloc_v4_backpointers_start_bad,			97)	\
-	x(alloc_key_data_type_bad,				98)	\
-	x(alloc_key_empty_but_have_data,			99)	\
-	x(alloc_key_dirty_sectors_0,				100)	\
-	x(alloc_key_data_type_inconsistency,			101)	\
-	x(alloc_key_to_missing_dev_bucket,			102)	\
-	x(alloc_key_cached_inconsistency,			103)	\
-	x(alloc_key_cached_but_read_time_zero,			104)	\
-	x(alloc_key_to_missing_lru_entry,			105)	\
-	x(alloc_key_data_type_wrong,				106)	\
-	x(alloc_key_gen_wrong,					107)	\
-	x(alloc_key_dirty_sectors_wrong,			108)	\
-	x(alloc_key_cached_sectors_wrong,			109)	\
-	x(alloc_key_stripe_wrong,				110)	\
-	x(alloc_key_stripe_redundancy_wrong,			111)	\
-	x(bucket_sector_count_overflow,				112)	\
-	x(bucket_metadata_type_mismatch,			113)	\
-	x(need_discard_key_wrong,				114)	\
-	x(freespace_key_wrong,					115)	\
-	x(freespace_hole_missing,				116)	\
-	x(bucket_gens_val_size_bad,				117)	\
-	x(bucket_gens_key_wrong,				118)	\
-	x(bucket_gens_hole_wrong,				119)	\
-	x(bucket_gens_to_invalid_dev,				120)	\
-	x(bucket_gens_to_invalid_buckets,			121)	\
-	x(bucket_gens_nonzero_for_invalid_buckets,		122)	\
-	x(need_discard_freespace_key_to_invalid_dev_bucket,	123)	\
-	x(need_discard_freespace_key_bad,			124)	\
-	x(backpointer_bucket_offset_wrong,			125)	\
-	x(backpointer_to_missing_device,			126)	\
-	x(backpointer_to_missing_alloc,				127)	\
-	x(backpointer_to_missing_ptr,				128)	\
-	x(lru_entry_at_time_0,					129)	\
-	x(lru_entry_to_invalid_bucket,				130)	\
-	x(lru_entry_bad,					131)	\
-	x(btree_ptr_val_too_big,				132)	\
-	x(btree_ptr_v2_val_too_big,				133)	\
-	x(btree_ptr_has_non_ptr,				134)	\
-	x(extent_ptrs_invalid_entry,				135)	\
-	x(extent_ptrs_no_ptrs,					136)	\
-	x(extent_ptrs_too_many_ptrs,				137)	\
-	x(extent_ptrs_redundant_crc,				138)	\
-	x(extent_ptrs_redundant_stripe,				139)	\
-	x(extent_ptrs_unwritten,				140)	\
-	x(extent_ptrs_written_and_unwritten,			141)	\
-	x(ptr_to_invalid_device,				142)	\
-	x(ptr_to_duplicate_device,				143)	\
-	x(ptr_after_last_bucket,				144)	\
-	x(ptr_before_first_bucket,				145)	\
-	x(ptr_spans_multiple_buckets,				146)	\
-	x(ptr_to_missing_backpointer,				147)	\
-	x(ptr_to_missing_alloc_key,				148)	\
-	x(ptr_to_missing_replicas_entry,			149)	\
-	x(ptr_to_missing_stripe,				150)	\
-	x(ptr_to_incorrect_stripe,				151)	\
-	x(ptr_gen_newer_than_bucket_gen,			152)	\
-	x(ptr_too_stale,					153)	\
-	x(stale_dirty_ptr,					154)	\
-	x(ptr_bucket_data_type_mismatch,			155)	\
-	x(ptr_cached_and_erasure_coded,				156)	\
-	x(ptr_crc_uncompressed_size_too_small,			157)	\
-	x(ptr_crc_csum_type_unknown,				158)	\
-	x(ptr_crc_compression_type_unknown,			159)	\
-	x(ptr_crc_redundant,					160)	\
-	x(ptr_crc_uncompressed_size_too_big,			161)	\
-	x(ptr_crc_nonce_mismatch,				162)	\
-	x(ptr_stripe_redundant,					163)	\
-	x(reservation_key_nr_replicas_invalid,			164)	\
-	x(reflink_v_refcount_wrong,				165)	\
-	x(reflink_p_to_missing_reflink_v,			166)	\
-	x(stripe_pos_bad,					167)	\
-	x(stripe_val_size_bad,					168)	\
-	x(stripe_sector_count_wrong,				169)	\
-	x(snapshot_tree_pos_bad,				170)	\
-	x(snapshot_tree_to_missing_snapshot,			171)	\
-	x(snapshot_tree_to_missing_subvol,			172)	\
-	x(snapshot_tree_to_wrong_subvol,			173)	\
-	x(snapshot_tree_to_snapshot_subvol,			174)	\
-	x(snapshot_pos_bad,					175)	\
-	x(snapshot_parent_bad,					176)	\
-	x(snapshot_children_not_normalized,			177)	\
-	x(snapshot_child_duplicate,				178)	\
-	x(snapshot_child_bad,					179)	\
-	x(snapshot_skiplist_not_normalized,			180)	\
-	x(snapshot_skiplist_bad,				181)	\
-	x(snapshot_should_not_have_subvol,			182)	\
-	x(snapshot_to_bad_snapshot_tree,			183)	\
-	x(snapshot_bad_depth,					184)	\
-	x(snapshot_bad_skiplist,				185)	\
-	x(subvol_pos_bad,					186)	\
-	x(subvol_not_master_and_not_snapshot,			187)	\
-	x(subvol_to_missing_root,				188)	\
-	x(subvol_root_wrong_bi_subvol,				189)	\
-	x(bkey_in_missing_snapshot,				190)	\
-	x(inode_pos_inode_nonzero,				191)	\
-	x(inode_pos_blockdev_range,				192)	\
-	x(inode_unpack_error,					193)	\
-	x(inode_str_hash_invalid,				194)	\
-	x(inode_v3_fields_start_bad,				195)	\
-	x(inode_snapshot_mismatch,				196)	\
-	x(inode_unlinked_but_clean,				197)	\
-	x(inode_unlinked_but_nlink_nonzero,			198)	\
-	x(inode_checksum_type_invalid,				199)	\
-	x(inode_compression_type_invalid,			200)	\
-	x(inode_subvol_root_but_not_dir,			201)	\
-	x(inode_i_size_dirty_but_clean,				202)	\
-	x(inode_i_sectors_dirty_but_clean,			203)	\
-	x(inode_i_sectors_wrong,				204)	\
-	x(inode_dir_wrong_nlink,				205)	\
-	x(inode_dir_multiple_links,				206)	\
-	x(inode_multiple_links_but_nlink_0,			207)	\
-	x(inode_wrong_backpointer,				208)	\
-	x(inode_wrong_nlink,					209)	\
-	x(inode_unreachable,					210)	\
-	x(deleted_inode_but_clean,				211)	\
-	x(deleted_inode_missing,				212)	\
-	x(deleted_inode_is_dir,					213)	\
-	x(deleted_inode_not_unlinked,				214)	\
-	x(extent_overlapping,					215)	\
-	x(extent_in_missing_inode,				216)	\
-	x(extent_in_non_reg_inode,				217)	\
-	x(extent_past_end_of_inode,				218)	\
-	x(dirent_empty_name,					219)	\
-	x(dirent_val_too_big,					220)	\
-	x(dirent_name_too_long,					221)	\
-	x(dirent_name_embedded_nul,				222)	\
-	x(dirent_name_dot_or_dotdot,				223)	\
-	x(dirent_name_has_slash,				224)	\
-	x(dirent_d_type_wrong,					225)	\
-	x(inode_bi_parent_wrong,				226)	\
-	x(dirent_in_missing_dir_inode,				227)	\
-	x(dirent_in_non_dir_inode,				228)	\
-	x(dirent_to_missing_inode,				229)	\
-	x(dirent_to_missing_subvol,				230)	\
-	x(dirent_to_itself,					231)	\
-	x(quota_type_invalid,					232)	\
-	x(xattr_val_size_too_small,				233)	\
-	x(xattr_val_size_too_big,				234)	\
-	x(xattr_invalid_type,					235)	\
-	x(xattr_name_invalid_chars,				236)	\
-	x(xattr_in_missing_inode,				237)	\
-	x(root_subvol_missing,					238)	\
-	x(root_dir_missing,					239)	\
-	x(root_inode_not_dir,					240)	\
-	x(dir_loop,						241)	\
-	x(hash_table_key_duplicate,				242)	\
-	x(hash_table_key_wrong_offset,				243)	\
-	x(unlinked_inode_not_on_deleted_list,			244)	\
-	x(reflink_p_front_pad_bad,				245)	\
-	x(journal_entry_dup_same_device,			246)	\
-	x(inode_bi_subvol_missing,				247)	\
-	x(inode_bi_subvol_wrong,				248)	\
-	x(inode_points_to_missing_dirent,			249)	\
-	x(inode_points_to_wrong_dirent,				250)	\
-	x(inode_bi_parent_nonzero,				251)	\
-	x(dirent_to_missing_parent_subvol,			252)	\
-	x(dirent_not_visible_in_parent_subvol,			253)	\
-	x(subvol_fs_path_parent_wrong,				254)	\
-	x(subvol_root_fs_path_parent_nonzero,			255)	\
-	x(subvol_children_not_set,				256)	\
-	x(subvol_children_bad,					257)	\
-	x(subvol_loop,						258)	\
-	x(subvol_unreachable,					259)	\
-	x(btree_node_bkey_bad_u64s,				260)	\
-	x(btree_node_topology_empty_interior_node,		261)	\
-	x(btree_ptr_v2_min_key_bad,				262)	\
-	x(btree_root_unreadable_and_scan_found_nothing,		263)	\
-	x(snapshot_node_missing,				264)	\
-	x(dup_backpointer_to_bad_csum_extent,			265)	\
-	x(btree_bitmap_not_marked,				266)	\
-	x(sb_clean_entry_overrun,				267)	\
-	x(btree_ptr_v2_written_0,				268)	\
-	x(subvol_snapshot_bad,					269)	\
-	x(subvol_inode_bad,					270)
-
-enum bch_sb_error_id {
-#define x(t, n) BCH_FSCK_ERR_##t = n,
-	BCH_SB_ERRS()
-#undef x
-	BCH_SB_ERR_MAX
-};
-
 struct bch_sb_error_entry_cpu {
 	u64			id:16,
 				nr:48;
@@ -293,4 +13,3 @@ struct bch_sb_error_entry_cpu {
 typedef DARRAY(struct bch_sb_error_entry_cpu) bch_sb_errors_cpu;
 
 #endif /* _BCACHEFS_SB_ERRORS_TYPES_H */
-
-- 
cgit v1.2.3


From 8528bde1b66bab9a0abc2f521523abd00049c81b Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Tue, 28 May 2024 18:20:52 -0400
Subject: bcachefs: Fix uninitialized var warning

Can't actually be used uninitialized, but gcc was being silly.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/backpointers.c | 2 +-
 fs/bcachefs/buckets.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 692b1c7d5018..4321f9fb73bd 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -690,7 +690,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
 
 	ptrs = bch2_bkey_ptrs_c(k);
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-		struct bpos bucket_pos;
+		struct bpos bucket_pos = POS_MIN;
 		struct bch_backpointer bp;
 
 		if (p.ptr.cached)
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index b469586517a8..ed97712d0db1 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -1134,7 +1134,7 @@ static int __trigger_extent(struct btree_trans *trans,
 	r.e.nr_required	= 1;
 
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-		s64 disk_sectors;
+		s64 disk_sectors = 0;
 		ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags);
 		if (ret < 0)
 			return ret;
-- 
cgit v1.2.3


From 33c563ebf8d3deed7d8addd20d77398ac737ef9a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 8 May 2024 22:50:34 +0200
Subject: netfilter: nft_payload: skbuff vlan metadata mangle support

Userspace assumes vlan header is present at a given offset, but vlan
offload allows to store this in metadata fields of the skbuff. Hence
mangling vlan results in a garbled packet. Handle this transparently by
adding a parser to the kernel.

If vlan metadata is present and payload offset is over 12 bytes (source
and destination mac address fields), then subtract vlan header present
in vlan metadata, otherwise mangle vlan metadata based on offset and
length, extracting data from the source register.

This is similar to:

  8cfd23e67401 ("netfilter: nft_payload: work around vlan header stripping")

to deal with vlan payload mangling.

Fixes: 7ec3f7b47b8d ("netfilter: nft_payload: add packet mangling support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_payload.c | 72 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 65 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index a3cb5dbcb362..0c43d748e23a 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -145,12 +145,12 @@ int nft_payload_inner_offset(const struct nft_pktinfo *pkt)
 	return pkt->inneroff;
 }
 
-static bool nft_payload_need_vlan_copy(const struct nft_payload *priv)
+static bool nft_payload_need_vlan_adjust(u32 offset, u32 len)
 {
-	unsigned int len = priv->offset + priv->len;
+	unsigned int boundary = offset + len;
 
 	/* data past ether src/dst requested, copy needed */
-	if (len > offsetof(struct ethhdr, h_proto))
+	if (boundary > offsetof(struct ethhdr, h_proto))
 		return true;
 
 	return false;
@@ -174,7 +174,7 @@ void nft_payload_eval(const struct nft_expr *expr,
 			goto err;
 
 		if (skb_vlan_tag_present(skb) &&
-		    nft_payload_need_vlan_copy(priv)) {
+		    nft_payload_need_vlan_adjust(priv->offset, priv->len)) {
 			if (!nft_payload_copy_vlan(dest, skb,
 						   priv->offset, priv->len))
 				goto err;
@@ -801,21 +801,79 @@ struct nft_payload_set {
 	u8			csum_flags;
 };
 
+/* This is not struct vlan_hdr. */
+struct nft_payload_vlan_hdr {
+	__be16			h_vlan_proto;
+	__be16			h_vlan_TCI;
+};
+
+static bool
+nft_payload_set_vlan(const u32 *src, struct sk_buff *skb, u8 offset, u8 len,
+		     int *vlan_hlen)
+{
+	struct nft_payload_vlan_hdr *vlanh;
+	__be16 vlan_proto;
+	u16 vlan_tci;
+
+	if (offset >= offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto)) {
+		*vlan_hlen = VLAN_HLEN;
+		return true;
+	}
+
+	switch (offset) {
+	case offsetof(struct vlan_ethhdr, h_vlan_proto):
+		if (len == 2) {
+			vlan_proto = nft_reg_load_be16(src);
+			skb->vlan_proto = vlan_proto;
+		} else if (len == 4) {
+			vlanh = (struct nft_payload_vlan_hdr *)src;
+			__vlan_hwaccel_put_tag(skb, vlanh->h_vlan_proto,
+					       ntohs(vlanh->h_vlan_TCI));
+		} else {
+			return false;
+		}
+		break;
+	case offsetof(struct vlan_ethhdr, h_vlan_TCI):
+		if (len != 2)
+			return false;
+
+		vlan_tci = ntohs(nft_reg_load_be16(src));
+		skb->vlan_tci = vlan_tci;
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
 static void nft_payload_set_eval(const struct nft_expr *expr,
 				 struct nft_regs *regs,
 				 const struct nft_pktinfo *pkt)
 {
 	const struct nft_payload_set *priv = nft_expr_priv(expr);
-	struct sk_buff *skb = pkt->skb;
 	const u32 *src = &regs->data[priv->sreg];
-	int offset, csum_offset;
+	int offset, csum_offset, vlan_hlen = 0;
+	struct sk_buff *skb = pkt->skb;
 	__wsum fsum, tsum;
 
 	switch (priv->base) {
 	case NFT_PAYLOAD_LL_HEADER:
 		if (!skb_mac_header_was_set(skb))
 			goto err;
-		offset = skb_mac_header(skb) - skb->data;
+
+		if (skb_vlan_tag_present(skb) &&
+		    nft_payload_need_vlan_adjust(priv->offset, priv->len)) {
+			if (!nft_payload_set_vlan(src, skb,
+						  priv->offset, priv->len,
+						  &vlan_hlen))
+				goto err;
+
+			if (!vlan_hlen)
+				return;
+		}
+
+		offset = skb_mac_header(skb) - skb->data - vlan_hlen;
 		break;
 	case NFT_PAYLOAD_NETWORK_HEADER:
 		offset = skb_network_offset(skb);
-- 
cgit v1.2.3


From 21a673bddc8fd4873c370caf9ae70ffc6d47e8d3 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 13 May 2024 12:27:15 +0200
Subject: netfilter: tproxy: bail out if IP has been disabled on the device

syzbot reports:
general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN PTI
KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f]
[..]
RIP: 0010:nf_tproxy_laddr4+0xb7/0x340 net/ipv4/netfilter/nf_tproxy_ipv4.c:62
Call Trace:
 nft_tproxy_eval_v4 net/netfilter/nft_tproxy.c:56 [inline]
 nft_tproxy_eval+0xa9a/0x1a00 net/netfilter/nft_tproxy.c:168

__in_dev_get_rcu() can return NULL, so check for this.

Reported-and-tested-by: syzbot+b94a6818504ea90d7661@syzkaller.appspotmail.com
Fixes: cc6eb4338569 ("tproxy: use the interface primary IP address as a default value for --on-ip")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_tproxy_ipv4.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
index 69e331799604..73e66a088e25 100644
--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
+++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
@@ -58,6 +58,8 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
 
 	laddr = 0;
 	indev = __in_dev_get_rcu(skb->dev);
+	if (!indev)
+		return daddr;
 
 	in_dev_for_each_ifa_rcu(ifa, indev) {
 		if (ifa->ifa_flags & IFA_F_SECONDARY)
-- 
cgit v1.2.3


From e8ded22ef0f4831279c363c264cd41cd9d59ca9e Mon Sep 17 00:00:00 2001
From: Eric Garver <eric@garver.life>
Date: Tue, 21 May 2024 10:25:05 -0400
Subject: netfilter: nft_fib: allow from forward/input without iif selector

This removes the restriction of needing iif selector in the
forward/input hooks for fib lookups when requested result is
oif/oifname.

Removing this restriction allows "loose" lookups from the forward hooks.

Fixes: be8be04e5ddb ("netfilter: nft_fib: reverse path filter for policy-based routing on iif")
Signed-off-by: Eric Garver <eric@garver.life>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_fib.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 37cfe6dd712d..b58f62195ff3 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -35,11 +35,9 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	switch (priv->result) {
 	case NFT_FIB_RESULT_OIF:
 	case NFT_FIB_RESULT_OIFNAME:
-		hooks = (1 << NF_INET_PRE_ROUTING);
-		if (priv->flags & NFTA_FIB_F_IIF) {
-			hooks |= (1 << NF_INET_LOCAL_IN) |
-				 (1 << NF_INET_FORWARD);
-		}
+		hooks = (1 << NF_INET_PRE_ROUTING) |
+			(1 << NF_INET_LOCAL_IN) |
+			(1 << NF_INET_FORWARD);
 		break;
 	case NFT_FIB_RESULT_ADDRTYPE:
 		if (priv->flags & NFTA_FIB_F_IIF)
-- 
cgit v1.2.3


From d55510527153d17a3af8cc2df69c04f95ae1350d Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 28 May 2024 15:55:51 -0700
Subject: cxl/test: Add missing vmalloc.h for tools/testing/cxl/test/mem.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

tools/testing/cxl/test/mem.c uses vmalloc() and vfree() but does not
include linux/vmalloc.h. Kernel v6.10 made changes that causes the
currently included headers not depend on vmalloc.h and therefore
mem.c can no longer compile. Add linux/vmalloc.h to fix compile
issue.

  CC [M]  tools/testing/cxl/test/mem.o
tools/testing/cxl/test/mem.c: In function ‘label_area_release’:
tools/testing/cxl/test/mem.c:1428:9: error: implicit declaration of function ‘vfree’; did you mean ‘kvfree’? [-Werror=implicit-function-declaration]
 1428 |         vfree(lsa);
      |         ^~~~~
      |         kvfree
tools/testing/cxl/test/mem.c: In function ‘cxl_mock_mem_probe’:
tools/testing/cxl/test/mem.c:1466:22: error: implicit declaration of function ‘vmalloc’; did you mean ‘kmalloc’? [-Werror=implicit-function-declaration]
 1466 |         mdata->lsa = vmalloc(LSA_SIZE);
      |                      ^~~~~~~
      |                      kmalloc

Fixes: 7d3eb23c4ccf ("tools/testing/cxl: Introduce a mock memory device + driver")
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Alison Schofield <alison.schofield@intel.com>
Link: https://lore.kernel.org/r/20240528225551.1025977-1-dave.jiang@intel.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 tools/testing/cxl/test/mem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 6584443144de..eaf091a3d331 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -3,6 +3,7 @@
 
 #include <linux/platform_device.h>
 #include <linux/mod_devicetable.h>
+#include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/sizes.h>
-- 
cgit v1.2.3


From 49ba7b515c4c0719b866d16f068e62d16a8a3dd1 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Tue, 7 May 2024 13:34:21 +0800
Subject: cxl/region: Fix memregion leaks in devm_cxl_add_region()

Move the mode verification to __create_region() before allocating the
memregion to avoid the memregion leaks.

Fixes: 6e099264185d ("cxl/region: Add volatile region creation support")
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://lore.kernel.org/r/20240507053421.456439-1-lizhijian@fujitsu.com
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/cxl/core/region.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 00a9f0eef8dd..3c2b6144be23 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2352,15 +2352,6 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
 	struct device *dev;
 	int rc;
 
-	switch (mode) {
-	case CXL_DECODER_RAM:
-	case CXL_DECODER_PMEM:
-		break;
-	default:
-		dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
-		return ERR_PTR(-EINVAL);
-	}
-
 	cxlr = cxl_region_alloc(cxlrd, id);
 	if (IS_ERR(cxlr))
 		return cxlr;
@@ -2415,6 +2406,15 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
 {
 	int rc;
 
+	switch (mode) {
+	case CXL_DECODER_RAM:
+	case CXL_DECODER_PMEM:
+		break;
+	default:
+		dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
+		return ERR_PTR(-EINVAL);
+	}
+
 	rc = memregion_alloc(GFP_KERNEL);
 	if (rc < 0)
 		return ERR_PTR(rc);
-- 
cgit v1.2.3


From 83208cbf2f08c270033003e10f3e7351de64a5c5 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Tue, 28 May 2024 19:21:59 -0400
Subject: bcachefs: Don't return -EROFS from mount on inconsistency error

We were accidentally returning -EROFS during recovery on filesystem
inconsistency - since this is what the journal returns on emergency
shutdown.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/fs.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 96040a95cf46..cd388f1702dc 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1939,8 +1939,7 @@ got_sb:
 
 	if (IS_ERR(sb)) {
 		ret = PTR_ERR(sb);
-		ret = bch2_err_class(ret);
-		return ERR_PTR(ret);
+		goto err;
 	}
 
 	c = sb->s_fs_info;
@@ -2016,6 +2015,15 @@ out:
 err_put_super:
 	__bch2_fs_stop(c);
 	deactivate_locked_super(sb);
+err:
+	/*
+	 * On an inconsistency error in recovery we might see an -EROFS derived
+	 * errorcode (from the journal), but we don't want to return that to
+	 * userspace as that causes util-linux to retry the mount RO - which is
+	 * confusing:
+	 */
+	if (bch2_err_matches(ret, EROFS) && ret != -EROFS)
+		ret = -EIO;
 	return ERR_PTR(bch2_err_class(ret));
 }
 
-- 
cgit v1.2.3


From e634134180885574d1fe7aa162777ba41e7fcd5b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 27 May 2024 18:39:54 +0300
Subject: net/sched: taprio: make q->picos_per_byte available to
 fill_sched_entry()

In commit b5b73b26b3ca ("taprio: Fix allowing too small intervals"), a
comparison of user input against length_to_duration(q, ETH_ZLEN) was
introduced, to avoid RCU stalls due to frequent hrtimers.

The implementation of length_to_duration() depends on q->picos_per_byte
being set for the link speed. The blamed commit in the Fixes: tag has
moved this too late, so the checks introduced above are ineffective.
The q->picos_per_byte is zero at parse_taprio_schedule() ->
parse_sched_list() -> parse_sched_entry() -> fill_sched_entry() time.

Move the taprio_set_picos_per_byte() call as one of the first things in
taprio_change(), before the bulk of the netlink attribute parsing is
done. That's because it is needed there.

Add a selftest to make sure the issue doesn't get reintroduced.

Fixes: 09dbdf28f9f9 ("net/sched: taprio: fix calculation of maximum gate durations")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20240527153955.553333-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/sch_taprio.c                             |  4 +++-
 .../tc-testing/tc-tests/qdiscs/taprio.json         | 22 ++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 1ab17e8a7260..118915055360 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1848,6 +1848,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 	q->flags = taprio_flags;
 
+	/* Needed for length_to_duration() during netlink attribute parsing */
+	taprio_set_picos_per_byte(dev, q);
+
 	err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
 	if (err < 0)
 		return err;
@@ -1907,7 +1910,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 	if (err < 0)
 		goto free_sched;
 
-	taprio_set_picos_per_byte(dev, q);
 	taprio_update_queue_max_sdu(q, new_admin, stab);
 
 	if (FULL_OFFLOAD_IS_ENABLED(q->flags))
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
index 12da0a939e3e..8f12f00a4f57 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
@@ -132,6 +132,28 @@
             "echo \"1\" > /sys/bus/netdevsim/del_device"
         ]
     },
+    {
+        "id": "6f62",
+        "name": "Add taprio Qdisc with too short interval",
+        "category": [
+            "qdisc",
+            "taprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 2 queues 1@0 1@1 sched-entry S 01 300 sched-entry S 02 1700 clockid CLOCK_TAI",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "qdisc taprio 1: root refcnt",
+        "matchCount": "0",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    },
     {
         "id": "3e1e",
         "name": "Add taprio Qdisc with an invalid cycle-time",
-- 
cgit v1.2.3


From fb66df20a7201e60f2b13d7f95d031b31a8831d3 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 27 May 2024 18:39:55 +0300
Subject: net/sched: taprio: extend minimum interval restriction to entire
 cycle too

It is possible for syzbot to side-step the restriction imposed by the
blamed commit in the Fixes: tag, because the taprio UAPI permits a
cycle-time different from (and potentially shorter than) the sum of
entry intervals.

We need one more restriction, which is that the cycle time itself must
be larger than N * ETH_ZLEN bit times, where N is the number of schedule
entries. This restriction needs to apply regardless of whether the cycle
time came from the user or was the implicit, auto-calculated value, so
we move the existing "cycle == 0" check outside the "if "(!new->cycle_time)"
branch. This way covers both conditions and scenarios.

Add a selftest which illustrates the issue triggered by syzbot.

Fixes: b5b73b26b3ca ("taprio: Fix allowing too small intervals")
Reported-by: syzbot+a7d2b1d5d1af83035567@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/0000000000007d66bc06196e7c66@google.com/
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20240527153955.553333-2-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/sch_taprio.c                             | 10 +++++-----
 .../tc-testing/tc-tests/qdiscs/taprio.json         | 22 ++++++++++++++++++++++
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 118915055360..937a0c513c17 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1151,11 +1151,6 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
 		list_for_each_entry(entry, &new->entries, list)
 			cycle = ktime_add_ns(cycle, entry->interval);
 
-		if (!cycle) {
-			NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
-			return -EINVAL;
-		}
-
 		if (cycle < 0 || cycle > INT_MAX) {
 			NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
 			return -EINVAL;
@@ -1164,6 +1159,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
 		new->cycle_time = cycle;
 	}
 
+	if (new->cycle_time < new->num_entries * length_to_duration(q, ETH_ZLEN)) {
+		NL_SET_ERR_MSG(extack, "'cycle_time' is too small");
+		return -EINVAL;
+	}
+
 	taprio_calculate_gate_durations(q, new);
 
 	return 0;
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
index 8f12f00a4f57..557fb074acf0 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
@@ -154,6 +154,28 @@
             "echo \"1\" > /sys/bus/netdevsim/del_device"
         ]
     },
+    {
+        "id": "831f",
+        "name": "Add taprio Qdisc with too short cycle-time",
+        "category": [
+            "qdisc",
+            "taprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 2 queues 1@0 1@1 sched-entry S 01 200000 sched-entry S 02 200000 cycle-time 100 clockid CLOCK_TAI",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "qdisc taprio 1: root refcnt",
+        "matchCount": "0",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    },
     {
         "id": "3e1e",
         "name": "Add taprio Qdisc with an invalid cycle-time",
-- 
cgit v1.2.3


From a607468b521cc99ca64f19947cb7a40f8c814730 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 19 May 2024 13:24:30 +0900
Subject: kconfig: remove unused expr_is_no()

This has not been used since commit e911503085ae ("Kconfig: Remove
bad inference rules expr_eliminate_dups2()").

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/kconfig/expr.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index d965e427753e..fa50fc45622e 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -302,11 +302,6 @@ static inline int expr_is_yes(struct expr *e)
 	return !e || (e->type == E_SYMBOL && e->left.sym == &symbol_yes);
 }
 
-static inline int expr_is_no(struct expr *e)
-{
-	return e && (e->type == E_SYMBOL && e->left.sym == &symbol_no);
-}
-
 #ifdef __cplusplus
 }
 #endif
-- 
cgit v1.2.3


From aabdc960a283ba78086b0bf66ee74326f49e218e Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 19 May 2024 18:22:27 +0900
Subject: kconfig: fix comparison to constant symbols, 'm', 'n'

Currently, comparisons to 'm' or 'n' result in incorrect output.

[Test Code]

    config MODULES
            def_bool y
            modules

    config A
            def_tristate m

    config B
            def_bool A > n

CONFIG_B is unset, while CONFIG_B=y is expected.

The reason for the issue is because Kconfig compares the tristate values
as strings.

Currently, the .type fields in the constant symbol definitions,
symbol_{yes,mod,no} are unspecified, i.e., S_UNKNOWN.

When expr_calc_value() evaluates 'A > n', it checks the types of 'A' and
'n' to determine how to compare them.

The left-hand side, 'A', is a tristate symbol with a value of 'm', which
corresponds to a numeric value of 1. (Internally, 'y', 'm', and 'n' are
represented as 2, 1, and 0, respectively.)

The right-hand side, 'n', has an unknown type, so it is treated as the
string "n" during the comparison.

expr_calc_value() compares two values numerically only when both can
have numeric values. Otherwise, they are compared as strings.

    symbol    numeric value    ASCII code
    -------------------------------------
      y           2             0x79
      m           1             0x6d
      n           0             0x6e

'm' is greater than 'n' if compared numerically (since 1 is greater
than 0), but smaller than 'n' if compared as strings (since the ASCII
code 0x6d is smaller than 0x6e).

Specifying .type=S_TRISTATE for symbol_{yes,mod,no} fixes the above
test code.

Doing so, however, would cause a regression to the following test code.

[Test Code 2]

    config MODULES
            def_bool n
            modules

    config A
            def_tristate n

    config B
            def_bool A = m

You would get CONFIG_B=y, while CONFIG_B should not be set.

The reason is because sym_get_string_value() turns 'm' into 'n' when the
module feature is disabled. Consequently, expr_calc_value() evaluates
'A = n' instead of 'A = m'. This oddity has been hidden because the type
of 'm' was previously S_UNKNOWN instead of S_TRISTATE.

sym_get_string_value() should not tweak the string because the tristate
value has already been correctly calculated. There is no reason to
return the string "n" where its tristate value is mod.

Fixes: 31847b67bec0 ("kconfig: allow use of relations other than (in)equality")
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/kconfig/symbol.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c
index aa0e25ee5119..0e439d3d48d1 100644
--- a/scripts/kconfig/symbol.c
+++ b/scripts/kconfig/symbol.c
@@ -14,6 +14,7 @@
 
 struct symbol symbol_yes = {
 	.name = "y",
+	.type = S_TRISTATE,
 	.curr = { "y", yes },
 	.menus = LIST_HEAD_INIT(symbol_yes.menus),
 	.flags = SYMBOL_CONST|SYMBOL_VALID,
@@ -21,6 +22,7 @@ struct symbol symbol_yes = {
 
 struct symbol symbol_mod = {
 	.name = "m",
+	.type = S_TRISTATE,
 	.curr = { "m", mod },
 	.menus = LIST_HEAD_INIT(symbol_mod.menus),
 	.flags = SYMBOL_CONST|SYMBOL_VALID,
@@ -28,6 +30,7 @@ struct symbol symbol_mod = {
 
 struct symbol symbol_no = {
 	.name = "n",
+	.type = S_TRISTATE,
 	.curr = { "n", no },
 	.menus = LIST_HEAD_INIT(symbol_no.menus),
 	.flags = SYMBOL_CONST|SYMBOL_VALID,
@@ -820,8 +823,7 @@ const char *sym_get_string_value(struct symbol *sym)
 		case no:
 			return "n";
 		case mod:
-			sym_calc_value(modules_sym);
-			return (modules_sym->curr.tri == no) ? "n" : "m";
+			return "m";
 		case yes:
 			return "y";
 		}
-- 
cgit v1.2.3


From 31894d35b51ba61e2931cbf28e80114a4f72bc2b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 19 May 2024 22:34:02 +0900
Subject: kconfig: remove redundant check in expr_join_or()

The check for 'sym1 == sym2' is redundant here because it has already
been done a few lines above:

    if (sym1 != sym2)
            return NULL;

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/kconfig/expr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c
index a290de36307b..4d95fce5f9a7 100644
--- a/scripts/kconfig/expr.c
+++ b/scripts/kconfig/expr.c
@@ -476,7 +476,7 @@ static struct expr *expr_join_or(struct expr *e1, struct expr *e2)
 			return expr_alloc_comp(E_UNEQUAL, sym1, &symbol_yes);
 		}
 	}
-	if (sym1->type == S_BOOLEAN && sym1 == sym2) {
+	if (sym1->type == S_BOOLEAN) {
 		if ((e1->type == E_NOT && e1->left.expr->type == E_SYMBOL && e2->type == E_SYMBOL) ||
 		    (e2->type == E_NOT && e2->left.expr->type == E_SYMBOL && e1->type == E_SYMBOL))
 			return expr_alloc_symbol(&symbol_yes);
-- 
cgit v1.2.3


From 659bbf7e1b08267b8e1dd900b316edcb6f6d9e2e Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Mon, 20 May 2024 12:56:52 -0700
Subject: kbuild: scripts/gdb: Replace missed $(srctree)/$(src) w/ $(src)

Recently we went through the source tree and replaced
$(srctree)/$(src) w/ $(src). However, the gdb scripts Makefile had a
hidden $(srctree)/$(src) that looked like this:

  $(abspath $(srctree))/$(src)

Because we missed that then my installed kernel had symlinks that
looked like this:

  __init__.py ->
    ${INSTALL_DIR}/$(INSTALL_DIR}/scripts/gdb/linux/__init__.py

Let's also replace the midden $(abspath $(srctree))/$(src) with
$(src). Now:

  __init__.py ->
    $(INSTALL_DIR}/scripts/gdb/linux/__init__.py

Fixes: b1992c3772e6 ("kbuild: use $(src) instead of $(srctree)/$(src) for source directory")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/gdb/linux/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/gdb/linux/Makefile b/scripts/gdb/linux/Makefile
index d77ad9079d0f..fd1402c0a1a1 100644
--- a/scripts/gdb/linux/Makefile
+++ b/scripts/gdb/linux/Makefile
@@ -5,7 +5,7 @@ ifdef building_out_of_srctree
 symlinks := $(patsubst $(src)/%,%,$(wildcard $(src)/*.py))
 
 quiet_cmd_symlink = SYMLINK $@
-      cmd_symlink = ln -fsn $(patsubst $(obj)/%,$(abspath $(srctree))/$(src)/%,$@) $@
+      cmd_symlink = ln -fsn $(patsubst $(obj)/%,$(src)/%,$@) $@
 
 always-y += $(symlinks)
 $(addprefix $(obj)/, $(symlinks)): FORCE
-- 
cgit v1.2.3


From 04b8cb0945b4bf679c71dc2351e0d3c25481e3c6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 20 May 2024 21:42:09 +0900
Subject: kbuild: avoid unneeded kallsyms step 3

Since commit 951bcae6c5a0 ("kallsyms: Avoid weak references for kallsyms
symbols"), the kallsyms step 3 always occurs.

You can compare the build logs.

[Before 951bcae6c5a0]

  $ git checkout 951bcae6c5a0^
  $ make defconfig all
       [ snip ]
    LD      .tmp_vmlinux.kallsyms1
    NM      .tmp_vmlinux.kallsyms1.syms
    KSYMS   .tmp_vmlinux.kallsyms1.S
    AS      .tmp_vmlinux.kallsyms1.S
    LD      .tmp_vmlinux.kallsyms2
    NM      .tmp_vmlinux.kallsyms2.syms
    KSYMS   .tmp_vmlinux.kallsyms2.S
    AS      .tmp_vmlinux.kallsyms2.S
    LD      vmlinux

[After 951bcae6c5a0]

  $ git checkout 951bcae6c5a0
  $ make defconfig all
       [ snip ]
    LD      .tmp_vmlinux.kallsyms1
    NM      .tmp_vmlinux.kallsyms1.syms
    KSYMS   .tmp_vmlinux.kallsyms1.S
    AS      .tmp_vmlinux.kallsyms1.S
    LD      .tmp_vmlinux.kallsyms2
    NM      .tmp_vmlinux.kallsyms2.syms
    KSYMS   .tmp_vmlinux.kallsyms2.S
    AS      .tmp_vmlinux.kallsyms2.S
    LD      .tmp_vmlinux.kallsyms3             # should not happen
    NM      .tmp_vmlinux.kallsyms3.syms        # should not happen
    KSYMS   .tmp_vmlinux.kallsyms3.S           # should not happen
    AS      .tmp_vmlinux.kallsyms3.S           # should not happen
    LD      vmlinux

The resulting vmlinux is correct, but it always requires an additional
linking step.

The symbols produced by kallsyms are excluded from kallsyms itself
because they were previously missing in step 1. With those symbols
excluded, the symbol lists matched between step 1 and step 2,
eliminating the need for step 3. Now, this has a negative effect.

Since 951bcae6c5a0, the PROVIDE() directives provide the fallback
definitions, which are not trimmed from the sysbol list in step 1
because ${kallsymso_prev} is empty at this point.

In step 2, ${kallsymso_prev} is set, and the kallsyms_* symbols are
trimmed from the symbol list.

Due to the table size difference between step 1 and step 2 (the former
is larger due to the presence of kallsyms_*), step 3 is triggered.

Now that the kallsyms_* symbols are always linked, let's stop omitting
them from kallsyms. This avoids unnecessary step 3.

Fixes: 951bcae6c5a0 ("kallsyms: Avoid weak references for kallsyms symbols")
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/link-vmlinux.sh |  6 +++---
 scripts/mksysmap        | 11 +----------
 2 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 7862a8101747..b0d39a927fbc 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -179,7 +179,7 @@ kallsyms_step()
 	kallsyms_S=${kallsyms_vmlinux}.S
 
 	vmlinux_link ${kallsyms_vmlinux} "${kallsymso_prev}" ${btf_vmlinux_bin_o}
-	mksysmap ${kallsyms_vmlinux} ${kallsyms_vmlinux}.syms ${kallsymso_prev}
+	mksysmap ${kallsyms_vmlinux} ${kallsyms_vmlinux}.syms
 	kallsyms ${kallsyms_vmlinux}.syms ${kallsyms_S}
 
 	info AS ${kallsyms_S}
@@ -193,7 +193,7 @@ kallsyms_step()
 mksysmap()
 {
 	info NM ${2}
-	${CONFIG_SHELL} "${srctree}/scripts/mksysmap" ${1} ${2} ${3}
+	${CONFIG_SHELL} "${srctree}/scripts/mksysmap" ${1} ${2}
 }
 
 sorttable()
@@ -282,7 +282,7 @@ if is_enabled CONFIG_DEBUG_INFO_BTF && is_enabled CONFIG_BPF; then
 	${RESOLVE_BTFIDS} vmlinux
 fi
 
-mksysmap vmlinux System.map ${kallsymso}
+mksysmap vmlinux System.map
 
 if is_enabled CONFIG_BUILDTIME_TABLE_SORT; then
 	info SORTTAB vmlinux
diff --git a/scripts/mksysmap b/scripts/mksysmap
index 57ff5656d566..e46bafe333bd 100755
--- a/scripts/mksysmap
+++ b/scripts/mksysmap
@@ -4,7 +4,7 @@
 # tools to retrieve the actual addresses of symbols in the kernel.
 #
 # Usage
-# mksysmap vmlinux System.map [exclude]
+# mksysmap vmlinux System.map
 
 
 #####
@@ -92,13 +92,4 @@ ${NM} -n ${1} | sed >${2} -e "
 # ppc stub
 /\.long_branch\./d
 /\.plt_branch\./d
-
-# ---------------------------------------------------------------------------
-# Ignored kallsyms symbols
-#
-# If the 3rd parameter exists, symbols from it will be omitted from the output.
-# This makes kallsyms have the identical symbol lists in the step 1 and 2.
-# Without this, the step2 would get new symbols generated by scripts/kallsyms.c
-# when CONFIG_KALLSYMS_ALL is enabled. That might require one more pass.
-$(if [ $# -ge 3 ]; then ${NM} ${3} | sed -n '/ U /!s:.* \([^ ]*\)$:/ \1$/d:p'; fi)
 "
-- 
cgit v1.2.3


From b18b047002b7d3b19d9fb905c1bd2a214016c153 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 20 May 2024 21:42:10 +0900
Subject: kbuild: change scripts/mksysmap into sed script

The previous commit removed the subshell execution from scripts/mksysmap,
which is now simple enough to become a sed script.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/link-vmlinux.sh |  2 +-
 scripts/mksysmap        | 19 ++++++-------------
 2 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index b0d39a927fbc..c22a213ea6a9 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -193,7 +193,7 @@ kallsyms_step()
 mksysmap()
 {
 	info NM ${2}
-	${CONFIG_SHELL} "${srctree}/scripts/mksysmap" ${1} ${2}
+	${NM} -n "${1}" | "${srctree}/scripts/mksysmap" > "${2}"
 }
 
 sorttable()
diff --git a/scripts/mksysmap b/scripts/mksysmap
index e46bafe333bd..c12723a04655 100755
--- a/scripts/mksysmap
+++ b/scripts/mksysmap
@@ -1,22 +1,16 @@
-#!/bin/sh -x
-# Based on the vmlinux file create the System.map file
+#!/bin/sed -f
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# sed script to filter out symbols that are not needed for System.map,
+# or not suitable for kallsyms. The input should be 'nm -n <file>'.
+#
 # System.map is used by module-init tools and some debugging
 # tools to retrieve the actual addresses of symbols in the kernel.
 #
-# Usage
-# mksysmap vmlinux System.map
-
-
-#####
-# Generate System.map (actual filename passed as second argument)
-# The following refers to the symbol type as per nm(1).
-
 # readprofile starts reading symbols when _stext is found, and
 # continue until it finds a symbol which is not either of 'T', 't',
 # 'W' or 'w'.
 #
-
-${NM} -n ${1} | sed >${2} -e "
 # ---------------------------------------------------------------------------
 # Ignored symbol types
 #
@@ -92,4 +86,3 @@ ${NM} -n ${1} | sed >${2} -e "
 # ppc stub
 /\.long_branch\./d
 /\.plt_branch\./d
-"
-- 
cgit v1.2.3


From 3430f65d6130ccbc86f0ff45642eeb9e2032a600 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 20 May 2024 21:42:11 +0900
Subject: kbuild: fix short log for AS in link-vmlinux.sh

In convention, short logs print the output file, not the input file.

Let's change the suffix for 'AS' since it assembles *.S into *.o.

[Before]

  LD      .tmp_vmlinux.kallsyms1
  NM      .tmp_vmlinux.kallsyms1.syms
  KSYMS   .tmp_vmlinux.kallsyms1.S
  AS      .tmp_vmlinux.kallsyms1.S
  LD      .tmp_vmlinux.kallsyms2
  NM      .tmp_vmlinux.kallsyms2.syms
  KSYMS   .tmp_vmlinux.kallsyms2.S
  AS      .tmp_vmlinux.kallsyms2.S
  LD      vmlinux

[After]

  LD      .tmp_vmlinux.kallsyms1
  NM      .tmp_vmlinux.kallsyms1.syms
  KSYMS   .tmp_vmlinux.kallsyms1.S
  AS      .tmp_vmlinux.kallsyms1.o
  LD      .tmp_vmlinux.kallsyms2
  NM      .tmp_vmlinux.kallsyms2.syms
  KSYMS   .tmp_vmlinux.kallsyms2.S
  AS      .tmp_vmlinux.kallsyms2.o
  LD      vmlinux

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/link-vmlinux.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index c22a213ea6a9..7aca51b24e9f 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -182,7 +182,7 @@ kallsyms_step()
 	mksysmap ${kallsyms_vmlinux} ${kallsyms_vmlinux}.syms
 	kallsyms ${kallsyms_vmlinux}.syms ${kallsyms_S}
 
-	info AS ${kallsyms_S}
+	info AS ${kallsymso}
 	${CC} ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS} \
 	      ${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} \
 	      -c -o ${kallsymso} ${kallsyms_S}
-- 
cgit v1.2.3


From 3c562a70cf4da331baef60ebb3f0e30b254006e9 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Wed, 22 May 2024 19:43:11 +0900
Subject: kbuild: remove a stale comment about cleaning in link-vmlinux.sh

Remove the left-over of commit 51eb95e2da41 ("kbuild: Don't remove
link-vmlinux temporary files on exit/signal").

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/link-vmlinux.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 7aca51b24e9f..46ce5d04dbeb 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -201,7 +201,6 @@ sorttable()
 	${objtree}/scripts/sorttable ${1}
 }
 
-# Delete output files in case of error
 cleanup()
 {
 	rm -f .btf.*
-- 
cgit v1.2.3


From e06a698ae62b9ee5ca98e65be2c90a61464192e6 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wenst@chromium.org>
Date: Tue, 28 May 2024 16:52:18 +0800
Subject: scripts/make_fit: Drop fdt image entry compatible string

According to the FIT image source file format document found in U-boot [1]
and the split-out FIT image specification [2], under "'/images' node" ->
"Conditionally mandatory property", the "compatible" property is described
as "compatible method for loading image", i.e., not the compatible string
embedded in the FDT or used for matching.

Drop the compatible string from the fdt image entry node.

While at it also fix up a typo in the document section of output_dtb.

[1] U-boot source "doc/usage/fit/source_file_format.rst", or on the website:
    https://docs.u-boot.org/en/latest/usage/fit/source_file_format.html
[2] https://github.com/open-source-firmware/flat-image-tree/blob/main/source/chapter2-source-file-format.rst

Fixes: 7a23b027ec17 ("arm64: boot: Support Flat Image Tree")
Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
Reviewed-by: Simon Glass <sjg@chromium.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/make_fit.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/scripts/make_fit.py b/scripts/make_fit.py
index 3de90c5a094b..263147df80a4 100755
--- a/scripts/make_fit.py
+++ b/scripts/make_fit.py
@@ -190,7 +190,7 @@ def output_dtb(fsw, seq, fname, arch, compress):
     Args:
         fsw (libfdt.FdtSw): Object to use for writing
         seq (int): Sequence number (1 for first)
-        fmame (str): Filename containing the DTB
+        fname (str): Filename containing the DTB
         arch: FIT architecture, e.g. 'arm64'
         compress (str): Compressed algorithm, e.g. 'gzip'
 
@@ -211,7 +211,6 @@ def output_dtb(fsw, seq, fname, arch, compress):
         fsw.property_string('type', 'flat_dt')
         fsw.property_string('arch', arch)
         fsw.property_string('compression', compress)
-        fsw.property('compatible', bytes(compat))
 
         with open(fname, 'rb') as inf:
             compressed = compress_data(inf, compress)
-- 
cgit v1.2.3


From 647535760a00a854c185dd4d7e6eccfea30ea0d5 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Date: Mon, 6 May 2024 20:02:50 +0200
Subject: Revert "drm/i915: Remove extra multi-gt pm-references"

This reverts commit 1f33dc0c1189efb9ae19c6fc22b64dd3e26261fb.

There was a patch supposed to fix an issue of illegal attempts to free a
still active i915 VMA object when parking a GT believed to be idle,
reported by CI on 2-GT Meteor Lake.  As a solution, an extra wakeref for
a Primary GT was acquired from i915_gem_do_execbuffer() -- see commit
f56fe3e91787 ("drm/i915: Fix a VMA UAF for multi-gt platform").

However, that fix occurred insufficient -- the issue was still reported by
CI.  That wakeref was released on exit from i915_gem_do_execbuffer(), then
potentially before completion of the request and deactivation of its
associated VMAs.  Moreover, CI reports indicated that single-GT platforms
also suffered sporadically from the same race.

Since that issue was fixed by another commit f3c71b2ded5c ("drm/i915/vma:
Fix UAF on destroy against retire race"), the changes introduced by that
insufficient fix were dropped as no longer useful.  However, that series
resulted in another VMA UAF scenario now being triggered in CI.

<4> [260.290809] ------------[ cut here ]------------
<4> [260.290988] list_del corruption. prev->next should be ffff888118c5d990, but was ffff888118c5a510. (prev=ffff888118c5a510)
<4> [260.291004] WARNING: CPU: 2 PID: 1143 at lib/list_debug.c:62 __list_del_entry_valid_or_report+0xb7/0xe0
..
<4> [260.291055] CPU: 2 PID: 1143 Comm: kms_plane Not tainted 6.9.0-rc2-CI_DRM_14524-ga25d180c6853+ #1
<4> [260.291058] Hardware name: Intel Corporation Meteor Lake Client Platform/MTL-P LP5x T3 RVP, BIOS MTLPFWI1.R00.3471.D91.2401310918 01/31/2024
<4> [260.291060] RIP: 0010:__list_del_entry_valid_or_report+0xb7/0xe0
...
<4> [260.291087] Call Trace:
<4> [260.291089]  <TASK>
<4> [260.291124]  i915_vma_reopen+0x43/0x80 [i915]
<4> [260.291298]  eb_lookup_vmas+0x9cb/0xcc0 [i915]
<4> [260.291579]  i915_gem_do_execbuffer+0xc9a/0x26d0 [i915]
<4> [260.291883]  i915_gem_execbuffer2_ioctl+0x123/0x2a0 [i915]
...
<4> [260.292301]  </TASK>
...
<4> [260.292506] ---[ end trace 0000000000000000 ]---
<4> [260.292782] general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b6ca3: 0000 [#1] PREEMPT SMP NOPTI
<4> [260.303575] CPU: 2 PID: 1143 Comm: kms_plane Tainted: G        W          6.9.0-rc2-CI_DRM_14524-ga25d180c6853+ #1
<4> [260.313851] Hardware name: Intel Corporation Meteor Lake Client Platform/MTL-P LP5x T3 RVP, BIOS MTLPFWI1.R00.3471.D91.2401310918 01/31/2024
<4> [260.326359] RIP: 0010:eb_validate_vmas+0x114/0xd80 [i915]
...
<4> [260.428756] Call Trace:
<4> [260.431192]  <TASK>
<4> [639.283393]  i915_gem_do_execbuffer+0xd05/0x26d0 [i915]
<4> [639.305245]  i915_gem_execbuffer2_ioctl+0x123/0x2a0 [i915]
...
<4> [639.411134]  </TASK>
...
<4> [639.449979] ---[ end trace 0000000000000000 ]---

We defer actually closing, unbinding and destroying a VMA until next idle
point, or until the object is freed in the meantime.  By postponing the
unbind, we allow for the VMA to be reopened by the client, avoiding the
work required to rebind the VMA.

Starting from commit b0647a5e79b1 ("drm/i915: Avoid live-lock with
i915_vma_parked()"), we assume that as long as a GT is held idle, no VMA
would be reopened while we destroy them.  That assumption is no longer
true in multi-GT configurations, where a VMA we reopen may be handled by a
GT different from the one that we already keep active via its engine while
we set up an execbuf request.

Restoring the extra GT0 PM wakeref removed from i915_gem_do_execbuffer()
processing path seems to fix this issue.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/10608
Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Nirmoy Das <nirmoy.das@linux.intel.com>
Reviewed-by: Nirmoy Das <nirmoy.das@linux.intel.com>
Fixes: 1f33dc0c1189 ("drm/i915: Remove extra multi-gt pm-references")
Link: https://patchwork.freedesktop.org/patch/msgid/20240506180253.96858-2-janusz.krzysztofik@linux.intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
(cherry picked from commit 749670a58d935303ad1ce529acc73f12de25832e)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 42619fc05de4..090724fa766c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -255,6 +255,7 @@ struct i915_execbuffer {
 	struct intel_context *context; /* logical state for the request */
 	struct i915_gem_context *gem_context; /** caller's context */
 	intel_wakeref_t wakeref;
+	intel_wakeref_t wakeref_gt0;
 
 	/** our requests to build */
 	struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
@@ -2685,6 +2686,7 @@ static int
 eb_select_engine(struct i915_execbuffer *eb)
 {
 	struct intel_context *ce, *child;
+	struct intel_gt *gt;
 	unsigned int idx;
 	int err;
 
@@ -2708,10 +2710,17 @@ eb_select_engine(struct i915_execbuffer *eb)
 		}
 	}
 	eb->num_batches = ce->parallel.number_children + 1;
+	gt = ce->engine->gt;
 
 	for_each_child(ce, child)
 		intel_context_get(child);
 	eb->wakeref = intel_gt_pm_get(ce->engine->gt);
+	/*
+	 * Keep GT0 active on MTL so that i915_vma_parked() doesn't
+	 * free VMAs while execbuf ioctl is validating VMAs.
+	 */
+	if (gt->info.id)
+		eb->wakeref_gt0 = intel_gt_pm_get(to_gt(gt->i915));
 
 	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
 		err = intel_context_alloc_state(ce);
@@ -2750,6 +2759,9 @@ eb_select_engine(struct i915_execbuffer *eb)
 	return err;
 
 err:
+	if (gt->info.id)
+		intel_gt_pm_put(to_gt(gt->i915), eb->wakeref_gt0);
+
 	intel_gt_pm_put(ce->engine->gt, eb->wakeref);
 	for_each_child(ce, child)
 		intel_context_put(child);
@@ -2763,6 +2775,12 @@ eb_put_engine(struct i915_execbuffer *eb)
 	struct intel_context *child;
 
 	i915_vm_put(eb->context->vm);
+	/*
+	 * This works in conjunction with eb_select_engine() to prevent
+	 * i915_vma_parked() from interfering while execbuf validates vmas.
+	 */
+	if (eb->gt->info.id)
+		intel_gt_pm_put(to_gt(eb->gt->i915), eb->wakeref_gt0);
 	intel_gt_pm_put(eb->context->engine->gt, eb->wakeref);
 	for_each_child(eb->context, child)
 		intel_context_put(child);
-- 
cgit v1.2.3


From 70cb9188ffc75e643debf292fcddff36c9dbd4ae Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 23 Apr 2024 18:23:10 +0200
Subject: drm/i915/gt: Disarm breadcrumbs if engines are already idle

The breadcrumbs use a GT wakeref for guarding the interrupt, but are
disarmed during release of the engine wakeref. This leaves a hole where
we may attach a breadcrumb just as the engine is parking (after it has
parked its breadcrumbs), execute the irq worker with some signalers still
attached, but never be woken again.

That issue manifests itself in CI with IGT runner timeouts while tests
are waiting indefinitely for release of all GT wakerefs.

<6> [209.151778] i915: Running live_engine_pm_selftests/live_engine_busy_stats
<7> [209.231628] i915 0000:00:02.0: [drm:intel_power_well_disable [i915]] disabling PW_5
<7> [209.231816] i915 0000:00:02.0: [drm:intel_power_well_disable [i915]] disabling PW_4
<7> [209.231944] i915 0000:00:02.0: [drm:intel_power_well_disable [i915]] disabling PW_3
<7> [209.232056] i915 0000:00:02.0: [drm:intel_power_well_disable [i915]] disabling PW_2
<7> [209.232166] i915 0000:00:02.0: [drm:intel_power_well_disable [i915]] disabling DC_off
<7> [209.232270] i915 0000:00:02.0: [drm:skl_enable_dc6 [i915]] Enabling DC6
<7> [209.232368] i915 0000:00:02.0: [drm:gen9_set_dc_state.part.0 [i915]] Setting DC state from 00 to 02
<4> [299.356116] [IGT] Inactivity timeout exceeded. Killing the current test with SIGQUIT.
...
<6> [299.356526] sysrq: Show State
...
<6> [299.373964] task:i915_selftest   state:D stack:11784 pid:5578  tgid:5578  ppid:873    flags:0x00004002
<6> [299.373967] Call Trace:
<6> [299.373968]  <TASK>
<6> [299.373970]  __schedule+0x3bb/0xda0
<6> [299.373974]  schedule+0x41/0x110
<6> [299.373976]  intel_wakeref_wait_for_idle+0x82/0x100 [i915]
<6> [299.374083]  ? __pfx_var_wake_function+0x10/0x10
<6> [299.374087]  live_engine_busy_stats+0x9b/0x500 [i915]
<6> [299.374173]  __i915_subtests+0xbe/0x240 [i915]
<6> [299.374277]  ? __pfx___intel_gt_live_setup+0x10/0x10 [i915]
<6> [299.374369]  ? __pfx___intel_gt_live_teardown+0x10/0x10 [i915]
<6> [299.374456]  intel_engine_live_selftests+0x1c/0x30 [i915]
<6> [299.374547]  __run_selftests+0xbb/0x190 [i915]
<6> [299.374635]  i915_live_selftests+0x4b/0x90 [i915]
<6> [299.374717]  i915_pci_probe+0x10d/0x210 [i915]

At the end of the interrupt worker, if there are no more engines awake,
disarm the breadcrumb and go to sleep.

Fixes: 9d5612ca165a ("drm/i915/gt: Defer enabling the breadcrumb interrupt to after submission")
Closes: https://gitlab.freedesktop.org/drm/intel/issues/10026
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Andrzej Hajda <andrzej.hajda@intel.com>
Cc: <stable@vger.kernel.org> # v5.12+
Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Acked-by: Nirmoy Das <nirmoy.das@intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240423165505.465734-2-janusz.krzysztofik@linux.intel.com
(cherry picked from commit fbad43eccae5cb14594195c20113369aabaa22b5)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index d650beb8ed22..20b9b04ec1e0 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -263,8 +263,13 @@ static void signal_irq_work(struct irq_work *work)
 		i915_request_put(rq);
 	}
 
+	/* Lazy irq enabling after HW submission */
 	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
 		intel_breadcrumbs_arm_irq(b);
+
+	/* And confirm that we still want irqs enabled before we yield */
+	if (READ_ONCE(b->irq_armed) && !atomic_read(&b->active))
+		intel_breadcrumbs_disarm_irq(b);
 }
 
 struct intel_breadcrumbs *
@@ -315,13 +320,7 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
 		return;
 
 	/* Kick the work once more to drain the signalers, and disarm the irq */
-	irq_work_sync(&b->irq_work);
-	while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
-		local_irq_disable();
-		signal_irq_work(&b->irq_work);
-		local_irq_enable();
-		cond_resched();
-	}
+	irq_work_queue(&b->irq_work);
 }
 
 void intel_breadcrumbs_free(struct kref *kref)
@@ -404,7 +403,7 @@ static void insert_breadcrumb(struct i915_request *rq)
 	 * the request as it may have completed and raised the interrupt as
 	 * we were attaching it into the lists.
 	 */
-	if (!b->irq_armed || __i915_request_is_complete(rq))
+	if (!READ_ONCE(b->irq_armed) || __i915_request_is_complete(rq))
 		irq_work_queue(&b->irq_work);
 }
 
-- 
cgit v1.2.3


From d4f36db62396b73bed383c0b6e48d36278cafa78 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 30 Apr 2024 09:48:09 -0700
Subject: drm/i915/guc: avoid FIELD_PREP warning

With gcc-7 and earlier, there are lots of warnings like

In file included from <command-line>:0:0:
In function '__guc_context_policy_add_priority.isra.66',
    inlined from '__guc_context_set_prio.isra.67' at drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3292:3,
    inlined from 'guc_context_set_prio' at drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:3320:2:
include/linux/compiler_types.h:399:38: error: call to '__compiletime_assert_631' declared with attribute error: FIELD_PREP: mask is not constant
  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
                                      ^
...
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c:2422:3: note: in expansion of macro 'FIELD_PREP'
   FIELD_PREP(GUC_KLV_0_KEY, GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
   ^~~~~~~~~~

Make sure that GUC_KLV_0_KEY is an unsigned value to avoid the warning.

Fixes: 77b6f79df66e ("drm/i915/guc: Update to GuC version 69.0.3")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Signed-off-by: Julia Filipchuk <julia.filipchuk@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240430164809.482131-1-julia.filipchuk@intel.com
(cherry picked from commit 364e039827ef628c650c21c1afe1c54d9c3296d9)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
index bebf28e3c479..525587cfe1af 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
@@ -29,9 +29,9 @@
  */
 
 #define GUC_KLV_LEN_MIN				1u
-#define GUC_KLV_0_KEY				(0xffff << 16)
-#define GUC_KLV_0_LEN				(0xffff << 0)
-#define GUC_KLV_n_VALUE				(0xffffffff << 0)
+#define GUC_KLV_0_KEY				(0xffffu << 16)
+#define GUC_KLV_0_LEN				(0xffffu << 0)
+#define GUC_KLV_n_VALUE				(0xffffffffu << 0)
 
 /**
  * DOC: GuC Self Config KLVs
-- 
cgit v1.2.3


From 659a3062c705753a9ec6fd28a4c67ee4254f9584 Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das@intel.com>
Date: Thu, 16 May 2024 17:14:03 +0200
Subject: drm/i915/selftests: Set always_coherent to false when reading from
 CPU

Commit 8d4ba9fc1c6c ("drm/i915/selftests: Pick correct caching mode.")
was not complete  as for non LLC  sharing platforms cpu read can happen
from LLC which probably doesn't have the latest changes made by GPU.

Cc: Andi Shyti <andi.shyti@linux.intel.com>
Cc: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Cc: Jonathan Cavitt <jonathan.cavitt@intel.com>
Fixes: 8d4ba9fc1c6c ("drm/i915/selftests: Pick correct caching mode.")
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240516151403.2875-1-nirmoy.das@intel.com
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
(cherry picked from commit 007ed70831426d4cc108d879d688de6b8e3e6d45)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index 65a931ea80e9..3527b8f446fe 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -196,7 +196,7 @@ static int verify_access(struct drm_i915_private *i915,
 	if (err)
 		goto out_file;
 
-	mode = intel_gt_coherent_map_type(to_gt(i915), native_obj, true);
+	mode = intel_gt_coherent_map_type(to_gt(i915), native_obj, false);
 	vaddr = i915_gem_object_pin_map_unlocked(native_obj, mode);
 	if (IS_ERR(vaddr)) {
 		err = PTR_ERR(vaddr);
-- 
cgit v1.2.3


From ee01b6a386eaf9984b58a2476e8f531149679da9 Mon Sep 17 00:00:00 2001
From: Andi Shyti <andi.shyti@linux.intel.com>
Date: Fri, 17 May 2024 11:06:16 +0200
Subject: drm/i915/gt: Fix CCS id's calculation for CCS mode setting

The whole point of the previous fixes has been to change the CCS
hardware configuration to generate only one stream available to
the compute users. We did this by changing the info.engine_mask
that is set during device probe, reset during the detection of
the fused engines, and finally reset again when choosing the CCS
mode.

We can't use the engine_mask variable anymore, as with the
current configuration, it imposes only one CCS no matter what the
hardware configuration is.

Before changing the engine_mask for the third time, save it and
use it for calculating the CCS mode.

After the previous changes, the user reported a performance drop
to around 1/4. We have tested that the compute operations, with
the current patch, have improved by the same factor.

Fixes: 6db31251bb26 ("drm/i915/gt: Enable only one CCS for compute workload")
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
Cc: Gnattu OC <gnattuoc@me.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Tested-by: Jian Ye <jian.ye@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Tested-by: Gnattu OC <gnattuoc@me.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240517090616.242529-1-andi.shyti@linux.intel.com
(cherry picked from commit a09d2327a9ba8e3f5be238bc1b7ca2809255b464)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c   | 6 ++++++
 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 2 +-
 drivers/gpu/drm/i915/gt/intel_gt_types.h    | 8 ++++++++
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 5c8e9ee3b008..3b740ca25000 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -885,6 +885,12 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
 	if (IS_DG2(gt->i915)) {
 		u8 first_ccs = __ffs(CCS_MASK(gt));
 
+		/*
+		 * Store the number of active cslices before
+		 * changing the CCS engine configuration
+		 */
+		gt->ccs.cslices = CCS_MASK(gt);
+
 		/* Mask off all the CCS engine */
 		info->engine_mask &= ~GENMASK(CCS3, CCS0);
 		/* Put back in the first CCS engine */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
index 99b71bb7da0a..3c62a44e9106 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
@@ -19,7 +19,7 @@ unsigned int intel_gt_apply_ccs_mode(struct intel_gt *gt)
 
 	/* Build the value for the fixed CCS load balancing */
 	for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
-		if (CCS_MASK(gt) & BIT(cslice))
+		if (gt->ccs.cslices & BIT(cslice))
 			/*
 			 * If available, assign the cslice
 			 * to the first available engine...
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index def7dd0eb6f1..cfdd2ad5e954 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -207,6 +207,14 @@ struct intel_gt {
 					    [MAX_ENGINE_INSTANCE + 1];
 	enum intel_submission_method submission_method;
 
+	struct {
+		/*
+		 * Mask of the non fused CCS slices
+		 * to be used for the load balancing
+		 */
+		intel_engine_mask_t cslices;
+	} ccs;
+
 	/*
 	 * Default address space (either GGTT or ppGTT depending on arch).
 	 *
-- 
cgit v1.2.3


From 43e2b37e2ab660c3565d4cff27922bc70e79c3f1 Mon Sep 17 00:00:00 2001
From: Vidya Srinivas <vidya.srinivas@intel.com>
Date: Mon, 20 May 2024 22:26:34 +0530
Subject: drm/i915/dpt: Make DPT object unshrinkable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In some scenarios, the DPT object gets shrunk but
the actual framebuffer did not and thus its still
there on the DPT's vm->bound_list. Then it tries to
rewrite the PTEs via a stale CPU mapping. This causes panic.

Cc: stable@vger.kernel.org
Reported-by: Shawn Lee <shawn.c.lee@intel.com>
Fixes: 0dc987b699ce ("drm/i915/display: Add smem fallback allocation for dpt")
Signed-off-by: Vidya Srinivas <vidya.srinivas@intel.com>
[vsyrjala: Add TODO comment]
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240520165634.1162470-1-vidya.srinivas@intel.com
(cherry picked from commit 51064d471c53dcc8eddd2333c3f1c1d9131ba36c)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 3560a062d287..5d7446a48ae7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -284,7 +284,9 @@ bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj);
 static inline bool
 i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
 {
-	return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE);
+	/* TODO: make DPT shrinkable when it has no bound vmas */
+	return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE) &&
+		!obj->is_dpt;
 }
 
 static inline bool
-- 
cgit v1.2.3


From 75800e2e4203ea83bbc9d4f63ad97ea582244a08 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 21 May 2024 17:30:22 +0300
Subject: drm/i915: Fix audio component initialization

After registering the audio component in i915_audio_component_init()
the audio driver may call i915_audio_component_get_power() via the
component ops. This could program AUD_FREQ_CNTRL with an uninitialized
value if the latter function is called before display.audio.freq_cntrl
gets initialized. The get_power() function also does a modeset which in
the above case happens too early before the initialization step and
triggers the

"Reject display access from task"

error message added by the Fixes: commit below.

Fix the above issue by registering the audio component only after the
initialization step.

Fixes: 87c1694533c9 ("drm/i915: save AUD_FREQ_CNTRL state at audio domain suspend")
Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/10291
Cc: stable@vger.kernel.org # v5.5+
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240521143022.3784539-1-imre.deak@intel.com
(cherry picked from commit fdd0b80172758ce284f19fa8a26d90c61e4371d2)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_audio.c         | 32 ++++++++++++++--------
 drivers/gpu/drm/i915/display/intel_audio.h         |  1 +
 .../gpu/drm/i915/display/intel_display_driver.c    |  2 ++
 3 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c
index ed81e1466c4b..40e7d862675e 100644
--- a/drivers/gpu/drm/i915/display/intel_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_audio.c
@@ -1252,17 +1252,6 @@ static const struct component_ops i915_audio_component_bind_ops = {
 static void i915_audio_component_init(struct drm_i915_private *i915)
 {
 	u32 aud_freq, aud_freq_init;
-	int ret;
-
-	ret = component_add_typed(i915->drm.dev,
-				  &i915_audio_component_bind_ops,
-				  I915_COMPONENT_AUDIO);
-	if (ret < 0) {
-		drm_err(&i915->drm,
-			"failed to add audio component (%d)\n", ret);
-		/* continue with reduced functionality */
-		return;
-	}
 
 	if (DISPLAY_VER(i915) >= 9) {
 		aud_freq_init = intel_de_read(i915, AUD_FREQ_CNTRL);
@@ -1285,6 +1274,21 @@ static void i915_audio_component_init(struct drm_i915_private *i915)
 
 	/* init with current cdclk */
 	intel_audio_cdclk_change_post(i915);
+}
+
+static void i915_audio_component_register(struct drm_i915_private *i915)
+{
+	int ret;
+
+	ret = component_add_typed(i915->drm.dev,
+				  &i915_audio_component_bind_ops,
+				  I915_COMPONENT_AUDIO);
+	if (ret < 0) {
+		drm_err(&i915->drm,
+			"failed to add audio component (%d)\n", ret);
+		/* continue with reduced functionality */
+		return;
+	}
 
 	i915->display.audio.component_registered = true;
 }
@@ -1317,6 +1321,12 @@ void intel_audio_init(struct drm_i915_private *i915)
 		i915_audio_component_init(i915);
 }
 
+void intel_audio_register(struct drm_i915_private *i915)
+{
+	if (!i915->display.audio.lpe.platdev)
+		i915_audio_component_register(i915);
+}
+
 /**
  * intel_audio_deinit() - deinitialize the audio driver
  * @i915: the i915 drm device private data
diff --git a/drivers/gpu/drm/i915/display/intel_audio.h b/drivers/gpu/drm/i915/display/intel_audio.h
index 9327954b801e..576c061d72a4 100644
--- a/drivers/gpu/drm/i915/display/intel_audio.h
+++ b/drivers/gpu/drm/i915/display/intel_audio.h
@@ -28,6 +28,7 @@ void intel_audio_codec_get_config(struct intel_encoder *encoder,
 void intel_audio_cdclk_change_pre(struct drm_i915_private *dev_priv);
 void intel_audio_cdclk_change_post(struct drm_i915_private *dev_priv);
 void intel_audio_init(struct drm_i915_private *dev_priv);
+void intel_audio_register(struct drm_i915_private *i915);
 void intel_audio_deinit(struct drm_i915_private *dev_priv);
 void intel_audio_sdp_split_update(const struct intel_crtc_state *crtc_state);
 
diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c
index 89bd032ed995..794b4af38055 100644
--- a/drivers/gpu/drm/i915/display/intel_display_driver.c
+++ b/drivers/gpu/drm/i915/display/intel_display_driver.c
@@ -540,6 +540,8 @@ void intel_display_driver_register(struct drm_i915_private *i915)
 
 	intel_display_driver_enable_user_access(i915);
 
+	intel_audio_register(i915);
+
 	intel_display_debugfs_register(i915);
 
 	/*
-- 
cgit v1.2.3


From edb32776196afa393c074d6a2733e3a69e66b299 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 29 May 2024 10:37:59 +0200
Subject: ALSA: seq: Fix incorrect UMP type for system messages

When converting a legacy system message to a UMP packet, it forgot to
modify the UMP type field but keeping the default type (either type 2
or 4).  Correct to the right type for system messages.

Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events")
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20240529083800.5742-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/seq/seq_ump_convert.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c
index f5d22dd00842..add70b4c2885 100644
--- a/sound/core/seq/seq_ump_convert.c
+++ b/sound/core/seq/seq_ump_convert.c
@@ -739,6 +739,7 @@ static int system_1p_ev_to_ump_midi1(const struct snd_seq_event *event,
 				     union snd_ump_midi1_msg *data,
 				     unsigned char status)
 {
+	data->system.type = UMP_MSG_TYPE_SYSTEM; // override
 	data->system.status = status;
 	data->system.parm1 = event->data.control.value & 0x7f;
 	return 1;
@@ -750,6 +751,7 @@ static int system_2p_ev_to_ump_midi1(const struct snd_seq_event *event,
 				     union snd_ump_midi1_msg *data,
 				     unsigned char status)
 {
+	data->system.type = UMP_MSG_TYPE_SYSTEM; // override
 	data->system.status = status;
 	data->system.parm1 = (event->data.control.value >> 7) & 0x7f;
 	data->system.parm2 = event->data.control.value & 0x7f;
-- 
cgit v1.2.3


From fe85f6e607d75b856e7229924c71f55e005f8284 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 29 May 2024 10:38:21 +0200
Subject: ALSA: ump: Don't clear bank selection after sending a program change

The current code clears the bank selection MSB/LSB after sending a
program change, but this can be wrong, as many apps may not send the
full bank selection with both MSB and LSB but sending only one.
Better to keep the previous bank set.

Fixes: 0b5288f5fe63 ("ALSA: ump: Add legacy raw MIDI support")
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20240529083823.5778-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/ump_convert.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sound/core/ump_convert.c b/sound/core/ump_convert.c
index de04799fdb69..f67c44c83fde 100644
--- a/sound/core/ump_convert.c
+++ b/sound/core/ump_convert.c
@@ -404,7 +404,6 @@ static int cvt_legacy_cmd_to_ump(struct ump_cvt_to_ump *cvt,
 			midi2->pg.bank_msb = cc->cc_bank_msb;
 			midi2->pg.bank_lsb = cc->cc_bank_lsb;
 			cc->bank_set = 0;
-			cc->cc_bank_msb = cc->cc_bank_lsb = 0;
 		}
 		break;
 	case UMP_MSG_STATUS_CHANNEL_PRESSURE:
-- 
cgit v1.2.3


From 6d40dbc75877110c5e0d661dd77f6cfce916765e Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Tue, 28 May 2024 21:18:50 +0200
Subject: ALSA: pcm: fix typo in comment

Fix the typo in the comment for SNDRV_PCM_RATE_KNOT

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20240528191850.63314-1-alexandre.belloni@bootlin.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 61c6054618c8..3edd7a7346da 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -124,7 +124,7 @@ struct snd_pcm_ops {
 #define SNDRV_PCM_RATE_768000		(1U<<16)	/* 768000Hz */
 
 #define SNDRV_PCM_RATE_CONTINUOUS	(1U<<30)	/* continuous range */
-#define SNDRV_PCM_RATE_KNOT		(1U<<31)	/* supports more non-continuos rates */
+#define SNDRV_PCM_RATE_KNOT		(1U<<31)	/* supports more non-continuous rates */
 
 #define SNDRV_PCM_RATE_8000_44100	(SNDRV_PCM_RATE_8000|SNDRV_PCM_RATE_11025|\
 					 SNDRV_PCM_RATE_16000|SNDRV_PCM_RATE_22050|\
-- 
cgit v1.2.3


From b062938fd9afec844c50571fddd8d81623a60ee1 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 27 May 2024 14:19:40 -0500
Subject: ASoC: Intel: sof-sdw: fix missing SPI_MASTER dependency

The addition of the Cirrus Logic 'sidecar' amps adds a dependency on SPI_MASTER.

Kconfig warnings: (for reference only)
   WARNING: unmet direct dependencies detected for SND_SOC_CS35L56_SPI
   Depends on [n]:
     SOUND [=y] && SND [=y] && SND_SOC [=y] && SPI_MASTER [=n] &&
     (SOUNDWIRE [=y] || !SOUNDWIRE [=y])
   Selected by [y]:
    - SND_SOC_INTEL_SOUNDWIRE_SOF_MACH [=y] && SOUND [=y] && SND [=y]
      && SND_SOC [=y] && SND_SOC_INTEL_MACH [=y] &&
      SND_SOC_SOF_INTEL_SOUNDWIRE [=y] && I2C [=y] && ACPI [=y] &&
      (MFD_INTEL_LPSS [=y] || COMPILE_TEST [=n]) &&
      (SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES [=y] || COMPILE_TEST
      [=n]) && SOUNDWIRE [=y]

Fixes: b831b4dca48d ("ASoC: intel: sof_sdw: Add support for cs42l43-cs35l56 sidecar amps")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202405140758.o2HY4nYD-lkp@intel.com/
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://msgid.link/r/20240527191940.30107-1-pierre-louis.bossart@linux.intel.com
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/boards/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig
index 3ed81ab649c5..4e0586034de4 100644
--- a/sound/soc/intel/boards/Kconfig
+++ b/sound/soc/intel/boards/Kconfig
@@ -652,7 +652,7 @@ if SND_SOC_SOF_INTEL_SOUNDWIRE
 
 config SND_SOC_INTEL_SOUNDWIRE_SOF_MACH
 	tristate "SoundWire generic machine driver"
-	depends on I2C && ACPI
+	depends on I2C && SPI_MASTER && ACPI
 	depends on MFD_INTEL_LPSS || COMPILE_TEST
 	depends on SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES || COMPILE_TEST
 	depends on SOUNDWIRE
-- 
cgit v1.2.3


From e9022b31db80019025967b03df1d059433e9f26d Mon Sep 17 00:00:00 2001
From: Minda Chen <minda.chen@starfivetech.com>
Date: Tue, 28 May 2024 09:51:20 +0800
Subject: MAINTAINERS: dwmac: starfive: update Maintainer

Update the maintainer of starfive dwmac driver.

Signed-off-by: Minda Chen <minda.chen@starfivetech.com>
Acked-by: Emil Renner Berthing <emil.renner.berthing@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index e80db76fe393..90930a03e7a5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -21318,7 +21318,7 @@ F:	arch/riscv/boot/dts/starfive/
 
 STARFIVE DWMAC GLUE LAYER
 M:	Emil Renner Berthing <kernel@esmil.dk>
-M:	Samin Guo <samin.guo@starfivetech.com>
+M:	Minda Chen <minda.chen@starfivetech.com>
 S:	Maintained
 F:	Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
 F:	drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c
-- 
cgit v1.2.3


From 068648aab72c9ba7b0597354ef4d81ffaac7b979 Mon Sep 17 00:00:00 2001
From: Edward Adam Davis <eadavis@qq.com>
Date: Tue, 28 May 2024 11:12:31 +0800
Subject: nfc/nci: Add the inconsistency check between the input data length
 and count

write$nci(r0, &(0x7f0000000740)=ANY=[@ANYBLOB="610501"], 0xf)

Syzbot constructed a write() call with a data length of 3 bytes but a count value
of 15, which passed too little data to meet the basic requirements of the function
nci_rf_intf_activated_ntf_packet().

Therefore, increasing the comparison between data length and count value to avoid
problems caused by inconsistent data length and count.

Reported-and-tested-by: syzbot+71bfed2b2bcea46c98f2@syzkaller.appspotmail.com
Signed-off-by: Edward Adam Davis <eadavis@qq.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/virtual_ncidev.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/nfc/virtual_ncidev.c b/drivers/nfc/virtual_ncidev.c
index 590b038e449e..6b89d596ba9a 100644
--- a/drivers/nfc/virtual_ncidev.c
+++ b/drivers/nfc/virtual_ncidev.c
@@ -125,6 +125,10 @@ static ssize_t virtual_ncidev_write(struct file *file,
 		kfree_skb(skb);
 		return -EFAULT;
 	}
+	if (strnlen(skb->data, count) != count) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
 
 	nci_recv_frame(vdev->ndev, skb);
 	return count;
-- 
cgit v1.2.3


From b1e7cee96127468c2483cf10c2899c9b5cf79bf8 Mon Sep 17 00:00:00 2001
From: Puranjay Mohan <puranjay@kernel.org>
Date: Mon, 13 May 2024 10:02:48 +0000
Subject: powerpc/bpf: enforce full ordering for ATOMIC operations with
 BPF_FETCH

The Linux Kernel Memory Model [1][2] requires RMW operations that have a
return value to be fully ordered.

BPF atomic operations with BPF_FETCH (including BPF_XCHG and
BPF_CMPXCHG) return a value back so they need to be JITed to fully
ordered operations. POWERPC currently emits relaxed operations for
these.

We can show this by running the following litmus-test:

  PPC SB+atomic_add+fetch

  {
      0:r0=x;  (* dst reg assuming offset is 0 *)
      0:r1=2;  (* src reg *)
      0:r2=1;
      0:r4=y;  (* P0 writes to this, P1 reads this *)
      0:r5=z;  (* P1 writes to this, P0 reads this *)
      0:r6=0;

      1:r2=1;
      1:r4=y;
      1:r5=z;
  }

  P0                      | P1            ;
  stw         r2, 0(r4)   | stw  r2,0(r5) ;
                          |               ;
  loop:lwarx  r3, r6, r0  |               ;
  mr          r8, r3      |               ;
  add         r3, r3, r1  | sync          ;
  stwcx.      r3, r6, r0  |               ;
  bne         loop        |               ;
  mr          r1, r8      |               ;
                          |               ;
  lwa         r7, 0(r5)   | lwa  r7,0(r4) ;

  ~exists(0:r7=0 /\ 1:r7=0)

  Witnesses
  Positive: 9 Negative: 3
  Condition ~exists (0:r7=0 /\ 1:r7=0)
  Observation SB+atomic_add+fetch Sometimes 3 9

This test shows that the older store in P0 is reordered with a newer
load to a different address. Although there is a RMW operation with
fetch between them. Adding a sync before and after RMW fixes the issue:

  Witnesses
  Positive: 9 Negative: 0
  Condition ~exists (0:r7=0 /\ 1:r7=0)
  Observation SB+atomic_add+fetch Never 0 9

[1] https://www.kernel.org/doc/Documentation/memory-barriers.txt
[2] https://www.kernel.org/doc/Documentation/atomic_t.txt

Fixes: aea7ef8a82c0 ("powerpc/bpf/32: add support for BPF_ATOMIC bitwise operations")
Fixes: 2d9206b22743 ("powerpc/bpf/32: Add instructions for atomic_[cmp]xchg")
Fixes: dbe6e2456fb0 ("powerpc/bpf/64: add support for atomic fetch operations")
Fixes: 1e82dfaa7819 ("powerpc/bpf/64: Add instructions for atomic_[cmp]xchg")
Cc: stable@vger.kernel.org # v6.0+
Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Naveen N Rao <naveen@kernel.org>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/20240513100248.110535-1-puranjay@kernel.org
---
 arch/powerpc/net/bpf_jit_comp32.c | 12 ++++++++++++
 arch/powerpc/net/bpf_jit_comp64.c | 12 ++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 43b97032a91c..a0c4f1bde83e 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -900,6 +900,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
 
 			/* Get offset into TMP_REG */
 			EMIT(PPC_RAW_LI(tmp_reg, off));
+			/*
+			 * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
+			 * before and after the operation.
+			 *
+			 * This is a requirement in the Linux Kernel Memory Model.
+			 * See __cmpxchg_u32() in asm/cmpxchg.h as an example.
+			 */
+			if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP))
+				EMIT(PPC_RAW_SYNC());
 			tmp_idx = ctx->idx * 4;
 			/* load value from memory into r0 */
 			EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));
@@ -953,6 +962,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
 
 			/* For the BPF_FETCH variant, get old data into src_reg */
 			if (imm & BPF_FETCH) {
+				/* Emit 'sync' to enforce full ordering */
+				if (IS_ENABLED(CONFIG_SMP))
+					EMIT(PPC_RAW_SYNC());
 				EMIT(PPC_RAW_MR(ret_reg, ax_reg));
 				if (!fp->aux->verifier_zext)
 					EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 8afc14a4a125..7703dcf48be8 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -846,6 +846,15 @@ emit_clear:
 
 			/* Get offset into TMP_REG_1 */
 			EMIT(PPC_RAW_LI(tmp1_reg, off));
+			/*
+			 * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
+			 * before and after the operation.
+			 *
+			 * This is a requirement in the Linux Kernel Memory Model.
+			 * See __cmpxchg_u64() in asm/cmpxchg.h as an example.
+			 */
+			if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP))
+				EMIT(PPC_RAW_SYNC());
 			tmp_idx = ctx->idx * 4;
 			/* load value from memory into TMP_REG_2 */
 			if (size == BPF_DW)
@@ -908,6 +917,9 @@ emit_clear:
 			PPC_BCC_SHORT(COND_NE, tmp_idx);
 
 			if (imm & BPF_FETCH) {
+				/* Emit 'sync' to enforce full ordering */
+				if (IS_ENABLED(CONFIG_SMP))
+					EMIT(PPC_RAW_SYNC());
 				EMIT(PPC_RAW_MR(ret_reg, _R0));
 				/*
 				 * Skip unnecessary zero-extension for 32-bit cmpxchg.
-- 
cgit v1.2.3


From ffa077b2f6ad124ec3d23fbddc5e4b0ff2647af8 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Date: Wed, 29 May 2024 15:12:01 +0300
Subject: ASoC: SOF: ipc4-topology: Fix input format query of process modules
 without base extension

If a process module does not have base config extension then the same
format applies to all of it's inputs and the process->base_config_ext is
NULL, causing NULL dereference when specifically crafted topology and
sequences used.

Fixes: 648fea128476 ("ASoC: SOF: ipc4-topology: set copier output format for process module")
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Cc: stable@vger.kernel.org
Link: https://msgid.link/r/20240529121201.14687-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/ipc4-topology.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index beff10989324..33e8c5f7d9ca 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -217,6 +217,14 @@ sof_ipc4_get_input_pin_audio_fmt(struct snd_sof_widget *swidget, int pin_index)
 	}
 
 	process = swidget->private;
+
+	/*
+	 * For process modules without base config extension, base module config
+	 * format is used for all input pins
+	 */
+	if (process->init_config != SOF_IPC4_MODULE_INIT_CONFIG_TYPE_BASE_CFG_WITH_EXT)
+		return &process->base_config.audio_fmt;
+
 	base_cfg_ext = process->base_config_ext;
 
 	/*
-- 
cgit v1.2.3


From 4a69c1264ff41bc5bf7c03101ada0454fbf08868 Mon Sep 17 00:00:00 2001
From: Witold Sadowski <wsadowski@marvell.com>
Date: Wed, 29 May 2024 00:40:32 -0700
Subject: spi: cadence: Ensure data lines set to low during dummy-cycle period

During dummy-cycles xSPI will switch GPIO into Hi-Z mode. In that dummy
period voltage on data lines will slowly drop, what can cause
unintentional modebyte transmission. Value send to SPI memory chip will
depend on last address, and clock frequency.
To prevent unforeseen consequences of that behaviour, force send
single modebyte(0x00).
Modebyte will be send only if number of dummy-cycles is not equal
to 0. Code must also reduce dummycycle byte count by one - as one byte
is send as modebyte.

Signed-off-by: Witold Sadowski <wsadowski@marvell.com>
Link: https://msgid.link/r/20240529074037.1345882-2-wsadowski@marvell.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-cadence-xspi.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/spi/spi-cadence-xspi.c b/drivers/spi/spi-cadence-xspi.c
index 2209e9fc378f..2e3eacd46b72 100644
--- a/drivers/spi/spi-cadence-xspi.c
+++ b/drivers/spi/spi-cadence-xspi.c
@@ -145,6 +145,9 @@
 #define CDNS_XSPI_STIG_DONE_FLAG		BIT(0)
 #define CDNS_XSPI_TRD_STATUS			0x0104
 
+#define MODE_NO_OF_BYTES			GENMASK(25, 24)
+#define MODEBYTES_COUNT			1
+
 /* Helper macros for filling command registers */
 #define CDNS_XSPI_CMD_FLD_P1_INSTR_CMD_1(op, data_phase) ( \
 	FIELD_PREP(CDNS_XSPI_CMD_INSTR_TYPE, (data_phase) ? \
@@ -157,9 +160,10 @@
 	FIELD_PREP(CDNS_XSPI_CMD_P1_R2_ADDR3, ((op)->addr.val >> 24) & 0xFF) | \
 	FIELD_PREP(CDNS_XSPI_CMD_P1_R2_ADDR4, ((op)->addr.val >> 32) & 0xFF))
 
-#define CDNS_XSPI_CMD_FLD_P1_INSTR_CMD_3(op) ( \
+#define CDNS_XSPI_CMD_FLD_P1_INSTR_CMD_3(op, modebytes) ( \
 	FIELD_PREP(CDNS_XSPI_CMD_P1_R3_ADDR5, ((op)->addr.val >> 40) & 0xFF) | \
 	FIELD_PREP(CDNS_XSPI_CMD_P1_R3_CMD, (op)->cmd.opcode) | \
+	FIELD_PREP(MODE_NO_OF_BYTES, modebytes) | \
 	FIELD_PREP(CDNS_XSPI_CMD_P1_R3_NUM_ADDR_BYTES, (op)->addr.nbytes))
 
 #define CDNS_XSPI_CMD_FLD_P1_INSTR_CMD_4(op, chipsel) ( \
@@ -173,12 +177,12 @@
 #define CDNS_XSPI_CMD_FLD_DSEQ_CMD_2(op) \
 	FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R2_DCNT_L, (op)->data.nbytes & 0xFFFF)
 
-#define CDNS_XSPI_CMD_FLD_DSEQ_CMD_3(op) ( \
+#define CDNS_XSPI_CMD_FLD_DSEQ_CMD_3(op, dummybytes) ( \
 	FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R3_DCNT_H, \
 		((op)->data.nbytes >> 16) & 0xffff) | \
 	FIELD_PREP(CDNS_XSPI_CMD_DSEQ_R3_NUM_OF_DUMMY, \
 		  (op)->dummy.buswidth != 0 ? \
-		  (((op)->dummy.nbytes * 8) / (op)->dummy.buswidth) : \
+		  (((dummybytes) * 8) / (op)->dummy.buswidth) : \
 		  0))
 
 #define CDNS_XSPI_CMD_FLD_DSEQ_CMD_4(op, chipsel) ( \
@@ -351,6 +355,7 @@ static int cdns_xspi_send_stig_command(struct cdns_xspi_dev *cdns_xspi,
 	u32 cmd_regs[6];
 	u32 cmd_status;
 	int ret;
+	int dummybytes = op->dummy.nbytes;
 
 	ret = cdns_xspi_wait_for_controller_idle(cdns_xspi);
 	if (ret < 0)
@@ -365,7 +370,12 @@ static int cdns_xspi_send_stig_command(struct cdns_xspi_dev *cdns_xspi,
 	memset(cmd_regs, 0, sizeof(cmd_regs));
 	cmd_regs[1] = CDNS_XSPI_CMD_FLD_P1_INSTR_CMD_1(op, data_phase);
 	cmd_regs[2] = CDNS_XSPI_CMD_FLD_P1_INSTR_CMD_2(op);
-	cmd_regs[3] = CDNS_XSPI_CMD_FLD_P1_INSTR_CMD_3(op);
+	if (dummybytes != 0) {
+		cmd_regs[3] = CDNS_XSPI_CMD_FLD_P1_INSTR_CMD_3(op, 1);
+		dummybytes--;
+	} else {
+		cmd_regs[3] = CDNS_XSPI_CMD_FLD_P1_INSTR_CMD_3(op, 0);
+	}
 	cmd_regs[4] = CDNS_XSPI_CMD_FLD_P1_INSTR_CMD_4(op,
 						       cdns_xspi->cur_cs);
 
@@ -375,7 +385,7 @@ static int cdns_xspi_send_stig_command(struct cdns_xspi_dev *cdns_xspi,
 		cmd_regs[0] = CDNS_XSPI_STIG_DONE_FLAG;
 		cmd_regs[1] = CDNS_XSPI_CMD_FLD_DSEQ_CMD_1(op);
 		cmd_regs[2] = CDNS_XSPI_CMD_FLD_DSEQ_CMD_2(op);
-		cmd_regs[3] = CDNS_XSPI_CMD_FLD_DSEQ_CMD_3(op);
+		cmd_regs[3] = CDNS_XSPI_CMD_FLD_DSEQ_CMD_3(op, dummybytes);
 		cmd_regs[4] = CDNS_XSPI_CMD_FLD_DSEQ_CMD_4(op,
 							   cdns_xspi->cur_cs);
 
-- 
cgit v1.2.3


From b7d7f11a291830fdf69d3301075dd0fb347ced84 Mon Sep 17 00:00:00 2001
From: Nicolas Escande <nico.escande@gmail.com>
Date: Tue, 28 May 2024 16:26:05 +0200
Subject: wifi: mac80211: mesh: Fix leak of mesh_preq_queue objects

The hwmp code use objects of type mesh_preq_queue, added to a list in
ieee80211_if_mesh, to keep track of mpath we need to resolve. If the mpath
gets deleted, ex mesh interface is removed, the entries in that list will
never get cleaned. Fix this by flushing all corresponding items of the
preq_queue in mesh_path_flush_pending().

This should take care of KASAN reports like this:

unreferenced object 0xffff00000668d800 (size 128):
  comm "kworker/u8:4", pid 67, jiffies 4295419552 (age 1836.444s)
  hex dump (first 32 bytes):
    00 1f 05 09 00 00 ff ff 00 d5 68 06 00 00 ff ff  ..........h.....
    8e 97 ea eb 3e b8 01 00 00 00 00 00 00 00 00 00  ....>...........
  backtrace:
    [<000000007302a0b6>] __kmem_cache_alloc_node+0x1e0/0x35c
    [<00000000049bd418>] kmalloc_trace+0x34/0x80
    [<0000000000d792bb>] mesh_queue_preq+0x44/0x2a8
    [<00000000c99c3696>] mesh_nexthop_resolve+0x198/0x19c
    [<00000000926bf598>] ieee80211_xmit+0x1d0/0x1f4
    [<00000000fc8c2284>] __ieee80211_subif_start_xmit+0x30c/0x764
    [<000000005926ee38>] ieee80211_subif_start_xmit+0x9c/0x7a4
    [<000000004c86e916>] dev_hard_start_xmit+0x174/0x440
    [<0000000023495647>] __dev_queue_xmit+0xe24/0x111c
    [<00000000cfe9ca78>] batadv_send_skb_packet+0x180/0x1e4
    [<000000007bacc5d5>] batadv_v_elp_periodic_work+0x2f4/0x508
    [<00000000adc3cd94>] process_one_work+0x4b8/0xa1c
    [<00000000b36425d1>] worker_thread+0x9c/0x634
    [<0000000005852dd5>] kthread+0x1bc/0x1c4
    [<000000005fccd770>] ret_from_fork+0x10/0x20
unreferenced object 0xffff000009051f00 (size 128):
  comm "kworker/u8:4", pid 67, jiffies 4295419553 (age 1836.440s)
  hex dump (first 32 bytes):
    90 d6 92 0d 00 00 ff ff 00 d8 68 06 00 00 ff ff  ..........h.....
    36 27 92 e4 02 e0 01 00 00 58 79 06 00 00 ff ff  6'.......Xy.....
  backtrace:
    [<000000007302a0b6>] __kmem_cache_alloc_node+0x1e0/0x35c
    [<00000000049bd418>] kmalloc_trace+0x34/0x80
    [<0000000000d792bb>] mesh_queue_preq+0x44/0x2a8
    [<00000000c99c3696>] mesh_nexthop_resolve+0x198/0x19c
    [<00000000926bf598>] ieee80211_xmit+0x1d0/0x1f4
    [<00000000fc8c2284>] __ieee80211_subif_start_xmit+0x30c/0x764
    [<000000005926ee38>] ieee80211_subif_start_xmit+0x9c/0x7a4
    [<000000004c86e916>] dev_hard_start_xmit+0x174/0x440
    [<0000000023495647>] __dev_queue_xmit+0xe24/0x111c
    [<00000000cfe9ca78>] batadv_send_skb_packet+0x180/0x1e4
    [<000000007bacc5d5>] batadv_v_elp_periodic_work+0x2f4/0x508
    [<00000000adc3cd94>] process_one_work+0x4b8/0xa1c
    [<00000000b36425d1>] worker_thread+0x9c/0x634
    [<0000000005852dd5>] kthread+0x1bc/0x1c4
    [<000000005fccd770>] ret_from_fork+0x10/0x20

Fixes: 050ac52cbe1f ("mac80211: code for on-demand Hybrid Wireless Mesh Protocol")
Signed-off-by: Nicolas Escande <nico.escande@gmail.com>
Link: https://msgid.link/20240528142605.1060566-1-nico.escande@gmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mesh_pathtbl.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index a6b62169f084..c0a5c75cddcb 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -1017,10 +1017,23 @@ void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,
  */
 void mesh_path_flush_pending(struct mesh_path *mpath)
 {
+	struct ieee80211_sub_if_data *sdata = mpath->sdata;
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+	struct mesh_preq_queue *preq, *tmp;
 	struct sk_buff *skb;
 
 	while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL)
 		mesh_path_discard_frame(mpath->sdata, skb);
+
+	spin_lock_bh(&ifmsh->mesh_preq_queue_lock);
+	list_for_each_entry_safe(preq, tmp, &ifmsh->preq_queue.list, list) {
+		if (ether_addr_equal(mpath->dst, preq->dst)) {
+			list_del(&preq->list);
+			kfree(preq);
+			--ifmsh->preq_queue_len;
+		}
+	}
+	spin_unlock_bh(&ifmsh->mesh_preq_queue_lock);
 }
 
 /**
-- 
cgit v1.2.3


From 6f6291f09a322c1c1578badac8072d049363f4e6 Mon Sep 17 00:00:00 2001
From: Nicolas Escande <nico.escande@gmail.com>
Date: Mon, 27 May 2024 16:17:59 +0200
Subject: wifi: mac80211: mesh: init nonpeer_pm to active by default in mesh
 sdata

With a ath9k device I can see that:
	iw phy phy0 interface add mesh0 type mp
	ip link set mesh0 up
	iw dev mesh0 scan

Will start a scan with the Power Management bit set in the Frame Control Field.
This is because we set this bit depending on the nonpeer_pm variable of the mesh
iface sdata and when there are no active links on the interface it remains to
NL80211_MESH_POWER_UNKNOWN.

As soon as links starts to be established, it wil switch to
NL80211_MESH_POWER_ACTIVE as it is the value set by befault on the per sta
nonpeer_pm field.
As we want no power save by default, (as expressed with the per sta ini values),
lets init it to the expected default value of NL80211_MESH_POWER_ACTIVE.

Also please note that we cannot change the default value from userspace prior to
establishing a link as using NL80211_CMD_SET_MESH_CONFIG will not work before
NL80211_CMD_JOIN_MESH has been issued. So too late for our initial scan.

Signed-off-by: Nicolas Escande <nico.escande@gmail.com>
Link: https://msgid.link/20240527141759.299411-1-nico.escande@gmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mesh.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index cbc9b5e40cb3..6d4510221c98 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1776,6 +1776,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 	ifmsh->last_preq = jiffies;
 	ifmsh->next_perr = jiffies;
 	ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE;
+	ifmsh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
 	/* Allocate all mesh structures when creating the first mesh interface. */
 	if (!mesh_allocated)
 		ieee80211s_init();
-- 
cgit v1.2.3


From 44c06bbde6443de206b30f513100b5670b23fc5e Mon Sep 17 00:00:00 2001
From: Remi Pommarel <repk@triplefau.lt>
Date: Wed, 29 May 2024 08:57:53 +0200
Subject: wifi: mac80211: Fix deadlock in ieee80211_sta_ps_deliver_wakeup()

The ieee80211_sta_ps_deliver_wakeup() function takes sta->ps_lock to
synchronizes with ieee80211_tx_h_unicast_ps_buf() which is called from
softirq context. However using only spin_lock() to get sta->ps_lock in
ieee80211_sta_ps_deliver_wakeup() does not prevent softirq to execute
on this same CPU, to run ieee80211_tx_h_unicast_ps_buf() and try to
take this same lock ending in deadlock. Below is an example of rcu stall
that arises in such situation.

 rcu: INFO: rcu_sched self-detected stall on CPU
 rcu:    2-....: (42413413 ticks this GP) idle=b154/1/0x4000000000000000 softirq=1763/1765 fqs=21206996
 rcu:    (t=42586894 jiffies g=2057 q=362405 ncpus=4)
 CPU: 2 PID: 719 Comm: wpa_supplicant Tainted: G        W          6.4.0-02158-g1b062f552873 #742
 Hardware name: RPT (r1) (DT)
 pstate: 00000005 (nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
 pc : queued_spin_lock_slowpath+0x58/0x2d0
 lr : invoke_tx_handlers_early+0x5b4/0x5c0
 sp : ffff00001ef64660
 x29: ffff00001ef64660 x28: ffff000009bc1070 x27: ffff000009bc0ad8
 x26: ffff000009bc0900 x25: ffff00001ef647a8 x24: 0000000000000000
 x23: ffff000009bc0900 x22: ffff000009bc0900 x21: ffff00000ac0e000
 x20: ffff00000a279e00 x19: ffff00001ef646e8 x18: 0000000000000000
 x17: ffff800016468000 x16: ffff00001ef608c0 x15: 0010533c93f64f80
 x14: 0010395c9faa3946 x13: 0000000000000000 x12: 00000000fa83b2da
 x11: 000000012edeceea x10: ffff0000010fbe00 x9 : 0000000000895440
 x8 : 000000000010533c x7 : ffff00000ad8b740 x6 : ffff00000c350880
 x5 : 0000000000000007 x4 : 0000000000000001 x3 : 0000000000000000
 x2 : 0000000000000000 x1 : 0000000000000001 x0 : ffff00000ac0e0e8
 Call trace:
  queued_spin_lock_slowpath+0x58/0x2d0
  ieee80211_tx+0x80/0x12c
  ieee80211_tx_pending+0x110/0x278
  tasklet_action_common.constprop.0+0x10c/0x144
  tasklet_action+0x20/0x28
  _stext+0x11c/0x284
  ____do_softirq+0xc/0x14
  call_on_irq_stack+0x24/0x34
  do_softirq_own_stack+0x18/0x20
  do_softirq+0x74/0x7c
  __local_bh_enable_ip+0xa0/0xa4
  _ieee80211_wake_txqs+0x3b0/0x4b8
  __ieee80211_wake_queue+0x12c/0x168
  ieee80211_add_pending_skbs+0xec/0x138
  ieee80211_sta_ps_deliver_wakeup+0x2a4/0x480
  ieee80211_mps_sta_status_update.part.0+0xd8/0x11c
  ieee80211_mps_sta_status_update+0x18/0x24
  sta_apply_parameters+0x3bc/0x4c0
  ieee80211_change_station+0x1b8/0x2dc
  nl80211_set_station+0x444/0x49c
  genl_family_rcv_msg_doit.isra.0+0xa4/0xfc
  genl_rcv_msg+0x1b0/0x244
  netlink_rcv_skb+0x38/0x10c
  genl_rcv+0x34/0x48
  netlink_unicast+0x254/0x2bc
  netlink_sendmsg+0x190/0x3b4
  ____sys_sendmsg+0x1e8/0x218
  ___sys_sendmsg+0x68/0x8c
  __sys_sendmsg+0x44/0x84
  __arm64_sys_sendmsg+0x20/0x28
  do_el0_svc+0x6c/0xe8
  el0_svc+0x14/0x48
  el0t_64_sync_handler+0xb0/0xb4
  el0t_64_sync+0x14c/0x150

Using spin_lock_bh()/spin_unlock_bh() instead prevents softirq to raise
on the same CPU that is holding the lock.

Fixes: 1d147bfa6429 ("mac80211: fix AP powersave TX vs. wakeup race")
Signed-off-by: Remi Pommarel <repk@triplefau.lt>
Link: https://msgid.link/8e36fe07d0fbc146f89196cd47a53c8a0afe84aa.1716910344.git.repk@triplefau.lt
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/sta_info.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index da5fdd6f5c85..aa22f09e6d14 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1724,7 +1724,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
 	skb_queue_head_init(&pending);
 
 	/* sync with ieee80211_tx_h_unicast_ps_buf */
-	spin_lock(&sta->ps_lock);
+	spin_lock_bh(&sta->ps_lock);
 	/* Send all buffered frames to the station */
 	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
 		int count = skb_queue_len(&pending), tmp;
@@ -1753,7 +1753,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
 	 */
 	clear_sta_flag(sta, WLAN_STA_PSPOLL);
 	clear_sta_flag(sta, WLAN_STA_UAPSD);
-	spin_unlock(&sta->ps_lock);
+	spin_unlock_bh(&sta->ps_lock);
 
 	atomic_dec(&ps->num_sta_ps);
 
-- 
cgit v1.2.3


From 4dc3a3893dae5a7f73e5809273aca0f1f3548d55 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 23 May 2024 12:05:33 +0200
Subject: wifi: cfg80211: validate HE operation element parsing

Validate that the HE operation element has the correct
length before parsing it.

Cc: stable@vger.kernel.org
Fixes: 645f3d85129d ("wifi: cfg80211: handle UHB AP and STA power type")
Reviewed-by: Miriam Rachel Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240523120533.677025eb4a92.I44c091029ef113c294e8fe8b9bf871bf5dbeeb27@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/scan.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 127853877a0a..8daed8232b05 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -2128,7 +2128,8 @@ static bool cfg80211_6ghz_power_type_valid(const u8 *ie, size_t ielen,
 	struct ieee80211_he_operation *he_oper;
 
 	tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ielen);
-	if (tmp && tmp->datalen >= sizeof(*he_oper) + 1) {
+	if (tmp && tmp->datalen >= sizeof(*he_oper) + 1 &&
+	    tmp->datalen >= ieee80211_he_oper_size(tmp->data + 1)) {
 		const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
 
 		he_oper = (void *)&tmp->data[1];
-- 
cgit v1.2.3


From e296c95eac655008d5a709b8cf54d0018da1c916 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 22 May 2024 12:41:25 +0200
Subject: wifi: cfg80211: fully move wiphy work to unbound workqueue

Previously I had moved the wiphy work to the unbound
system workqueue, but missed that when it restarts and
during resume it was still using the normal system
workqueue. Fix that.

Fixes: 91d20ab9d9ca ("wifi: cfg80211: use system_unbound_wq for wiphy work")
Reviewed-by: Miriam Rachel Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240522124126.7ca959f2cbd3.I3e2a71ef445d167b84000ccf934ea245aef8d395@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/core.c  | 2 +-
 net/wireless/sysfs.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 3fb1b637352a..4b1f45e3070e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -431,7 +431,7 @@ static void cfg80211_wiphy_work(struct work_struct *work)
 	if (wk) {
 		list_del_init(&wk->entry);
 		if (!list_empty(&rdev->wiphy_work_list))
-			schedule_work(work);
+			queue_work(system_unbound_wq, work);
 		spin_unlock_irq(&rdev->wiphy_work_lock);
 
 		wk->func(&rdev->wiphy, wk);
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 565511a3f461..62f26618f674 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -5,7 +5,7 @@
  *
  * Copyright 2005-2006	Jiri Benc <jbenc@suse.cz>
  * Copyright 2006	Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2020-2021, 2023 Intel Corporation
+ * Copyright (C) 2020-2021, 2023-2024 Intel Corporation
  */
 
 #include <linux/device.h>
@@ -137,7 +137,7 @@ static int wiphy_resume(struct device *dev)
 	if (rdev->wiphy.registered && rdev->ops->resume)
 		ret = rdev_resume(rdev);
 	rdev->suspended = false;
-	schedule_work(&rdev->wiphy_work);
+	queue_work(system_unbound_wq, &rdev->wiphy_work);
 	wiphy_unlock(&rdev->wiphy);
 
 	if (ret)
-- 
cgit v1.2.3


From 642f89daa34567d02f312d03e41523a894906dae Mon Sep 17 00:00:00 2001
From: Remi Pommarel <repk@triplefau.lt>
Date: Tue, 21 May 2024 21:47:26 +0200
Subject: wifi: cfg80211: Lock wiphy in cfg80211_get_station

Wiphy should be locked before calling rdev_get_station() (see lockdep
assert in ieee80211_get_station()).

This fixes the following kernel NULL dereference:

 Unable to handle kernel NULL pointer dereference at virtual address 0000000000000050
 Mem abort info:
   ESR = 0x0000000096000006
   EC = 0x25: DABT (current EL), IL = 32 bits
   SET = 0, FnV = 0
   EA = 0, S1PTW = 0
   FSC = 0x06: level 2 translation fault
 Data abort info:
   ISV = 0, ISS = 0x00000006
   CM = 0, WnR = 0
 user pgtable: 4k pages, 48-bit VAs, pgdp=0000000003001000
 [0000000000000050] pgd=0800000002dca003, p4d=0800000002dca003, pud=08000000028e9003, pmd=0000000000000000
 Internal error: Oops: 0000000096000006 [#1] SMP
 Modules linked in: netconsole dwc3_meson_g12a dwc3_of_simple dwc3 ip_gre gre ath10k_pci ath10k_core ath9k ath9k_common ath9k_hw ath
 CPU: 0 PID: 1091 Comm: kworker/u8:0 Not tainted 6.4.0-02144-g565f9a3a7911-dirty #705
 Hardware name: RPT (r1) (DT)
 Workqueue: bat_events batadv_v_elp_throughput_metric_update
 pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
 pc : ath10k_sta_statistics+0x10/0x2dc [ath10k_core]
 lr : sta_set_sinfo+0xcc/0xbd4
 sp : ffff000007b43ad0
 x29: ffff000007b43ad0 x28: ffff0000071fa900 x27: ffff00000294ca98
 x26: ffff000006830880 x25: ffff000006830880 x24: ffff00000294c000
 x23: 0000000000000001 x22: ffff000007b43c90 x21: ffff800008898acc
 x20: ffff00000294c6e8 x19: ffff000007b43c90 x18: 0000000000000000
 x17: 445946354d552d78 x16: 62661f7200000000 x15: 57464f445946354d
 x14: 0000000000000000 x13: 00000000000000e3 x12: d5f0acbcebea978e
 x11: 00000000000000e3 x10: 000000010048fe41 x9 : 0000000000000000
 x8 : ffff000007b43d90 x7 : 000000007a1e2125 x6 : 0000000000000000
 x5 : ffff0000024e0900 x4 : ffff800000a0250c x3 : ffff000007b43c90
 x2 : ffff00000294ca98 x1 : ffff000006831920 x0 : 0000000000000000
 Call trace:
  ath10k_sta_statistics+0x10/0x2dc [ath10k_core]
  sta_set_sinfo+0xcc/0xbd4
  ieee80211_get_station+0x2c/0x44
  cfg80211_get_station+0x80/0x154
  batadv_v_elp_get_throughput+0x138/0x1fc
  batadv_v_elp_throughput_metric_update+0x1c/0xa4
  process_one_work+0x1ec/0x414
  worker_thread+0x70/0x46c
  kthread+0xdc/0xe0
  ret_from_fork+0x10/0x20
 Code: a9bb7bfd 910003fd a90153f3 f9411c40 (f9402814)

This happens because STA has time to disconnect and reconnect before
batadv_v_elp_throughput_metric_update() delayed work gets scheduled. In
this situation, ath10k_sta_state() can be in the middle of resetting
arsta data when the work queue get chance to be scheduled and ends up
accessing it. Locking wiphy prevents that.

Fixes: 7406353d43c8 ("cfg80211: implement cfg80211_get_station cfg80211 API")
Signed-off-by: Remi Pommarel <repk@triplefau.lt>
Reviewed-by: Nicolas Escande <nico.escande@gmail.com>
Acked-by: Antonio Quartulli <a@unstable.cc>
Link: https://msgid.link/983b24a6a176e0800c01aedcd74480d9b551cb13.1716046653.git.repk@triplefau.lt
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/util.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/wireless/util.c b/net/wireless/util.c
index 2bde8a354631..082c6f9c5416 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -2549,6 +2549,7 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
 {
 	struct cfg80211_registered_device *rdev;
 	struct wireless_dev *wdev;
+	int ret;
 
 	wdev = dev->ieee80211_ptr;
 	if (!wdev)
@@ -2560,7 +2561,11 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
 
 	memset(sinfo, 0, sizeof(*sinfo));
 
-	return rdev_get_station(rdev, dev, mac_addr, sinfo);
+	wiphy_lock(&rdev->wiphy);
+	ret = rdev_get_station(rdev, dev, mac_addr, sinfo);
+	wiphy_unlock(&rdev->wiphy);
+
+	return ret;
 }
 EXPORT_SYMBOL(cfg80211_get_station);
 
-- 
cgit v1.2.3


From ab904521f4de52fef4f179d2dfc1877645ef5f5c Mon Sep 17 00:00:00 2001
From: Lin Ma <linma@zju.edu.cn>
Date: Tue, 21 May 2024 15:50:59 +0800
Subject: wifi: cfg80211: pmsr: use correct nla_get_uX functions

The commit 9bb7e0f24e7e ("cfg80211: add peer measurement with FTM
initiator API") defines four attributes NL80211_PMSR_FTM_REQ_ATTR_
{NUM_BURSTS_EXP}/{BURST_PERIOD}/{BURST_DURATION}/{FTMS_PER_BURST} in
following ways.

static const struct nla_policy
nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = {
    ...
    [NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP] =
        NLA_POLICY_MAX(NLA_U8, 15),
    [NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD] = { .type = NLA_U16 },
    [NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION] =
        NLA_POLICY_MAX(NLA_U8, 15),
    [NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST] =
        NLA_POLICY_MAX(NLA_U8, 31),
    ...
};

That is, those attributes are expected to be NLA_U8 and NLA_U16 types.
However, the consumers of these attributes in `pmsr_parse_ftm` blindly
all use `nla_get_u32`, which is incorrect and causes functionality issues
on little-endian platforms. Hence, fix them with the correct `nla_get_u8`
and `nla_get_u16` functions.

Fixes: 9bb7e0f24e7e ("cfg80211: add peer measurement with FTM initiator API")
Signed-off-by: Lin Ma <linma@zju.edu.cn>
Link: https://msgid.link/20240521075059.47999-1-linma@zju.edu.cn
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/pmsr.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index e106dcea3977..c569c37da317 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -56,7 +56,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
 	out->ftm.burst_period = 0;
 	if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD])
 		out->ftm.burst_period =
-			nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]);
+			nla_get_u16(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]);
 
 	out->ftm.asap = !!tb[NL80211_PMSR_FTM_REQ_ATTR_ASAP];
 	if (out->ftm.asap && !capa->ftm.asap) {
@@ -75,7 +75,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
 	out->ftm.num_bursts_exp = 0;
 	if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP])
 		out->ftm.num_bursts_exp =
-			nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]);
+			nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]);
 
 	if (capa->ftm.max_bursts_exponent >= 0 &&
 	    out->ftm.num_bursts_exp > capa->ftm.max_bursts_exponent) {
@@ -88,7 +88,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
 	out->ftm.burst_duration = 15;
 	if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION])
 		out->ftm.burst_duration =
-			nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]);
+			nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]);
 
 	out->ftm.ftms_per_burst = 0;
 	if (tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST])
@@ -107,7 +107,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev,
 	out->ftm.ftmr_retries = 3;
 	if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES])
 		out->ftm.ftmr_retries =
-			nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]);
+			nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]);
 
 	out->ftm.request_lci = !!tb[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI];
 	if (out->ftm.request_lci && !capa->ftm.request_lci) {
-- 
cgit v1.2.3


From 02c665f048a439c0d58cc45334c94634bd7c18e6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 15 May 2024 13:34:10 +0200
Subject: wifi: mac80211: apply mcast rate only if interface is up

If the interface isn't enabled, don't apply multicast
rate changes immediately.

Reported-by: syzbot+de87c09cc7b964ea2e23@syzkaller.appspotmail.com
Link: https://msgid.link/20240515133410.d6cffe5756cc.I47b624a317e62bdb4609ff7fa79403c0c444d32d@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index b08e5d7687e3..99abbb9e8477 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2958,8 +2958,9 @@ static int ieee80211_set_mcast_rate(struct wiphy *wiphy, struct net_device *dev,
 	memcpy(sdata->vif.bss_conf.mcast_rate, rate,
 	       sizeof(int) * NUM_NL80211_BANDS);
 
-	ieee80211_link_info_change_notify(sdata, &sdata->deflink,
-					  BSS_CHANGED_MCAST_RATE);
+	if (ieee80211_sdata_running(sdata))
+		ieee80211_link_info_change_notify(sdata, &sdata->deflink,
+						  BSS_CHANGED_MCAST_RATE);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 177c6ae9725d783f9e96f02593ce8fb2639be22f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 15 May 2024 13:53:19 +0200
Subject: wifi: mac80211: handle tasklet frames before stopping

The code itself doesn't want to handle frames from the driver
if it's already stopped, but if the tasklet was queued before
and runs after the stop, then all bets are off. Flush queues
before actually stopping, RX should be off at this point since
all the interfaces are removed already, etc.

Reported-by: syzbot+8830db5d3593b5546d2e@syzkaller.appspotmail.com
Link: https://msgid.link/20240515135318.b05f11385c9a.I41c1b33a2e1814c3a7ef352cd7f2951b91785617@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h |  2 ++
 net/mac80211/main.c        | 10 ++++++++--
 net/mac80211/util.c        |  2 ++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index eb62b7d4b4f7..3cedfdc9099b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1845,6 +1845,8 @@ void ieee80211_link_info_change_notify(struct ieee80211_sub_if_data *sdata,
 void ieee80211_configure_filter(struct ieee80211_local *local);
 u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
 
+void ieee80211_handle_queued_frames(struct ieee80211_local *local);
+
 u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local);
 int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb,
 			     u64 *cookie, gfp_t gfp);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 4eaea0a9975b..1132dea0e290 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -423,9 +423,8 @@ u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
 	       BSS_CHANGED_ERP_SLOT;
 }
 
-static void ieee80211_tasklet_handler(struct tasklet_struct *t)
+void ieee80211_handle_queued_frames(struct ieee80211_local *local)
 {
-	struct ieee80211_local *local = from_tasklet(local, t, tasklet);
 	struct sk_buff *skb;
 
 	while ((skb = skb_dequeue(&local->skb_queue)) ||
@@ -450,6 +449,13 @@ static void ieee80211_tasklet_handler(struct tasklet_struct *t)
 	}
 }
 
+static void ieee80211_tasklet_handler(struct tasklet_struct *t)
+{
+	struct ieee80211_local *local = from_tasklet(local, t, tasklet);
+
+	ieee80211_handle_queued_frames(local);
+}
+
 static void ieee80211_restart_work(struct work_struct *work)
 {
 	struct ieee80211_local *local =
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 0b893e958959..283bfc99417e 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1567,6 +1567,8 @@ u32 ieee80211_sta_get_rates(struct ieee80211_sub_if_data *sdata,
 
 void ieee80211_stop_device(struct ieee80211_local *local)
 {
+	ieee80211_handle_queued_frames(local);
+
 	ieee80211_led_radio(local, false);
 	ieee80211_mod_tpt_led_trig(local, 0, IEEE80211_TPT_LEDTRIG_FL_RADIO);
 
-- 
cgit v1.2.3


From f7a8b10bfd614d7a9a16fbe80d28ead4f063cb00 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 10 May 2024 11:37:38 +0200
Subject: wifi: cfg80211: fix 6 GHz scan request building

The 6 GHz scan request struct allocated by cfg80211_scan_6ghz() is
meant to be formed this way:

 [base struct][channels][ssids][6ghz_params]

It is allocated with [channels] as the maximum number of channels
supported by the driver in the 6 GHz band, since allocation is
before knowing how many there will be.

However, the inner pointers are set incorrectly: initially, the
6 GHz scan parameters pointer is set:

 [base struct][channels]
                        ^ scan_6ghz_params

and later the SSID pointer is set to the end of the actually
_used_ channels.

 [base struct][channels]
                  ^ ssids

If many APs were to be discovered, and many channels used, and
there were many SSIDs, then the SSIDs could overlap the 6 GHz
parameters.

Additionally, the request->ssids for most of the function points
to the original request still (given the struct copy) but is used
normally, which is confusing.

Clear this up, by actually using the allocated space for 6 GHz
parameters _after_ the SSIDs, and set up the SSIDs initially so
they are used more clearly. Just like in nl80211.c, set them
only if there actually are SSIDs though.

Finally, also copy the elements (ie/ie_len) so they're part of
the same request, not pointing to the old request.

Co-developed-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Reviewed-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://msgid.link/20240510113738.4190692ef4ee.I0cb19188be17a8abd029805e3373c0a7777c214c@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/rdev-ops.h |  6 +++++-
 net/wireless/scan.c     | 47 +++++++++++++++++++++++++++++++----------------
 2 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 43897a5269b6..755af47b88b9 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -2,7 +2,7 @@
 /*
  * Portions of this file
  * Copyright(c) 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018, 2021-2023 Intel Corporation
+ * Copyright (C) 2018, 2021-2024 Intel Corporation
  */
 #ifndef __CFG80211_RDEV_OPS
 #define __CFG80211_RDEV_OPS
@@ -458,6 +458,10 @@ static inline int rdev_scan(struct cfg80211_registered_device *rdev,
 			    struct cfg80211_scan_request *request)
 {
 	int ret;
+
+	if (WARN_ON_ONCE(!request->n_ssids && request->ssids))
+		return -EINVAL;
+
 	trace_rdev_scan(&rdev->wiphy, request);
 	ret = rdev->ops->scan(&rdev->wiphy, request);
 	trace_rdev_return_int(&rdev->wiphy, ret);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 8daed8232b05..2f2a3163968a 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -812,6 +812,7 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
 	LIST_HEAD(coloc_ap_list);
 	bool need_scan_psc = true;
 	const struct ieee80211_sband_iftype_data *iftd;
+	size_t size, offs_ssids, offs_6ghz_params, offs_ies;
 
 	rdev_req->scan_6ghz = true;
 
@@ -877,10 +878,15 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
 		spin_unlock_bh(&rdev->bss_lock);
 	}
 
-	request = kzalloc(struct_size(request, channels, n_channels) +
-			  sizeof(*request->scan_6ghz_params) * count +
-			  sizeof(*request->ssids) * rdev_req->n_ssids,
-			  GFP_KERNEL);
+	size = struct_size(request, channels, n_channels);
+	offs_ssids = size;
+	size += sizeof(*request->ssids) * rdev_req->n_ssids;
+	offs_6ghz_params = size;
+	size += sizeof(*request->scan_6ghz_params) * count;
+	offs_ies = size;
+	size += rdev_req->ie_len;
+
+	request = kzalloc(size, GFP_KERNEL);
 	if (!request) {
 		cfg80211_free_coloc_ap_list(&coloc_ap_list);
 		return -ENOMEM;
@@ -888,8 +894,26 @@ static int cfg80211_scan_6ghz(struct cfg80211_registered_device *rdev)
 
 	*request = *rdev_req;
 	request->n_channels = 0;
-	request->scan_6ghz_params =
-		(void *)&request->channels[n_channels];
+	request->n_6ghz_params = 0;
+	if (rdev_req->n_ssids) {
+		/*
+		 * Add the ssids from the parent scan request to the new
+		 * scan request, so the driver would be able to use them
+		 * in its probe requests to discover hidden APs on PSC
+		 * channels.
+		 */
+		request->ssids = (void *)request + offs_ssids;
+		memcpy(request->ssids, rdev_req->ssids,
+		       sizeof(*request->ssids) * request->n_ssids);
+	}
+	request->scan_6ghz_params = (void *)request + offs_6ghz_params;
+
+	if (rdev_req->ie_len) {
+		void *ie = (void *)request + offs_ies;
+
+		memcpy(ie, rdev_req->ie, rdev_req->ie_len);
+		request->ie = ie;
+	}
 
 	/*
 	 * PSC channels should not be scanned in case of direct scan with 1 SSID
@@ -978,17 +1002,8 @@ skip:
 
 	if (request->n_channels) {
 		struct cfg80211_scan_request *old = rdev->int_scan_req;
-		rdev->int_scan_req = request;
 
-		/*
-		 * Add the ssids from the parent scan request to the new scan
-		 * request, so the driver would be able to use them in its
-		 * probe requests to discover hidden APs on PSC channels.
-		 */
-		request->ssids = (void *)&request->channels[request->n_channels];
-		request->n_ssids = rdev_req->n_ssids;
-		memcpy(request->ssids, rdev_req->ssids, sizeof(*request->ssids) *
-		       request->n_ssids);
+		rdev->int_scan_req = request;
 
 		/*
 		 * If this scan follows a previous scan, save the scan start
-- 
cgit v1.2.3


From 8ecc4d7a7cd3e9704b63b8e4f6cd8b6b7314210f Mon Sep 17 00:00:00 2001
From: Aditya Kumar Singh <quic_adisi@quicinc.com>
Date: Thu, 9 May 2024 08:55:55 +0530
Subject: wifi: mac80211: pass proper link id for channel switch started
 notification

Original changes[1] posted is having proper changes. However, at the same
time, there was chandef puncturing changes which had a conflict with this.
While applying, two errors crept in -
   a) Whitespace error.
   b) Link ID being passed to channel switch started notifier function is
      0. However proper link ID is present in the function.

Fix these now.

[1] https://lore.kernel.org/all/20240130140918.1172387-5-quic_adisi@quicinc.com/

Fixes: 1a96bb4e8a79 ("wifi: mac80211: start and finalize channel switch on link basis")
Signed-off-by: Aditya Kumar Singh <quic_adisi@quicinc.com>
Link: https://msgid.link/20240509032555.263933-1-quic_adisi@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 99abbb9e8477..83ad6c9709fe 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -4017,7 +4017,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
 		goto out;
 	}
 
-	link_data->csa_chanreq = chanreq; 
+	link_data->csa_chanreq = chanreq;
 	link_conf->csa_active = true;
 
 	if (params->block_tx &&
@@ -4028,7 +4028,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
 	}
 
 	cfg80211_ch_switch_started_notify(sdata->dev,
-					  &link_data->csa_chanreq.oper, 0,
+					  &link_data->csa_chanreq.oper, link_id,
 					  params->count, params->block_tx);
 
 	if (changed) {
-- 
cgit v1.2.3


From 92158790ce4391ce4c35d8dfbce759195e4724cb Mon Sep 17 00:00:00 2001
From: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Date: Sun, 12 May 2024 15:25:00 +0300
Subject: wifi: iwlwifi: mvm: don't initialize csa_work twice

The initialization of this worker moved to iwl_mvm_mac_init_mvmvif
but we removed only from the pre-MLD version of the add_interface
callback. Remove it also from the MLD version.

Fixes: 0bcc2155983e ("wifi: iwlwifi: mvm: init vif works only once")
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Reviewed-by: Johannes Berg <johannes.berg@intel.com>
Link: https://msgid.link/20240512152312.4f15b41604f0.Iec912158e5a706175531d3736d77d25adf02fba4@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
index 0a3b7284eedd..fcfd2dd7568e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
@@ -75,8 +75,6 @@ static int iwl_mvm_mld_mac_add_interface(struct ieee80211_hw *hw,
 		goto out_free_bf;
 
 	iwl_mvm_tcm_add_vif(mvm, vif);
-	INIT_DELAYED_WORK(&mvmvif->csa_work,
-			  iwl_mvm_channel_switch_disconnect_wk);
 
 	if (vif->type == NL80211_IFTYPE_MONITOR) {
 		mvm->monitor_on = true;
-- 
cgit v1.2.3


From 98b7017ddb914a32152ed9aec7468386fff85f8f Mon Sep 17 00:00:00 2001
From: Shaul Triebitz <shaul.triebitz@intel.com>
Date: Sun, 12 May 2024 15:25:04 +0300
Subject: wifi: iwlwifi: mvm: always set the TWT IE offset

In beacon template version 14, make sure to always set
the TWT IE offset before sending the beacon template command,
also in the debugfs inject_beacon_ie path.
If the TWT IE does not exist, the offset will be set to zero.

Fixes: bf0212fd8faa ("wifi: iwlwifi: mvm: add beacon template version 14")
Signed-off-by: Shaul Triebitz <shaul.triebitz@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240512152312.eb27175c345a.If30ef24aba10fe47fd42a7a9703eb8903035e294@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c  | 9 +++++++++
 drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 2 +-
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h      | 1 +
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
index 79f4ac8cbc72..8101ecbb478b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
@@ -1617,6 +1617,15 @@ static int _iwl_dbgfs_inject_beacon_ie(struct iwl_mvm *mvm, char *bin, int len)
 					 &beacon_cmd.tim_size,
 					 beacon->data, beacon->len);
 
+		if (iwl_fw_lookup_cmd_ver(mvm->fw,
+					  BEACON_TEMPLATE_CMD, 0) >= 14) {
+			u32 offset = iwl_mvm_find_ie_offset(beacon->data,
+							    WLAN_EID_S1G_TWT,
+							    beacon->len);
+
+			beacon_cmd.btwt_offset = cpu_to_le32(offset);
+		}
+
 		iwl_mvm_mac_ctxt_send_beacon_cmd(mvm, beacon, &beacon_cmd,
 						 sizeof(beacon_cmd));
 	}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
index 5a06f887769a..5144fa0f96b0 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -873,7 +873,7 @@ void iwl_mvm_mac_ctxt_set_tim(struct iwl_mvm *mvm,
 	}
 }
 
-static u32 iwl_mvm_find_ie_offset(u8 *beacon, u8 eid, u32 frame_size)
+u32 iwl_mvm_find_ie_offset(u8 *beacon, u8 eid, u32 frame_size)
 {
 	struct ieee80211_mgmt *mgmt = (void *)beacon;
 	const u8 *ie;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 1f58c727fa63..0a1959bd4079 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1758,6 +1758,7 @@ u8 iwl_mvm_next_antenna(struct iwl_mvm *mvm, u8 valid, u8 last_idx);
 void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, int clock_type, u32 *gp2,
 			   u64 *boottime, ktime_t *realtime);
 u32 iwl_mvm_get_systime(struct iwl_mvm *mvm);
+u32 iwl_mvm_find_ie_offset(u8 *beacon, u8 eid, u32 frame_size);
 
 /* Tx / Host Commands */
 int __must_check iwl_mvm_send_cmd(struct iwl_mvm *mvm,
-- 
cgit v1.2.3


From 788e4c75f831d06fcfbbec1d455fac429521e607 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sun, 12 May 2024 07:31:06 +0300
Subject: wifi: iwlwifi: mvm: fix a crash on 7265

Since IWL_FW_CMD_VER_UNKNOWN = 99, then my change to consider
cmd_ver >= 7 instead of cmd_ver = 7 included also firmwares that don't
advertise the command version at all. This made us send a command with a
bad size and because of that, the firmware hit a BAD_COMMAND immediately
after handling the REDUCE_TX_POWER_CMD command.

Fixes: 8f892e225f41 ("wifi: iwlwifi: mvm: support iwl_dev_tx_power_cmd_v8")
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Reviewed-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240512072733.eb20ff5050d3.Ie4fc6f5496cd296fd6ff20d15e98676f28a3cccd@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index e7f5978ef2d7..ca7fcae6995e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -895,8 +895,8 @@ int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b)
 	int ret;
 	u16 len = 0;
 	u32 n_subbands;
-	u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, cmd_id,
-					   IWL_FW_CMD_VER_UNKNOWN);
+	u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, cmd_id, 3);
+
 	if (cmd_ver >= 7) {
 		len = sizeof(cmd.v7);
 		n_subbands = IWL_NUM_SUB_BANDS_V2;
-- 
cgit v1.2.3


From b7ffca99313d856f7d1cc89038d9061b128e8e97 Mon Sep 17 00:00:00 2001
From: Yedidya Benshimol <yedidya.ben.shimol@intel.com>
Date: Fri, 10 May 2024 17:06:29 +0300
Subject: wifi: iwlwifi: mvm: d3: fix WoWLAN command version lookup

After moving from commands to notificaitons in the d3 resume flow,
removing the WOWLAN_GET_STATUSES and REPLY_OFFLOADS_QUERY_CMD causes
the return of the default value when looking up their version.
Returning zero here results in the driver sending the not supported
NON_QOS_TX_COUNTER_CMD.

Signed-off-by: Yedidya Benshimol <yedidya.ben.shimol@intel.com>
Reviewed-by: Gregory Greenman <gregory.greenman@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240510170500.8cabfd580614.If3a0db9851f56041f8f5360959354abd5379224a@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 71e6b06481a9..e37ae34acba6 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -2341,7 +2341,8 @@ static bool iwl_mvm_setup_connection_keep(struct iwl_mvm *mvm,
 
 out:
 	if (iwl_fw_lookup_notif_ver(mvm->fw, LONG_GROUP,
-				    WOWLAN_GET_STATUSES, 0) < 10) {
+				    WOWLAN_GET_STATUSES,
+				    IWL_FW_CMD_VER_UNKNOWN) < 10) {
 		mvmvif->seqno_valid = true;
 		/* +0x10 because the set API expects next-to-use, not last-used */
 		mvmvif->seqno = status->non_qos_seq_ctr + 0x10;
-- 
cgit v1.2.3


From 4a7aace2899711592327463c1a29ffee44fcc66e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 10 May 2024 17:06:33 +0300
Subject: wifi: iwlwifi: mvm: revert gen2 TX A-MPDU size to 64

We don't actually support >64 even for HE devices, so revert
back to 64. This fixes an issue where the session is refused
because the queue is configured differently from the actual
session later.

Fixes: 514c30696fbc ("iwlwifi: add support for IEEE802.11ax")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Reviewed-by: Liad Kaufman <liad.kaufman@intel.com>
Reviewed-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240510170500.52f7b4cf83aa.If47e43adddf7fe250ed7f5571fbb35d8221c7c47@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/rs.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.h b/drivers/net/wireless/intel/iwlwifi/mvm/rs.h
index 376b23b409dc..6cd4ec4d8f34 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.h
@@ -122,13 +122,8 @@ enum {
 
 #define LINK_QUAL_AGG_FRAME_LIMIT_DEF	(63)
 #define LINK_QUAL_AGG_FRAME_LIMIT_MAX	(63)
-/*
- * FIXME - various places in firmware API still use u8,
- * e.g. LQ command and SCD config command.
- * This should be 256 instead.
- */
-#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_DEF	(255)
-#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_MAX	(255)
+#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_DEF	(64)
+#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_MAX	(64)
 #define LINK_QUAL_AGG_FRAME_LIMIT_MIN	(0)
 
 #define LQ_SIZE		2	/* 2 mode tables:  "Active" and "Search" */
-- 
cgit v1.2.3


From 0f2e9f6f21d1ff292363cdfb5bc4d492eeaff76e Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Fri, 10 May 2024 17:06:35 +0300
Subject: wifi: iwlwifi: mvm: set properly mac header

In the driver we only use skb_put* for adding data to the skb, hence data
never moves and skb_reset_mac_haeder would set mac_header to the first
time data was added and not to mac80211 header, fix this my using the
actual len of bytes added for setting the mac header.

Fixes: 3f7a9d577d47 ("wifi: iwlwifi: mvm: simplify by using SKB MAC header pointer")
Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Reviewed-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240510170500.12f2de2909c3.I72a819b96f2fe55bde192a8fd31a4b96c301aa73@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index d78af2928152..489cfb0a4ab1 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -2450,8 +2450,11 @@ void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi,
 	 *
 	 * We mark it as mac header, for upper layers to know where
 	 * all radio tap header ends.
+	 *
+	 * Since data doesn't move data while putting data on skb and that is
+	 * the only way we use, data + len is the next place that hdr would be put
 	 */
-	skb_reset_mac_header(skb);
+	skb_set_mac_header(skb, skb->len);
 
 	/*
 	 * Override the nss from the rx_vec since the rate_n_flags has
-- 
cgit v1.2.3


From 87821b67dea87addbc4ab093ba752753b002176a Mon Sep 17 00:00:00 2001
From: Shahar S Matityahu <shahar.s.matityahu@intel.com>
Date: Fri, 10 May 2024 17:06:39 +0300
Subject: wifi: iwlwifi: dbg_ini: move iwl_dbg_tlv_free outside of debugfs
 ifdef

The driver should call iwl_dbg_tlv_free even if debugfs is not defined
since ini mode does not depend on debugfs ifdef.

Fixes: 68f6f492c4fa ("iwlwifi: trans: support loading ini TLVs from external file")
Signed-off-by: Shahar S Matityahu <shahar.s.matityahu@intel.com>
Reviewed-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240510170500.c8e3723f55b0.I5e805732b0be31ee6b83c642ec652a34e974ff10@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index 33654f228ee8..d156a9c64194 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -1815,8 +1815,8 @@ struct iwl_drv *iwl_drv_start(struct iwl_trans *trans)
 err_fw:
 #ifdef CONFIG_IWLWIFI_DEBUGFS
 	debugfs_remove_recursive(drv->dbgfs_drv);
-	iwl_dbg_tlv_free(drv->trans);
 #endif
+	iwl_dbg_tlv_free(drv->trans);
 	kfree(drv);
 err:
 	return ERR_PTR(ret);
-- 
cgit v1.2.3


From cc3ba78f202de9752aceb16342ab62bdfbffac7e Mon Sep 17 00:00:00 2001
From: Benjamin Berg <benjamin.berg@intel.com>
Date: Mon, 13 May 2024 13:27:08 +0300
Subject: wifi: iwlwifi: mvm: remove stale STA link data during restart

If pre-recovery mac80211 tried to disable a link but this disablement
failed, then there might be a mismatch between mac80211 assuming the
link has been disabled and the driver still having the data around.
During recover itself, that is not a problem, but should the link be
activated again at a later point, iwlwifi will refuse the activation as
it detects the inconsistent state.

Solve this corner-case by iterating the station in the restart cleanup
handler.

Signed-off-by: Benjamin Berg <benjamin.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240513132416.d2fd60338055.I840d4fdce5fd49fe69896d928b071067e3730259@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 37 +++++++++++++++++++++++
 drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c  | 10 +++---
 drivers/net/wireless/intel/iwlwifi/mvm/sta.h      |  5 +++
 3 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 486a6b8f3c97..628b50ee0244 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -1128,6 +1128,39 @@ static void iwl_mvm_cleanup_iterator(void *data, u8 *mac,
 	RCU_INIT_POINTER(mvmvif->deflink.probe_resp_data, NULL);
 }
 
+static void iwl_mvm_cleanup_sta_iterator(void *data, struct ieee80211_sta *sta)
+{
+	struct iwl_mvm *mvm = data;
+	struct iwl_mvm_sta *mvm_sta;
+	struct ieee80211_vif *vif;
+	int link_id;
+
+	mvm_sta = iwl_mvm_sta_from_mac80211(sta);
+	vif = mvm_sta->vif;
+
+	if (!sta->valid_links)
+		return;
+
+	for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) {
+		struct iwl_mvm_link_sta *mvm_link_sta;
+
+		mvm_link_sta =
+			rcu_dereference_check(mvm_sta->link[link_id],
+					      lockdep_is_held(&mvm->mutex));
+		if (mvm_link_sta && !(vif->active_links & BIT(link_id))) {
+			/*
+			 * We have a link STA but the link is inactive in
+			 * mac80211. This will happen if we failed to
+			 * deactivate the link but mac80211 roll back the
+			 * deactivation of the link.
+			 * Delete the stale data to avoid issues later on.
+			 */
+			iwl_mvm_mld_free_sta_link(mvm, mvm_sta, mvm_link_sta,
+						  link_id, false);
+		}
+	}
+}
+
 static void iwl_mvm_restart_cleanup(struct iwl_mvm *mvm)
 {
 	iwl_mvm_stop_device(mvm);
@@ -1150,6 +1183,10 @@ static void iwl_mvm_restart_cleanup(struct iwl_mvm *mvm)
 	 */
 	ieee80211_iterate_interfaces(mvm->hw, 0, iwl_mvm_cleanup_iterator, mvm);
 
+	/* cleanup stations as links may be gone after restart */
+	ieee80211_iterate_stations_atomic(mvm->hw,
+					  iwl_mvm_cleanup_sta_iterator, mvm);
+
 	mvm->p2p_device_vif = NULL;
 
 	iwl_mvm_reset_phy_ctxts(mvm);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
index b7a461dba41e..ae3cde0e64f3 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
@@ -515,11 +515,11 @@ static int iwl_mvm_mld_cfg_sta(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
 	return iwl_mvm_mld_send_sta_cmd(mvm, &cmd);
 }
 
-static void iwl_mvm_mld_free_sta_link(struct iwl_mvm *mvm,
-				      struct iwl_mvm_sta *mvm_sta,
-				      struct iwl_mvm_link_sta *mvm_sta_link,
-				      unsigned int link_id,
-				      bool is_in_fw)
+void iwl_mvm_mld_free_sta_link(struct iwl_mvm *mvm,
+			       struct iwl_mvm_sta *mvm_sta,
+			       struct iwl_mvm_link_sta *mvm_sta_link,
+			       unsigned int link_id,
+			       bool is_in_fw)
 {
 	RCU_INIT_POINTER(mvm->fw_id_to_mac_id[mvm_sta_link->sta_id],
 			 is_in_fw ? ERR_PTR(-EINVAL) : NULL);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
index 264f1f9394b6..754a05a8c189 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
@@ -662,6 +662,11 @@ int iwl_mvm_mld_update_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 			   struct ieee80211_sta *sta);
 int iwl_mvm_mld_rm_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 		       struct ieee80211_sta *sta);
+void iwl_mvm_mld_free_sta_link(struct iwl_mvm *mvm,
+			       struct iwl_mvm_sta *mvm_sta,
+			       struct iwl_mvm_link_sta *mvm_sta_link,
+			       unsigned int link_id,
+			       bool is_in_fw);
 int iwl_mvm_mld_rm_sta_id(struct iwl_mvm *mvm, u8 sta_id);
 int iwl_mvm_mld_update_sta_links(struct iwl_mvm *mvm,
 				 struct ieee80211_vif *vif,
-- 
cgit v1.2.3


From 08b16d1b5997dc378533318e2a9cd73c7a898284 Mon Sep 17 00:00:00 2001
From: Yedidya Benshimol <yedidya.ben.shimol@intel.com>
Date: Mon, 13 May 2024 13:27:09 +0300
Subject: wifi: iwlwifi: mvm: Handle BIGTK cipher in kek_kck cmd

The BIGTK cipher field was added to the kek_kck_material_cmd
but wasn't assigned. Fix that by differentiating between the
IGTK/BIGTK keys and assign the ciphers fields accordingly.

Signed-off-by: Yedidya Benshimol <yedidya.ben.shimol@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240513132416.7fd0b22b7267.Ie9b581652b74bd7806980364d59e1b2e78e682c0@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index e37ae34acba6..54f4acbbd05b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -595,6 +595,12 @@ static void iwl_mvm_wowlan_gtk_type_iter(struct ieee80211_hw *hw,
 					 void *_data)
 {
 	struct wowlan_key_gtk_type_iter *data = _data;
+	__le32 *cipher = NULL;
+
+	if (key->keyidx == 4 || key->keyidx == 5)
+		cipher = &data->kek_kck_cmd->igtk_cipher;
+	if (key->keyidx == 6 || key->keyidx == 7)
+		cipher = &data->kek_kck_cmd->bigtk_cipher;
 
 	switch (key->cipher) {
 	default:
@@ -606,10 +612,13 @@ static void iwl_mvm_wowlan_gtk_type_iter(struct ieee80211_hw *hw,
 		return;
 	case WLAN_CIPHER_SUITE_BIP_GMAC_256:
 	case WLAN_CIPHER_SUITE_BIP_GMAC_128:
-		data->kek_kck_cmd->igtk_cipher = cpu_to_le32(STA_KEY_FLG_GCMP);
+		if (cipher)
+			*cipher = cpu_to_le32(STA_KEY_FLG_GCMP);
 		return;
 	case WLAN_CIPHER_SUITE_AES_CMAC:
-		data->kek_kck_cmd->igtk_cipher = cpu_to_le32(STA_KEY_FLG_CCM);
+	case WLAN_CIPHER_SUITE_BIP_CMAC_256:
+		if (cipher)
+			*cipher = cpu_to_le32(STA_KEY_FLG_CCM);
 		return;
 	case WLAN_CIPHER_SUITE_CCMP:
 		if (!sta)
-- 
cgit v1.2.3


From 4d08c0b3357cba0aeffaf3abc62cae0c154f2816 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 13 May 2024 13:27:10 +0300
Subject: wifi: iwlwifi: mvm: handle BA session teardown in RF-kill

When entering RF-kill, mac80211 tears down BA sessions, but
due to RF-kill the commands aren't sent to the device. As a
result, there can be frames pending on the reorder buffer or
perhaps even received while doing so, leading to warnings.

Avoid the warnings by doing the BA session teardown normally
even in RF-kill, which also requires queue sync.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240513132416.0762cd80fb3d.I43c5877f3b546159b2db4f36d6d956b333c41cf0@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c |  2 +-
 drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c  |  3 ++-
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c      | 12 ++++++++----
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 628b50ee0244..de9f0b446545 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -6385,7 +6385,7 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
 		.len[0] = sizeof(cmd),
 		.data[1] = data,
 		.len[1] = size,
-		.flags = sync ? 0 : CMD_ASYNC,
+		.flags = CMD_SEND_IN_RFKILL | (sync ? 0 : CMD_ASYNC),
 	};
 	int ret;
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
index ae3cde0e64f3..9d139b56e152 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-sta.c
@@ -1014,7 +1014,8 @@ static int iwl_mvm_mld_update_sta_baids(struct iwl_mvm *mvm,
 
 		cmd.modify.tid = cpu_to_le32(data->tid);
 
-		ret = iwl_mvm_send_cmd_pdu(mvm, cmd_id, 0, sizeof(cmd), &cmd);
+		ret = iwl_mvm_send_cmd_pdu(mvm, cmd_id, CMD_SEND_IN_RFKILL,
+					   sizeof(cmd), &cmd);
 		data->sta_mask = new_sta_mask;
 		if (ret)
 			return ret;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 20d4968d692a..cc79fe991c26 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -2848,7 +2848,12 @@ static int iwl_mvm_fw_baid_op_cmd(struct iwl_mvm *mvm,
 		.action = start ? cpu_to_le32(IWL_RX_BAID_ACTION_ADD) :
 				  cpu_to_le32(IWL_RX_BAID_ACTION_REMOVE),
 	};
-	u32 cmd_id = WIDE_ID(DATA_PATH_GROUP, RX_BAID_ALLOCATION_CONFIG_CMD);
+	struct iwl_host_cmd hcmd = {
+		.id = WIDE_ID(DATA_PATH_GROUP, RX_BAID_ALLOCATION_CONFIG_CMD),
+		.flags = CMD_SEND_IN_RFKILL,
+		.len[0] = sizeof(cmd),
+		.data[0] = &cmd,
+	};
 	int ret;
 
 	BUILD_BUG_ON(sizeof(struct iwl_rx_baid_cfg_resp) != sizeof(baid));
@@ -2860,7 +2865,7 @@ static int iwl_mvm_fw_baid_op_cmd(struct iwl_mvm *mvm,
 		cmd.alloc.ssn = cpu_to_le16(ssn);
 		cmd.alloc.win_size = cpu_to_le16(buf_size);
 		baid = -EIO;
-	} else if (iwl_fw_lookup_cmd_ver(mvm->fw, cmd_id, 1) == 1) {
+	} else if (iwl_fw_lookup_cmd_ver(mvm->fw, hcmd.id, 1) == 1) {
 		cmd.remove_v1.baid = cpu_to_le32(baid);
 		BUILD_BUG_ON(sizeof(cmd.remove_v1) > sizeof(cmd.remove));
 	} else {
@@ -2869,8 +2874,7 @@ static int iwl_mvm_fw_baid_op_cmd(struct iwl_mvm *mvm,
 		cmd.remove.tid = cpu_to_le32(tid);
 	}
 
-	ret = iwl_mvm_send_cmd_pdu_status(mvm, cmd_id, sizeof(cmd),
-					  &cmd, &baid);
+	ret = iwl_mvm_send_cmd_status(mvm, &hcmd, &baid);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3


From 989830d1cf16bd149bf0690d889a9caef95fb5b1 Mon Sep 17 00:00:00 2001
From: Ayala Beker <ayala.beker@intel.com>
Date: Mon, 13 May 2024 13:27:11 +0300
Subject: wifi: iwlwifi: mvm: properly set 6 GHz channel direct probe option

Ensure that the 6 GHz channel is configured with a valid direct BSSID,
avoiding any invalid or multicast BSSID addresses.

Signed-off-by: Ayala Beker <ayala.beker@intel.com>
Reviewed-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240513132416.91a631a0fe60.I2ea2616af9b8a2eaf959b156c69cf65a2f1204d4@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index a7ec172eeade..890d4d79b172 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -1730,7 +1730,10 @@ iwl_mvm_umac_scan_fill_6g_chan_list(struct iwl_mvm *mvm,
 				break;
 		}
 
-		if (k == idex_b && idex_b < SCAN_BSSID_MAX_SIZE) {
+		if (k == idex_b && idex_b < SCAN_BSSID_MAX_SIZE &&
+		    !WARN_ONCE(!is_valid_ether_addr(scan_6ghz_params[j].bssid),
+			       "scan: invalid BSSID at index %u, index_b=%u\n",
+			       j, idex_b)) {
 			memcpy(&pp->bssid_array[idex_b++],
 			       scan_6ghz_params[j].bssid, ETH_ALEN);
 		}
-- 
cgit v1.2.3


From 60d62757df30b74bf397a2847a6db7385c6ee281 Mon Sep 17 00:00:00 2001
From: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Date: Mon, 13 May 2024 13:27:12 +0300
Subject: wifi: iwlwifi: mvm: check n_ssids before accessing the ssids

In some versions of cfg80211, the ssids poinet might be a valid one even
though n_ssids is 0. Accessing the pointer in this case will cuase an
out-of-bound access. Fix this by checking n_ssids first.

Fixes: c1a7515393e4 ("iwlwifi: mvm: add adaptive dwell support")
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Reviewed-by: Ilan Peer <ilan.peer@intel.com>
Reviewed-by: Johannes Berg <johannes.berg@intel.com>
Link: https://msgid.link/20240513132416.6e4d1762bf0d.I5a0e6cc8f02050a766db704d15594c61fe583d45@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index 890d4d79b172..9739c38ef824 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -1313,7 +1313,7 @@ static void iwl_mvm_scan_umac_dwell(struct iwl_mvm *mvm,
 		if (IWL_MVM_ADWELL_MAX_BUDGET)
 			cmd->v7.adwell_max_budget =
 				cpu_to_le16(IWL_MVM_ADWELL_MAX_BUDGET);
-		else if (params->ssids && params->ssids[0].ssid_len)
+		else if (params->n_ssids && params->ssids[0].ssid_len)
 			cmd->v7.adwell_max_budget =
 				cpu_to_le16(IWL_SCAN_ADWELL_MAX_BUDGET_DIRECTED_SCAN);
 		else
@@ -1418,7 +1418,7 @@ iwl_mvm_scan_umac_dwell_v11(struct iwl_mvm *mvm,
 	if (IWL_MVM_ADWELL_MAX_BUDGET)
 		general_params->adwell_max_budget =
 			cpu_to_le16(IWL_MVM_ADWELL_MAX_BUDGET);
-	else if (params->ssids && params->ssids[0].ssid_len)
+	else if (params->n_ssids && params->ssids[0].ssid_len)
 		general_params->adwell_max_budget =
 			cpu_to_le16(IWL_SCAN_ADWELL_MAX_BUDGET_DIRECTED_SCAN);
 	else
-- 
cgit v1.2.3


From e6dd2936ce7ce94a1915b799f8af8193ec628e87 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Mon, 13 May 2024 13:27:13 +0300
Subject: wifi: iwlwifi: mvm: Fix scan abort handling with HW rfkill

When HW rfkill is toggled to disable the RF, the flow to stop scan is
called. When trying to send the command to abort the scan, since
HW rfkill is toggled, the command is not sent due to rfkill being
asserted, and -ERFKILL is returned from iwl_trans_send_cmd(), but this
is silently ignored in iwl_mvm_send_cmd() and thus the scan abort flow
continues to wait for scan complete notification and fails. Since it
fails, the UID to type mapping is not cleared, and thus a warning is
later fired when trying to stop the interface.

To fix this, modify the UMAC scan abort flow to force sending the
scan abort command even when in rfkill, so stop the FW from accessing
the radio etc.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240513132416.8cbe2f8c1a97.Iffe235c12a919dafec88eef399eb1f7bae2c5bdb@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index 9739c38ef824..b5f664ae5a17 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -3322,10 +3322,11 @@ static int iwl_mvm_umac_scan_abort(struct iwl_mvm *mvm, int type)
 
 	ret = iwl_mvm_send_cmd_pdu(mvm,
 				   WIDE_ID(IWL_ALWAYS_LONG_GROUP, SCAN_ABORT_UMAC),
-				   0, sizeof(cmd), &cmd);
+				   CMD_SEND_IN_RFKILL, sizeof(cmd), &cmd);
 	if (!ret)
 		mvm->scan_uid_status[uid] = type << IWL_MVM_SCAN_STOPPING_SHIFT;
 
+	IWL_DEBUG_SCAN(mvm, "Scan abort: ret=%d\n", ret);
 	return ret;
 }
 
-- 
cgit v1.2.3


From 4bb95f4535489ed830cf9b34b0a891e384d1aee4 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 13 May 2024 13:27:14 +0300
Subject: wifi: iwlwifi: mvm: don't read past the mfuart notifcation

In case the firmware sends a notification that claims it has more data
than it has, we will read past that was allocated for the notification.
Remove the print of the buffer, we won't see it by default. If needed,
we can see the content with tracing.

This was reported by KFENCE.

Fixes: bdccdb854f2f ("iwlwifi: mvm: support MFUART dump in case of MFUART assert")
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Reviewed-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Miri Korenblit <miriam.rachel.korenblit@intel.com>
Link: https://msgid.link/20240513132416.ba82a01a559e.Ia91dd20f5e1ca1ad380b95e68aebf2794f553d9b@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index ca7fcae6995e..f4937a100cbe 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -94,20 +94,10 @@ void iwl_mvm_mfu_assert_dump_notif(struct iwl_mvm *mvm,
 {
 	struct iwl_rx_packet *pkt = rxb_addr(rxb);
 	struct iwl_mfu_assert_dump_notif *mfu_dump_notif = (void *)pkt->data;
-	__le32 *dump_data = mfu_dump_notif->data;
-	int n_words = le32_to_cpu(mfu_dump_notif->data_size) / sizeof(__le32);
-	int i;
 
 	if (mfu_dump_notif->index_num == 0)
 		IWL_INFO(mvm, "MFUART assert id 0x%x occurred\n",
 			 le32_to_cpu(mfu_dump_notif->assert_id));
-
-	for (i = 0; i < n_words; i++)
-		IWL_DEBUG_INFO(mvm,
-			       "MFUART assert dump, dword %u: 0x%08x\n",
-			       le16_to_cpu(mfu_dump_notif->index_num) *
-			       n_words + i,
-			       le32_to_cpu(dump_data[i]));
 }
 
 static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait,
-- 
cgit v1.2.3


From 0c2fd18f7ec552796179c14f13a0e06942f09d16 Mon Sep 17 00:00:00 2001
From: Lingbo Kong <quic_lingbok@quicinc.com>
Date: Thu, 16 May 2024 10:18:53 +0800
Subject: wifi: mac80211: fix Spatial Reuse element size check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, the way to check the size of Spatial Reuse IE data in the
ieee80211_parse_extension_element() is incorrect.

This is because the len variable in the ieee80211_parse_extension_element()
function is equal to the size of Spatial Reuse IE data minus one and the
value of returned by the ieee80211_he_spr_size() function is equal to
the length of Spatial Reuse IE data. So the result of the
len >= ieee80211_he_spr_size(data) statement always false.

To address this issue and make it consistent with the logic used elsewhere
with ieee80211_he_oper_size(), change the
"len >= ieee80211_he_spr_size(data)" to
“len >= ieee80211_he_spr_size(data) - 1”.

Fixes: 9d0480a7c05b ("wifi: mac80211: move element parsing to a new file")
Signed-off-by: Lingbo Kong <quic_lingbok@quicinc.com>
Link: https://msgid.link/20240516021854.5682-2-quic_lingbok@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/parse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c
index 55e5497f8978..055a60e90979 100644
--- a/net/mac80211/parse.c
+++ b/net/mac80211/parse.c
@@ -111,7 +111,7 @@ ieee80211_parse_extension_element(u32 *crc,
 		if (params->mode < IEEE80211_CONN_MODE_HE)
 			break;
 		if (len >= sizeof(*elems->he_spr) &&
-		    len >= ieee80211_he_spr_size(data))
+		    len >= ieee80211_he_spr_size(data) - 1)
 			elems->he_spr = data;
 		break;
 	case WLAN_EID_EXT_HE_6GHZ_CAPA:
-- 
cgit v1.2.3


From a26d8dc5227f449a54518a8b40733a54c6600a8b Mon Sep 17 00:00:00 2001
From: Lingbo Kong <quic_lingbok@quicinc.com>
Date: Thu, 16 May 2024 10:18:54 +0800
Subject: wifi: mac80211: correctly parse Spatial Reuse Parameter Set element

Currently, the way of parsing Spatial Reuse Parameter Set element is
incorrect and some members of struct ieee80211_he_obss_pd are not assigned.

To address this issue, it must be parsed in the order of the elements of
Spatial Reuse Parameter Set defined in the IEEE Std 802.11ax specification.

The diagram of the Spatial Reuse Parameter Set element (IEEE Std 802.11ax
-2021-9.4.2.252).

-------------------------------------------------------------------------
|       |      |         |       |Non-SRG|  SRG  | SRG   | SRG  | SRG   |
|Element|Length| Element |  SR   |OBSS PD|OBSS PD|OBSS PD| BSS  |Partial|
|   ID  |      |   ID    |Control|  Max  |  Min  | Max   |Color | BSSID |
|       |      |Extension|       | Offset| Offset|Offset |Bitmap|Bitmap |
-------------------------------------------------------------------------

Fixes: 1ced169cc1c2 ("mac80211: allow setting spatial reuse parameters from bss_conf")
Signed-off-by: Lingbo Kong <quic_lingbok@quicinc.com>
Link: https://msgid.link/20240516021854.5682-3-quic_lingbok@quicinc.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/he.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index 9f5ffdc9db28..ecbb042dd043 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -230,15 +230,21 @@ ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif,
 
 	if (!he_spr_ie_elem)
 		return;
+
+	he_obss_pd->sr_ctrl = he_spr_ie_elem->he_sr_control;
 	data = he_spr_ie_elem->optional;
 
 	if (he_spr_ie_elem->he_sr_control &
 	    IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT)
-		data++;
+		he_obss_pd->non_srg_max_offset = *data++;
+
 	if (he_spr_ie_elem->he_sr_control &
 	    IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) {
-		he_obss_pd->max_offset = *data++;
 		he_obss_pd->min_offset = *data++;
+		he_obss_pd->max_offset = *data++;
+		memcpy(he_obss_pd->bss_color_bitmap, data, 8);
+		data += 8;
+		memcpy(he_obss_pd->partial_bssid_bitmap, data, 8);
 		he_obss_pd->enable = true;
 	}
 }
-- 
cgit v1.2.3


From 92ecbb3ac6f3fe8ae9edf3226c76aa17b6800699 Mon Sep 17 00:00:00 2001
From: Dmitry Antipov <dmantipov@yandex.ru>
Date: Fri, 17 May 2024 18:33:32 +0300
Subject: wifi: mac80211: fix UBSAN noise in ieee80211_prep_hw_scan()

When testing the previous patch with CONFIG_UBSAN_BOUNDS, I've
noticed the following:

UBSAN: array-index-out-of-bounds in net/mac80211/scan.c:372:4
index 0 is out of range for type 'struct ieee80211_channel *[]'
CPU: 0 PID: 1435 Comm: wpa_supplicant Not tainted 6.9.0+ #1
Hardware name: LENOVO 20UN005QRT/20UN005QRT <...BIOS details...>
Call Trace:
 <TASK>
 dump_stack_lvl+0x2d/0x90
 __ubsan_handle_out_of_bounds+0xe7/0x140
 ? timerqueue_add+0x98/0xb0
 ieee80211_prep_hw_scan+0x2db/0x480 [mac80211]
 ? __kmalloc+0xe1/0x470
 __ieee80211_start_scan+0x541/0x760 [mac80211]
 rdev_scan+0x1f/0xe0 [cfg80211]
 nl80211_trigger_scan+0x9b6/0xae0 [cfg80211]
 ...<the rest is not too useful...>

Since '__ieee80211_start_scan()' leaves 'hw_scan_req->req.n_channels'
uninitialized, actual boundaries of 'hw_scan_req->req.channels' can't
be checked in 'ieee80211_prep_hw_scan()'. Although an initialization
of 'hw_scan_req->req.n_channels' introduces some confusion around
allocated vs. used VLA members, this shouldn't be a problem since
everything is correctly adjusted soon in 'ieee80211_prep_hw_scan()'.

Cleanup 'kmalloc()' math in '__ieee80211_start_scan()' by using the
convenient 'struct_size()' as well.

Signed-off-by: Dmitry Antipov <dmantipov@yandex.ru>
Link: https://msgid.link/20240517153332.18271-2-dmantipov@yandex.ru
[improve (imho) indentation a bit]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/scan.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 3da1c5c45035..8ecc4b710b0e 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -744,15 +744,21 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
 			local->hw_scan_ies_bufsize *= n_bands;
 		}
 
-		local->hw_scan_req = kmalloc(
-				sizeof(*local->hw_scan_req) +
-				req->n_channels * sizeof(req->channels[0]) +
-				local->hw_scan_ies_bufsize, GFP_KERNEL);
+		local->hw_scan_req = kmalloc(struct_size(local->hw_scan_req,
+							 req.channels,
+							 req->n_channels) +
+					     local->hw_scan_ies_bufsize,
+					     GFP_KERNEL);
 		if (!local->hw_scan_req)
 			return -ENOMEM;
 
 		local->hw_scan_req->req.ssids = req->ssids;
 		local->hw_scan_req->req.n_ssids = req->n_ssids;
+		/* None of the channels are actually set
+		 * up but let UBSAN know the boundaries.
+		 */
+		local->hw_scan_req->req.n_channels = req->n_channels;
+
 		ies = (u8 *)local->hw_scan_req +
 			sizeof(*local->hw_scan_req) +
 			req->n_channels * sizeof(req->channels[0]);
-- 
cgit v1.2.3


From 9dedabe95b49ec9b0d16ce8f0ed1f9a12dd4a040 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 29 May 2024 11:42:35 -0400
Subject: spi: Assign dummy scatterlist to unidirectional transfers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 8cc3bad9d9d6 ("spi: Remove unneded check for orig_nents")
introduced a regression: unmapped data could now be passed to the DMA
APIs, resulting in null pointer dereferences. Commit 9f788ba457b4 ("spi:
Don't mark message DMA mapped when no transfer in it is") and commit
da560097c056 ("spi: Check if transfer is mapped before calling DMA sync
APIs") addressed the problem, but only partially. Unidirectional
transactions will still result in null pointer dereference. To prevent
that from happening, assign a dummy scatterlist when no data is mapped,
so that the DMA API can be called and not result in a null pointer
dereference.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reported-by: Neil Armstrong <neil.armstrong@linaro.org>
Closes: https://lore.kernel.org/r/8ae675b5-fcf9-4c9b-b06a-4462f70e1322@linaro.org
Reported-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Closes: https://lore.kernel.org/all/d3679496-2e4e-4a7c-97ed-f193bd53af1d@notapiano
Closes: https://lore.kernel.org/all/4748499f-789c-45a8-b50a-2dd09f4bac8c@notapiano
Fixes: 8cc3bad9d9d6 ("spi: Remove unneded check for orig_nents")
Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
[nfraprado: wrote the commit message]
Signed-off-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Link: https://msgid.link/r/20240529-dma-oops-dummy-v1-1-bb43aacfb11b@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index f94420858c22..9bc9fd10d538 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1220,6 +1220,11 @@ void spi_unmap_buf(struct spi_controller *ctlr, struct device *dev,
 	spi_unmap_buf_attrs(ctlr, dev, sgt, dir, 0);
 }
 
+/* Dummy SG for unidirect transfers */
+static struct scatterlist dummy_sg = {
+	.page_link = SG_END,
+};
+
 static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg)
 {
 	struct device *tx_dev, *rx_dev;
@@ -1258,6 +1263,8 @@ static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg)
 						attrs);
 			if (ret != 0)
 				return ret;
+		} else {
+			xfer->tx_sg.sgl = &dummy_sg;
 		}
 
 		if (xfer->rx_buf != NULL) {
@@ -1271,6 +1278,8 @@ static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg)
 
 				return ret;
 			}
+		} else {
+			xfer->rx_sg.sgl = &dummy_sg;
 		}
 	}
 	/* No transfer has been mapped, bail out with success */
-- 
cgit v1.2.3


From 06fe9b1df1086b42718d632aa57e8f7cd1a66a21 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 29 May 2024 09:38:38 -0600
Subject: io_uring: don't attempt to mmap larger than what the user asks for
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If IORING_FEAT_SINGLE_MMAP is ignored, as can happen if an application
uses an ancient liburing or does setup manually, then 3 mmap's are
required to map the ring into userspace. The kernel will still have
collapsed the mappings, however userspace may ask for mapping them
individually. If so, then we should not use the full number of ring
pages, as it may exceed the partial mapping. Doing so will yield an
-EFAULT from vm_insert_pages(), as we pass in more pages than what the
application asked for.

Cap the number of pages to match what the application asked for, for
the particular mapping operation.

Reported-by: Lucas Mülling <lmulling@proton.me>
Link: https://github.com/axboe/liburing/issues/1157
Fixes: 3ab1db3c6039 ("io_uring: get rid of remap_pfn_range() for mapping rings/sqes")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/memmap.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index 4785d6af5fee..a0f32a255fd1 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -244,6 +244,7 @@ __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
 	struct io_ring_ctx *ctx = file->private_data;
 	size_t sz = vma->vm_end - vma->vm_start;
 	long offset = vma->vm_pgoff << PAGE_SHIFT;
+	unsigned int npages;
 	void *ptr;
 
 	ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
@@ -253,8 +254,8 @@ __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
 	switch (offset & IORING_OFF_MMAP_MASK) {
 	case IORING_OFF_SQ_RING:
 	case IORING_OFF_CQ_RING:
-		return io_uring_mmap_pages(ctx, vma, ctx->ring_pages,
-						ctx->n_ring_pages);
+		npages = min(ctx->n_ring_pages, (sz + PAGE_SIZE - 1) >> PAGE_SHIFT);
+		return io_uring_mmap_pages(ctx, vma, ctx->ring_pages, npages);
 	case IORING_OFF_SQES:
 		return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages,
 						ctx->n_sqe_pages);
-- 
cgit v1.2.3


From 3bd27a847a3a4827a948387cc8f0dbc9fa5931d5 Mon Sep 17 00:00:00 2001
From: Matthias Maennich <maennich@google.com>
Date: Tue, 28 May 2024 11:32:43 +0000
Subject: kheaders: explicitly define file modes for archived headers

Build environments might be running with different umask settings
resulting in indeterministic file modes for the files contained in
kheaders.tar.xz. The file itself is served with 444, i.e. world
readable. Archive the files explicitly with 744,a+X to improve
reproducibility across build environments.

--mode=0444 is not suitable as directories need to be executable. Also,
444 makes it hard to delete all the readonly files after extraction.

Cc: stable@vger.kernel.org
Signed-off-by: Matthias Maennich <maennich@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 kernel/gen_kheaders.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index 6d443ea22bb7..8b6e0c2bc0df 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -84,7 +84,7 @@ find $cpio_dir -type f -print0 |
 
 # Create archive and try to normalize metadata for reproducibility.
 tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
-    --owner=0 --group=0 --sort=name --numeric-owner \
+    --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \
     -I $XZ -cf $tarfile -C $cpio_dir/ . > /dev/null
 
 echo $headers_md5 > kernel/kheaders.md5
-- 
cgit v1.2.3


From 6e58e0173507e506a5627741358bc770f220e356 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Tue, 28 May 2024 18:31:50 +0200
Subject: kheaders: use `command -v` to test for existence of `cpio`

Commit 13e1df09284d ("kheaders: explicitly validate existence of cpio
command") added an explicit check for `cpio` using `type`.

However, `type` in `dash` (which is used in some popular distributions
and base images as the shell script runner) prints the missing message
to standard output, and thus no error is printed:

    $ bash -c 'type missing >/dev/null'
    bash: line 1: type: missing: not found
    $ dash -c 'type missing >/dev/null'
    $

For instance, this issue may be seen by loongarch builders, given its
defconfig enables CONFIG_IKHEADERS since commit 9cc1df421f00 ("LoongArch:
Update Loongson-3 default config file").

Therefore, use `command -v` instead to have consistent behavior, and
take the chance to provide a more explicit error.

Fixes: 13e1df09284d ("kheaders: explicitly validate existence of cpio command")
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 kernel/gen_kheaders.sh | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index 8b6e0c2bc0df..383fd43ac612 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -14,7 +14,12 @@ include/
 arch/$SRCARCH/include/
 "
 
-type cpio > /dev/null
+if ! command -v cpio >/dev/null; then
+	echo >&2 "***"
+	echo >&2 "*** 'cpio' could not be found."
+	echo >&2 "***"
+	exit 1
+fi
 
 # Support incremental builds by skipping archive generation
 # if timestamps of files being archived are not changed.
-- 
cgit v1.2.3


From 1b1c9f0fd3fb70adf1f3b0aec58ab037d6e595d0 Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Tue, 28 May 2024 15:02:32 -0500
Subject: dt-bindings: kbuild: Fix dt_binding_check on unconfigured build

The 'dt_binding_check' target shouldn't depend on the kernel
configuration, but it has since commit 604a57ba9781 ("dt-bindings:
kbuild: Add separate target/dependency for processed-schema.json").
That is because CHECK_DT_BINDING make variable was dropped, but
scripts/dtc/Makefile was missed. The CHECK_DTBS variable can be used
instead.

Reported-by: Francesco Dolcini <francesco.dolcini@toradex.com>
Fixes: 604a57ba9781 ("dt-bindings: kbuild: Add separate target/dependency for processed-schema.json")
Signed-off-by: "Rob Herring (Arm)" <robh@kernel.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/dtc/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/dtc/Makefile b/scripts/dtc/Makefile
index a18657072541..b47f4daa4515 100644
--- a/scripts/dtc/Makefile
+++ b/scripts/dtc/Makefile
@@ -3,7 +3,7 @@
 
 # *** Also keep .gitignore in sync when changing ***
 hostprogs-always-$(CONFIG_DTC)		+= dtc fdtoverlay
-hostprogs-always-$(CHECK_DT_BINDING)	+= dtc
+hostprogs-always-$(CHECK_DTBS)		+= dtc
 
 dtc-objs	:= dtc.o flattree.o fstree.o data.o livetree.o treesource.o \
 		   srcpos.o checks.o util.o
-- 
cgit v1.2.3


From 4a4be1ad3a6efea16c56615f31117590fd881358 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 29 May 2024 09:39:34 -0700
Subject: Revert "vfs: Delete the associated dentry when deleting a file"

This reverts commit 681ce8623567ba7e7333908e9826b77145312dda.

We gave it a try, but it turns out the kernel test robot did in fact
find performance regressions for it, so we'll have to look at the more
involved alternative fixes for Yafang Shao's Elasticsearch load issue.

There were several alternatives discussed, they just weren't as simple
as this first attempt.

The report is of a -7.4% regression of filebench.sum_operations/s, which
appears significant enough to trigger my "this patch may get reverted if
somebody finds a performance regression on some other load" rule.

So it's still the case that we should end up deleting dentries more
aggressively - or just be better at pruning them later - but it needs a
bit more finesse than this simple thing.

Link: https://lore.kernel.org/all/202405291318.4dfbb352-oliver.sang@intel.com/
Cc: Yafang Shao <laoar.shao@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dcache.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 1ee6404b430b..407095188f83 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2360,17 +2360,19 @@ EXPORT_SYMBOL(d_hash_and_lookup);
  * - unhash this dentry and free it.
  *
  * Usually, we want to just turn this into
- * a negative dentry, but certain workloads can
- * generate a large number of negative dentries.
- * Therefore, it would be better to simply
- * unhash it.
+ * a negative dentry, but if anybody else is
+ * currently using the dentry or the inode
+ * we can't do that and we fall back on removing
+ * it from the hash queues and waiting for
+ * it to be deleted later when it has no users
  */
  
 /**
  * d_delete - delete a dentry
  * @dentry: The dentry to delete
  *
- * Remove the dentry from the hash queues so it can be deleted later.
+ * Turn the dentry into a negative dentry if possible, otherwise
+ * remove it from the hash queues so it can be deleted later
  */
  
 void d_delete(struct dentry * dentry)
@@ -2379,8 +2381,6 @@ void d_delete(struct dentry * dentry)
 
 	spin_lock(&inode->i_lock);
 	spin_lock(&dentry->d_lock);
-	__d_drop(dentry);
-
 	/*
 	 * Are we the only user?
 	 */
@@ -2388,6 +2388,7 @@ void d_delete(struct dentry * dentry)
 		dentry->d_flags &= ~DCACHE_CANT_MOUNT;
 		dentry_unlink_inode(dentry);
 	} else {
+		__d_drop(dentry);
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&inode->i_lock);
 	}
-- 
cgit v1.2.3


From ac0d71ee534e67c7e53439e8e9cb45ed40731660 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 29 May 2024 18:47:16 +0200
Subject: ALSA: ump: Don't accept an invalid UMP protocol number

When a UMP Stream Configuration message is received, the driver tries
to switch the protocol, but there was no sanity check of the protocol,
hence it can pass an invalid value.  Add the check and bail out if a
wrong value is passed.

Fixes: a79807683781 ("ALSA: ump: Add helper to change MIDI protocol")
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20240529164723.18309-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/ump.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sound/core/ump.c b/sound/core/ump.c
index fd6a68a54278..117c7ecc4856 100644
--- a/sound/core/ump.c
+++ b/sound/core/ump.c
@@ -685,10 +685,17 @@ static void seq_notify_protocol(struct snd_ump_endpoint *ump)
  */
 int snd_ump_switch_protocol(struct snd_ump_endpoint *ump, unsigned int protocol)
 {
+	unsigned int type;
+
 	protocol &= ump->info.protocol_caps;
 	if (protocol == ump->info.protocol)
 		return 0;
 
+	type = protocol & SNDRV_UMP_EP_INFO_PROTO_MIDI_MASK;
+	if (type != SNDRV_UMP_EP_INFO_PROTO_MIDI1 &&
+	    type != SNDRV_UMP_EP_INFO_PROTO_MIDI2)
+		return 0;
+
 	ump->info.protocol = protocol;
 	ump_dbg(ump, "New protocol = %x (caps = %x)\n",
 		protocol, ump->info.protocol_caps);
-- 
cgit v1.2.3


From bc42ca002d5d211f9c57334b9b4c25ddb0b4ec35 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 29 May 2024 18:47:17 +0200
Subject: ALSA: ump: Set default protocol when not given explicitly

When an inquiry of the current protocol via UMP Stream Configuration
message fails by some reason, we may leave the current protocol
undefined, which may lead to unexpected behavior.  Better to assume a
valid protocol found in the protocol capability bits instead.

For a device that doesn't support the UMP v1.2 feature, it won't reach
to this code path, and USB MIDI GTB descriptor would be used for
determining the protocol, instead.

Link: https://lore.kernel.org/r/20240529164723.18309-2-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/ump.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sound/core/ump.c b/sound/core/ump.c
index 117c7ecc4856..3f61220c23b4 100644
--- a/sound/core/ump.c
+++ b/sound/core/ump.c
@@ -967,6 +967,14 @@ int snd_ump_parse_endpoint(struct snd_ump_endpoint *ump)
 	if (err < 0)
 		ump_dbg(ump, "Unable to get UMP EP stream config\n");
 
+	/* If no protocol is set by some reason, assume the valid one */
+	if (!(ump->info.protocol & SNDRV_UMP_EP_INFO_PROTO_MIDI_MASK)) {
+		if (ump->info.protocol_caps & SNDRV_UMP_EP_INFO_PROTO_MIDI2)
+			ump->info.protocol |= SNDRV_UMP_EP_INFO_PROTO_MIDI2;
+		else if (ump->info.protocol_caps & SNDRV_UMP_EP_INFO_PROTO_MIDI1)
+			ump->info.protocol |= SNDRV_UMP_EP_INFO_PROTO_MIDI1;
+	}
+
 	/* Query and create blocks from Function Blocks */
 	for (blk = 0; blk < ump->info.num_blocks; blk++) {
 		err = create_block_from_fb_info(ump, blk);
-- 
cgit v1.2.3


From 95d7c452a26564ef0c427f2806761b857106d8c4 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Tue, 21 May 2024 12:52:42 +0200
Subject: spi: stm32: Don't warn about spurious interrupts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The dev_warn to notify about a spurious interrupt was introduced with
the reasoning that these are unexpected. However spurious interrupts
tend to trigger continously and the error message on the serial console
prevents that the core's detection of spurious interrupts kicks in
(which disables the irq) and just floods the console.

Fixes: c64e7efe46b7 ("spi: stm32: make spurious and overrun interrupts visible")
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://msgid.link/r/20240521105241.62400-2-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-stm32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index e4e7ddb7524a..4c4ff074e3f6 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -1057,7 +1057,7 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id)
 		mask |= STM32H7_SPI_SR_TXP | STM32H7_SPI_SR_RXP;
 
 	if (!(sr & mask)) {
-		dev_warn(spi->dev, "spurious IT (sr=0x%08x, ier=0x%08x)\n",
+		dev_vdbg(spi->dev, "spurious IT (sr=0x%08x, ier=0x%08x)\n",
 			 sr, ier);
 		spin_unlock_irqrestore(&spi->lock, flags);
 		return IRQ_NONE;
-- 
cgit v1.2.3


From 84081a885394fc94055c24c727c99c321df6abac Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Fri, 3 May 2024 10:44:02 -0500
Subject: dt-bindings: arm: sunxi: Fix incorrect '-' usage

Commit 6bc6bf8a940a ("dt-bindings: arm: sunxi: document Anbernic RG35XX
handheld gaming device variants") mistakenly added '-' on each line
which created empty (i.e. description only) schemas matching anything.
This causes validation to fail on all the root node compatibles as
there are multiple oneOf clauses passing.

Fixes: 6bc6bf8a940a ("dt-bindings: arm: sunxi: document Anbernic RG35XX handheld gaming device variants")
Reviewed-by: Ryan Walklin <ryan@testtoast.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20240503154402.967632-1-robh@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/sunxi.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml b/Documentation/devicetree/bindings/arm/sunxi.yaml
index c6d0d8d81ed4..c2a158b75e49 100644
--- a/Documentation/devicetree/bindings/arm/sunxi.yaml
+++ b/Documentation/devicetree/bindings/arm/sunxi.yaml
@@ -57,17 +57,17 @@ properties:
           - const: allwinner,sun8i-v3s
 
       - description: Anbernic RG35XX (2024)
-      - items:
+        items:
           - const: anbernic,rg35xx-2024
           - const: allwinner,sun50i-h700
 
       - description: Anbernic RG35XX Plus
-      - items:
+        items:
           - const: anbernic,rg35xx-plus
           - const: allwinner,sun50i-h700
 
       - description: Anbernic RG35XX H
-      - items:
+        items:
           - const: anbernic,rg35xx-h
           - const: allwinner,sun50i-h700
 
-- 
cgit v1.2.3


From 321e4fa68ce15660ec578bdec5cc9607635087cf Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Thu, 23 May 2024 10:42:07 -0500
Subject: dt-bindings: arm: stm32: st,mlahb: Drop spurious "reg" property from
 example

"reg" is not documented nor used for st,mlahb, so drop it from the
example to fix the warning:

Documentation/devicetree/bindings/arm/stm32/st,mlahb.example.dtb: ahb@38000000: Unevaluated properties are not allowed ('reg' was unexpected)
        from schema $id: http://devicetree.org/schemas/arm/stm32/st,mlahb.yaml#

Since "reg" is dropped, the unit-address must be as well.

Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20240523154208.2457864-1-robh@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
index d2dce238ff5d..3e996346b264 100644
--- a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
+++ b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
@@ -54,11 +54,10 @@ unevaluatedProperties: false
 
 examples:
   - |
-    mlahb: ahb@38000000 {
+    ahb {
       compatible = "st,mlahb", "simple-bus";
       #address-cells = <1>;
       #size-cells = <1>;
-      reg = <0x10000000 0x40000>;
       ranges;
       dma-ranges = <0x00000000 0x38000000 0x10000>,
                    <0x10000000 0x10000000 0x60000>,
-- 
cgit v1.2.3


From 2032e61e24fe9fe55d6c7a34fb5506c911b3e280 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 16 May 2024 16:27:33 +0100
Subject: kselftest/alsa: Ensure _GNU_SOURCE is defined

The pcmtest driver tests use the kselftest harness which requires that
_GNU_SOURCE is defined but nothing causes it to be defined.  Since the
KHDR_INCLUDES Makefile variable has had the required define added let's
use that, this should provide some futureproofing.

Fixes: daef47b89efd ("selftests: Compile kselftest headers with -D_GNU_SOURCE")
Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/alsa/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile
index 5af9ba8a4645..c1ce39874e2b 100644
--- a/tools/testing/selftests/alsa/Makefile
+++ b/tools/testing/selftests/alsa/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 #
 
-CFLAGS += $(shell pkg-config --cflags alsa)
+CFLAGS += $(shell pkg-config --cflags alsa) $(KHDR_INCLUDES)
 LDLIBS += $(shell pkg-config --libs alsa)
 ifeq ($(LDLIBS),)
 LDLIBS += -lasound
-- 
cgit v1.2.3


From f6c3c83db1d939ebdb8c8922748ae647d8126d91 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Tue, 21 May 2024 09:00:22 +0900
Subject: selftests/ftrace: Fix to check required event file

The dynevent/test_duplicates.tc test case uses `syscalls/sys_enter_openat`
event for defining eprobe on it. Since this `syscalls` events depend on
CONFIG_FTRACE_SYSCALLS=y, if it is not set, the test will fail.

Add the event file to `required` line so that the test will return
`unsupported` result.

Fixes: 297e1dcdca3d ("selftests/ftrace: Add selftest for testing duplicate eprobes and kprobes")
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
index d3a79da215c8..5f72abe6fa79 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
@@ -1,7 +1,7 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 # description: Generic dynamic event - check if duplicate events are caught
-# requires: dynamic_events "e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]":README
+# requires: dynamic_events "e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]":README events/syscalls/sys_enter_openat
 
 echo 0 > events/enable
 
-- 
cgit v1.2.3


From 7ea794604bf6a3be8bb4b0f1483eb1d3972eac93 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Tue, 21 May 2024 09:00:32 +0900
Subject: selftests/ftrace: Update required config

Update required config options for running all tests.
This also sorts the config entries alphabetically.

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/ftrace/config | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config
index e59d985eeff0..048a312abf40 100644
--- a/tools/testing/selftests/ftrace/config
+++ b/tools/testing/selftests/ftrace/config
@@ -1,16 +1,28 @@
-CONFIG_KPROBES=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_EPROBE_EVENTS=y
+CONFIG_FPROBE=y
+CONFIG_FPROBE_EVENTS=y
 CONFIG_FTRACE=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_FUNCTION_GRAPH_RETVAL=y
 CONFIG_FUNCTION_PROFILER=y
-CONFIG_TRACER_SNAPSHOT=y
-CONFIG_STACK_TRACER=y
 CONFIG_HIST_TRIGGERS=y
-CONFIG_SCHED_TRACER=y
-CONFIG_PREEMPT_TRACER=y
 CONFIG_IRQSOFF_TRACER=y
-CONFIG_PREEMPTIRQ_DELAY_TEST=m
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KPROBES=y
+CONFIG_KPROBE_EVENTS=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
+CONFIG_PREEMPTIRQ_DELAY_TEST=m
+CONFIG_PREEMPT_TRACER=y
+CONFIG_PROBE_EVENTS_BTF_ARGS=y
 CONFIG_SAMPLES=y
 CONFIG_SAMPLE_FTRACE_DIRECT=m
 CONFIG_SAMPLE_TRACE_PRINTK=m
-CONFIG_KALLSYMS_ALL=y
+CONFIG_SCHED_TRACER=y
+CONFIG_STACK_TRACER=y
+CONFIG_TRACER_SNAPSHOT=y
+CONFIG_UPROBES=y
+CONFIG_UPROBE_EVENTS=y
-- 
cgit v1.2.3


From 23a4b108accc29a6125ed14de4a044689ffeda78 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Mon, 20 May 2024 20:57:37 -0400
Subject: tracing/selftests: Fix kprobe event name test for .isra. functions

The kprobe_eventname.tc test checks if a function with .isra. can have a
kprobe attached to it. It loops through the kallsyms file for all the
functions that have the .isra. name, and checks if it exists in the
available_filter_functions file, and if it does, it uses it to attach a
kprobe to it.

The issue is that kprobes can not attach to functions that are listed more
than once in available_filter_functions. With the latest kernel, the
function that is found is: rapl_event_update.isra.0

  # grep rapl_event_update.isra.0 /sys/kernel/tracing/available_filter_functions
  rapl_event_update.isra.0
  rapl_event_update.isra.0

It is listed twice. This causes the attached kprobe to it to fail which in
turn fails the test. Instead of just picking the function function that is
found in available_filter_functions, pick the first one that is listed
only once in available_filter_functions.

Cc: stable@vger.kernel.org
Fixes: 604e3548236d ("selftests/ftrace: Select an existing function in kprobe_eventname test")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
index 1f6981ef7afa..ba19b81cef39 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
@@ -30,7 +30,8 @@ find_dot_func() {
 	fi
 
 	grep " [tT] .*\.isra\..*" /proc/kallsyms | cut -f 3 -d " " | while read f; do
-		if grep -s $f available_filter_functions; then
+		cnt=`grep -s $f available_filter_functions | wc -l`;
+		if [ $cnt -eq 1 ]; then
 			echo $f
 			break
 		fi
-- 
cgit v1.2.3


From bc4d5f5d2debf8bb65fba188313481549ead8576 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 21 May 2024 13:01:11 +1000
Subject: selftests: cachestat: Fix build warnings on ppc64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix warnings like:
  test_cachestat.c: In function ‘print_cachestat’:
  test_cachestat.c:30:38: warning: format ‘%llu’ expects argument of
  type ‘long long unsigned int’, but argument 2 has type ‘__u64’ {aka
  ‘long unsigned int’} [-Wformat=]

By switching to unsigned long long for u64 for ppc64 builds.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/cachestat/test_cachestat.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
index b171fd53b004..632ab44737ec 100644
--- a/tools/testing/selftests/cachestat/test_cachestat.c
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ // Use ll64
 
 #include <stdio.h>
 #include <stdbool.h>
-- 
cgit v1.2.3


From 84b6df4c49a1cc2854a16937acd5fd3e6315d083 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 21 May 2024 13:03:25 +1000
Subject: selftests/openat2: Fix build warnings on ppc64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix warnings like:

  openat2_test.c: In function ‘test_openat2_flags’:
  openat2_test.c:303:73: warning: format ‘%llX’ expects argument of type
  ‘long long unsigned int’, but argument 5 has type ‘__u64’ {aka ‘long
  unsigned int’} [-Wformat=]

By switching to unsigned long long for u64 for ppc64 builds.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/openat2/openat2_test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index 9024754530b2..5790ab446527 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -5,6 +5,7 @@
  */
 
 #define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ // Use ll64
 #include <fcntl.h>
 #include <sched.h>
 #include <sys/stat.h>
-- 
cgit v1.2.3


From e8b8c5264d4ebd248f60a5cef077fe615806e7a0 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 21 May 2024 12:26:16 +1000
Subject: selftests/overlayfs: Fix build error on ppc64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix build error on ppc64:
  dev_in_maps.c: In function ‘get_file_dev_and_inode’:
  dev_in_maps.c:60:59: error: format ‘%llu’ expects argument of type
  ‘long long unsigned int *’, but argument 7 has type ‘__u64 *’ {aka ‘long
  unsigned int *’} [-Werror=format=]

By switching to unsigned long long for u64 for ppc64 builds.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
index 759f86e7d263..2862aae58b79 100644
--- a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
+++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ // Use ll64
 
 #include <inttypes.h>
 #include <unistd.h>
-- 
cgit v1.2.3


From 2607133196c35f31892ee199ce7ffa717bea4ad1 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel.holland@sifive.com>
Date: Mon, 27 May 2024 17:14:12 -0700
Subject: clk: sifive: Do not register clkdevs for PRCI clocks

These clkdevs were unnecessary, because systems using this driver always
look up clocks using the devicetree. And as Russell King points out[1],
since the provided device name was truncated, lookups via clkdev would
never match.

Recently, commit 8d532528ff6a ("clkdev: report over-sized strings when
creating clkdev entries") caused clkdev registration to fail due to the
truncation, and this now prevents the driver from probing. Fix the
driver by removing the clkdev registration.

Link: https://lore.kernel.org/linux-clk/ZkfYqj+OcAxd9O2t@shell.armlinux.org.uk/ [1]
Fixes: 30b8e27e3b58 ("clk: sifive: add a driver for the SiFive FU540 PRCI IP block")
Fixes: 8d532528ff6a ("clkdev: report over-sized strings when creating clkdev entries")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Closes: https://lore.kernel.org/linux-clk/7eda7621-0dde-4153-89e4-172e4c095d01@roeck-us.net/
Suggested-by: Russell King <linux@armlinux.org.uk>
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
Link: https://lore.kernel.org/r/20240528001432.1200403-1-samuel.holland@sifive.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/sifive/sifive-prci.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/clk/sifive/sifive-prci.c b/drivers/clk/sifive/sifive-prci.c
index 25b8e1a80ddc..b32a59fe55e7 100644
--- a/drivers/clk/sifive/sifive-prci.c
+++ b/drivers/clk/sifive/sifive-prci.c
@@ -4,7 +4,6 @@
  * Copyright (C) 2020 Zong Li
  */
 
-#include <linux/clkdev.h>
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/module.h>
@@ -537,13 +536,6 @@ static int __prci_register_clocks(struct device *dev, struct __prci_data *pd,
 			return r;
 		}
 
-		r = clk_hw_register_clkdev(&pic->hw, pic->name, dev_name(dev));
-		if (r) {
-			dev_warn(dev, "Failed to register clkdev for %s: %d\n",
-				 init.name, r);
-			return r;
-		}
-
 		pd->hw_clks.hws[i] = &pic->hw;
 	}
 
-- 
cgit v1.2.3


From 7b038b564b3e2a752d2211e7b0c3c29fd2f6e197 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Wed, 29 May 2024 16:28:41 -0400
Subject: bcachefs: Fix failure to return error on misaligned dio write

This was reported as an error when running coreutils shred.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/fs-io-direct.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c
index 09d21aef879a..049b61bc9a5b 100644
--- a/fs/bcachefs/fs-io-direct.c
+++ b/fs/bcachefs/fs-io-direct.c
@@ -609,8 +609,10 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
 	if (unlikely(ret))
 		goto err_put_write_ref;
 
-	if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1)))
+	if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) {
+		ret = -EINVAL;
 		goto err_put_write_ref;
+	}
 
 	inode_dio_begin(&inode->v);
 	bch2_pagecache_block_get(inode);
-- 
cgit v1.2.3


From ba46b3bda296c4f82b061ac40b90f49d2a00a380 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 15 May 2024 11:25:49 -0400
Subject: drm/amdgpu: Adjust logic in amdgpu_device_partner_bandwidth()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use current speed/width on devices which don't support
dynamic PCIe switching.

Fixes: 466a7d115326 ("drm/amd: Use the first non-dGPU PCI device for BW limits")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3289
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 861ccff78af9..932dc93b2e63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5944,13 +5944,18 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
 	*speed = PCI_SPEED_UNKNOWN;
 	*width = PCIE_LNK_WIDTH_UNKNOWN;
 
-	while ((parent = pci_upstream_bridge(parent))) {
-		/* skip upstream/downstream switches internal to dGPU*/
-		if (parent->vendor == PCI_VENDOR_ID_ATI)
-			continue;
-		*speed = pcie_get_speed_cap(parent);
-		*width = pcie_get_width_cap(parent);
-		break;
+	if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
+		while ((parent = pci_upstream_bridge(parent))) {
+			/* skip upstream/downstream switches internal to dGPU*/
+			if (parent->vendor == PCI_VENDOR_ID_ATI)
+				continue;
+			*speed = pcie_get_speed_cap(parent);
+			*width = pcie_get_width_cap(parent);
+			break;
+		}
+	} else {
+		/* use the current speeds rather than max if switching is not supported */
+		pcie_bandwidth_available(adev->pdev, NULL, speed, width);
 	}
 }
 
-- 
cgit v1.2.3


From 05d9e24ddb15160164ba6e917a88c00907dc2434 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 16 May 2024 09:51:26 -0400
Subject: drm/amdgpu: silence UBSAN warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert a variable sized array from [1] to [].

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/include/atomfirmware.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index 1acb2d2c5597..571691837200 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -3583,7 +3583,7 @@ struct atom_gpio_voltage_object_v4
    uint8_t  phase_delay_us;                      // phase delay in unit of micro second
    uint8_t  reserved;   
    uint32_t gpio_mask_val;                         // GPIO Mask value
-   struct atom_voltage_gpio_map_lut voltage_gpio_lut[1];
+   struct atom_voltage_gpio_map_lut voltage_gpio_lut[] __counted_by(gpio_entry_num);
 };
 
 struct  atom_svid2_voltage_object_v4
-- 
cgit v1.2.3


From a0cf36546cc24ae1c95d72253c7795d4d2fc77aa Mon Sep 17 00:00:00 2001
From: Jesse Zhang <jesse.zhang@amd.com>
Date: Thu, 23 May 2024 17:14:45 +0800
Subject: drm/amdgpu: fix dereference null return value for the function
 amdgpu_vm_pt_parent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The pointer parent may be NULLed by the function amdgpu_vm_pt_parent.
To make the code more robust, check the pointer parent.

Signed-off-by: Jesse Zhang <Jesse.Zhang@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 7fdd306a48a0..f07647a9a9d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -706,11 +706,15 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
 			 struct amdgpu_vm_bo_base *entry)
 {
 	struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
-	struct amdgpu_bo *bo = parent->bo, *pbo;
+	struct amdgpu_bo *bo, *pbo;
 	struct amdgpu_vm *vm = params->vm;
 	uint64_t pde, pt, flags;
 	unsigned int level;
 
+	if (WARN_ON(!parent))
+		return -EINVAL;
+
+	bo = parent->bo;
 	for (level = 0, pbo = bo->parent; pbo; ++level)
 		pbo = pbo->parent;
 
-- 
cgit v1.2.3


From dd2b75fd9a79bf418e088656822af06fc253dbe3 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 20 May 2024 14:41:31 -0400
Subject: Revert "drm/amdkfd: fix gfx_target_version for certain 11.0.3
 devices"

This reverts commit 28ebbb4981cb1fad12e0b1227dbecc88810b1ee8.

Revert this commit as apparently the LLVM code to take advantage of
this never landed.

Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: Feifei Xu <feifei.xu@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 9596bca57212..afc57df421cd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -408,15 +408,8 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
 			f2g = &gfx_v11_kfd2kgd;
 			break;
 		case IP_VERSION(11, 0, 3):
-			if ((adev->pdev->device == 0x7460 &&
-			     adev->pdev->revision == 0x00) ||
-			    (adev->pdev->device == 0x7461 &&
-			     adev->pdev->revision == 0x00))
-				/* Note: Compiler version is 11.0.5 while HW version is 11.0.3 */
-				gfx_target_version = 110005;
-			else
-				/* Note: Compiler version is 11.0.1 while HW version is 11.0.3 */
-				gfx_target_version = 110001;
+			/* Note: Compiler version is 11.0.1 while HW version is 11.0.3 */
+			gfx_target_version = 110001;
 			f2g = &gfx_v11_kfd2kgd;
 			break;
 		case IP_VERSION(11, 5, 0):
-- 
cgit v1.2.3


From 1f327dfc846ae82e16e52ed9c559d566826486d2 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 22 May 2024 15:26:50 -0400
Subject: drm/amdkfd: simplify APU VRAM handling

With commit 89773b85599a
("drm/amdkfd: Let VRAM allocations go to GTT domain on small APUs")
big and small APU "VRAM" handling in KFD was unified.  Since AMD_IS_APU
is set for both big and small APUs, we can simplify the checks in
the code.

v2: clean up a few more places (Lang)

Acked-by: Felix Kuehling <felix.kuehling@amd.com>
Reviewed-by: Lang Yu <Lang.Yu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16 ++++++++--------
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c         |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c             |  6 ++----
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h             |  1 -
 4 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 8975cf41a91a..48ad0c04aa72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -196,7 +196,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 			return -EINVAL;
 
 		vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
-		if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) {
+		if (adev->flags & AMD_IS_APU) {
 			system_mem_needed = size;
 			ttm_mem_needed = size;
 		}
@@ -233,7 +233,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	if (adev && xcp_id >= 0) {
 		adev->kfd.vram_used[xcp_id] += vram_needed;
 		adev->kfd.vram_used_aligned[xcp_id] +=
-				(adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) ?
+				(adev->flags & AMD_IS_APU) ?
 				vram_needed :
 				ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
 	}
@@ -261,7 +261,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
 
 		if (adev) {
 			adev->kfd.vram_used[xcp_id] -= size;
-			if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) {
+			if (adev->flags & AMD_IS_APU) {
 				adev->kfd.vram_used_aligned[xcp_id] -= size;
 				kfd_mem_limit.system_mem_used -= size;
 				kfd_mem_limit.ttm_mem_used -= size;
@@ -890,7 +890,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 	 * if peer device has large BAR. In contrast, access over xGMI is
 	 * allowed for both small and large BAR configurations of peer device
 	 */
-	if ((adev != bo_adev && !(adev->gmc.is_app_apu || adev->flags & AMD_IS_APU)) &&
+	if ((adev != bo_adev && !(adev->flags & AMD_IS_APU)) &&
 	    ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
 	     (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
 	     (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
@@ -1658,7 +1658,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
 		- atomic64_read(&adev->vram_pin_size)
 		- reserved_for_pt;
 
-	if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) {
+	if (adev->flags & AMD_IS_APU) {
 		system_mem_available = no_system_mem_limit ?
 					kfd_mem_limit.max_system_mem_limit :
 					kfd_mem_limit.max_system_mem_limit -
@@ -1706,7 +1706,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
 		domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
 
-		if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) {
+		if (adev->flags & AMD_IS_APU) {
 			domain = AMDGPU_GEM_DOMAIN_GTT;
 			alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
 			alloc_flags = 0;
@@ -1953,7 +1953,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	if (size) {
 		if (!is_imported &&
 		   (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
-		   ((adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) &&
+		   ((adev->flags & AMD_IS_APU) &&
 		    mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
 			*size = bo_size;
 		else
@@ -2376,7 +2376,7 @@ static int import_obj_create(struct amdgpu_device *adev,
 	(*mem)->bo = bo;
 	(*mem)->va = va;
 	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) &&
-			 !(adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) ?
+			 !(adev->flags & AMD_IS_APU) ?
 			 AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
 
 	(*mem)->mapped_to_gpu_memory = 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 4816fcb9803a..8ee3d07ffbdf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -1023,7 +1023,7 @@ int kgd2kfd_init_zone_device(struct amdgpu_device *adev)
 	if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 1))
 		return -EINVAL;
 
-	if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU)
+	if (adev->flags & AMD_IS_APU)
 		return 0;
 
 	pgmap = &kfddev->pgmap;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 069b81eeea03..31e500859ab0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2619,8 +2619,7 @@ svm_range_best_restore_location(struct svm_range *prange,
 		return -1;
 	}
 
-	if (node->adev->gmc.is_app_apu ||
-	    node->adev->flags & AMD_IS_APU)
+	if (node->adev->flags & AMD_IS_APU)
 		return 0;
 
 	if (prange->preferred_loc == gpuid ||
@@ -3338,8 +3337,7 @@ svm_range_best_prefetch_location(struct svm_range *prange)
 		goto out;
 	}
 
-	if (bo_node->adev->gmc.is_app_apu ||
-	    bo_node->adev->flags & AMD_IS_APU) {
+	if (bo_node->adev->flags & AMD_IS_APU) {
 		best_loc = 0;
 		goto out;
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 9c37bd0567ef..70c1776611c4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -201,7 +201,6 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s
  * is initialized to not 0 when page migration register device memory.
  */
 #define KFD_IS_SVM_API_SUPPORTED(adev) ((adev)->kfd.pgmap.type != 0 ||\
-					(adev)->gmc.is_app_apu ||\
 					((adev)->flags & AMD_IS_APU))
 
 void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);
-- 
cgit v1.2.3


From a9bc5a19e4958fe664254d1ad2dc2a9f5868c210 Mon Sep 17 00:00:00 2001
From: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Date: Wed, 22 May 2024 15:04:29 -0400
Subject: drm/amdgpu: Make CPX mode auto default in NPS4

On GFXIP9.4.3, make CPX mode as the default compute mode if the node is
setup in NPS4 memory partition mode. This change is only applicable for
dGPU, for APU, continue to use TPX mode.

Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index 414ea3f560a7..d4e2aed2efa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -422,7 +422,7 @@ __aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr)
 
 	if (adev->gmc.num_mem_partitions == num_xcc / 2)
 		return (adev->flags & AMD_IS_APU) ? AMDGPU_TPX_PARTITION_MODE :
-						    AMDGPU_QPX_PARTITION_MODE;
+						    AMDGPU_CPX_PARTITION_MODE;
 
 	if (adev->gmc.num_mem_partitions == 2 && !(adev->flags & AMD_IS_APU))
 		return AMDGPU_DPX_PARTITION_MODE;
-- 
cgit v1.2.3


From 67c7d4fa267bcfe8d68fb36d938e3c6e0912b57d Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 9 May 2024 13:37:27 +0200
Subject: drm/amd/pm: remove deprecated I2C_CLASS_SPD support from newly added
 SMU_14_0_2

Support for I2C_CLASS_SPD  is currently being removed from the kernel.
Only remaining step is to remove the definition of I2C_CLASS_SPD.
Setting I2C_CLASS_SPD  in a driver is a no-op meanwhile, so remove it
here.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
index 706265220292..90703f4542ab 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
@@ -1562,7 +1562,6 @@ static int smu_v14_0_2_i2c_control_init(struct smu_context *smu)
 		smu_i2c->port = i;
 		mutex_init(&smu_i2c->mutex);
 		control->owner = THIS_MODULE;
-		control->class = I2C_CLASS_SPD;
 		control->dev.parent = &adev->pdev->dev;
 		control->algo = &smu_v14_0_2_i2c_algo;
 		snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i);
-- 
cgit v1.2.3


From fa0bc8f297b29126b5ae983406e9bc76d48a9a8e Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Wed, 22 May 2024 23:08:09 +0200
Subject: hwmon: (dell-smm) Add Dell G15 5511 to fan control whitelist
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A user reported that he needs to disable BIOS fan control on his
Dell G15 5511 in order to be able to control the fans.

Closes: https://github.com/Wer-Wolf/i8kutils/issues/5
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Acked-by: Pali Rohár <pali@kernel.org>
Link: https://lore.kernel.org/r/20240522210809.294488-1-W_Armin@gmx.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/dell-smm-hwmon.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index 48a81c64f00d..942526bd4775 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -1545,6 +1545,14 @@ static const struct dmi_system_id i8k_whitelist_fan_control[] __initconst = {
 		},
 		.driver_data = (void *)&i8k_fan_control_data[I8K_FAN_30A3_31A3],
 	},
+	{
+		.ident = "Dell G15 5511",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Dell G15 5511"),
+		},
+		.driver_data = (void *)&i8k_fan_control_data[I8K_FAN_30A3_31A3],
+	},
 	{ }
 };
 
-- 
cgit v1.2.3


From a94ff8e50c20bde6d50864849a98b106e45d30c6 Mon Sep 17 00:00:00 2001
From: Javier Carrasco <javier.carrasco.cruz@gmail.com>
Date: Thu, 23 May 2024 17:47:14 +0200
Subject: hwmon: (ltc2992) Fix memory leak in ltc2992_parse_dt()

A new error path was added to the fwnode_for_each_available_node() loop
in ltc2992_parse_dt(), which leads to an early return that requires a
call to fwnode_handle_put() to avoid a memory leak in that case.

Add the missing fwnode_handle_put() in the error path from a zero value
shunt resistor.

Cc: stable@vger.kernel.org
Fixes: 10b029020487 ("hwmon: (ltc2992) Avoid division by zero")
Signed-off-by: Javier Carrasco <javier.carrasco.cruz@gmail.com>
Link: https://lore.kernel.org/r/20240523-fwnode_for_each_available_child_node_scoped-v2-1-701f3a03f2fb@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ltc2992.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c
index 229aed15d5ca..d4a93223cd3b 100644
--- a/drivers/hwmon/ltc2992.c
+++ b/drivers/hwmon/ltc2992.c
@@ -876,9 +876,11 @@ static int ltc2992_parse_dt(struct ltc2992_state *st)
 
 		ret = fwnode_property_read_u32(child, "shunt-resistor-micro-ohms", &val);
 		if (!ret) {
-			if (!val)
+			if (!val) {
+				fwnode_handle_put(child);
 				return dev_err_probe(&st->client->dev, -EINVAL,
 						     "shunt resistor value cannot be zero\n");
+			}
 			st->r_sense_uohm[addr] = val;
 		}
 	}
-- 
cgit v1.2.3


From 92f1655aa2b2294d0b49925f3b875a634bd3b59e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 28 May 2024 11:43:53 +0000
Subject: net: fix __dst_negative_advice() race

__dst_negative_advice() does not enforce proper RCU rules when
sk->dst_cache must be cleared, leading to possible UAF.

RCU rules are that we must first clear sk->sk_dst_cache,
then call dst_release(old_dst).

Note that sk_dst_reset(sk) is implementing this protocol correctly,
while __dst_negative_advice() uses the wrong order.

Given that ip6_negative_advice() has special logic
against RTF_CACHE, this means each of the three ->negative_advice()
existing methods must perform the sk_dst_reset() themselves.

Note the check against NULL dst is centralized in
__dst_negative_advice(), there is no need to duplicate
it in various callbacks.

Many thanks to Clement Lecigne for tracking this issue.

This old bug became visible after the blamed commit, using UDP sockets.

Fixes: a87cb3e48ee8 ("net: Facility to report route quality of connected sockets")
Reported-by: Clement Lecigne <clecigne@google.com>
Diagnosed-by: Clement Lecigne <clecigne@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <tom@herbertland.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20240528114353.1794151-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dst_ops.h  |  2 +-
 include/net/sock.h     | 13 +++----------
 net/ipv4/route.c       | 22 ++++++++--------------
 net/ipv6/route.c       | 29 +++++++++++++++--------------
 net/xfrm/xfrm_policy.c | 11 +++--------
 5 files changed, 30 insertions(+), 47 deletions(-)

diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 6d1c8541183d..3a9001a042a5 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -24,7 +24,7 @@ struct dst_ops {
 	void			(*destroy)(struct dst_entry *);
 	void			(*ifdown)(struct dst_entry *,
 					  struct net_device *dev);
-	struct dst_entry *	(*negative_advice)(struct dst_entry *);
+	void			(*negative_advice)(struct sock *sk, struct dst_entry *);
 	void			(*link_failure)(struct sk_buff *);
 	void			(*update_pmtu)(struct dst_entry *dst, struct sock *sk,
 					       struct sk_buff *skb, u32 mtu,
diff --git a/include/net/sock.h b/include/net/sock.h
index 5f4d0629348f..953c8dc4e259 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2063,17 +2063,10 @@ sk_dst_get(const struct sock *sk)
 
 static inline void __dst_negative_advice(struct sock *sk)
 {
-	struct dst_entry *ndst, *dst = __sk_dst_get(sk);
+	struct dst_entry *dst = __sk_dst_get(sk);
 
-	if (dst && dst->ops->negative_advice) {
-		ndst = dst->ops->negative_advice(dst);
-
-		if (ndst != dst) {
-			rcu_assign_pointer(sk->sk_dst_cache, ndst);
-			sk_tx_queue_clear(sk);
-			WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
-		}
-	}
+	if (dst && dst->ops->negative_advice)
+		dst->ops->negative_advice(sk, dst);
 }
 
 static inline void dst_negative_advice(struct sock *sk)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5fd54103174f..b3073d1c8f8f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,7 +129,8 @@ struct dst_entry	*ipv4_dst_check(struct dst_entry *dst, u32 cookie);
 static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
 INDIRECT_CALLABLE_SCOPE
 unsigned int		ipv4_mtu(const struct dst_entry *dst);
-static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
+static void		ipv4_negative_advice(struct sock *sk,
+					     struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
 static void		 ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 					   struct sk_buff *skb, u32 mtu,
@@ -825,22 +826,15 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
 	__ip_do_redirect(rt, skb, &fl4, true);
 }
 
-static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
+static void ipv4_negative_advice(struct sock *sk,
+				 struct dst_entry *dst)
 {
 	struct rtable *rt = dst_rtable(dst);
-	struct dst_entry *ret = dst;
 
-	if (rt) {
-		if (dst->obsolete > 0) {
-			ip_rt_put(rt);
-			ret = NULL;
-		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
-			   rt->dst.expires) {
-			ip_rt_put(rt);
-			ret = NULL;
-		}
-	}
-	return ret;
+	if ((dst->obsolete > 0) ||
+	    (rt->rt_flags & RTCF_REDIRECTED) ||
+	    rt->dst.expires)
+		sk_dst_reset(sk);
 }
 
 /*
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index bbc2a0dd9314..a504b88ec06b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -87,7 +87,8 @@ struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
 INDIRECT_CALLABLE_SCOPE
 unsigned int		ip6_mtu(const struct dst_entry *dst);
-static struct dst_entry *ip6_negative_advice(struct dst_entry *);
+static void		ip6_negative_advice(struct sock *sk,
+					    struct dst_entry *dst);
 static void		ip6_dst_destroy(struct dst_entry *);
 static void		ip6_dst_ifdown(struct dst_entry *,
 				       struct net_device *dev);
@@ -2770,24 +2771,24 @@ INDIRECT_CALLABLE_SCOPE struct dst_entry *ip6_dst_check(struct dst_entry *dst,
 }
 EXPORT_INDIRECT_CALLABLE(ip6_dst_check);
 
-static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
+static void ip6_negative_advice(struct sock *sk,
+				struct dst_entry *dst)
 {
 	struct rt6_info *rt = dst_rt6_info(dst);
 
-	if (rt) {
-		if (rt->rt6i_flags & RTF_CACHE) {
-			rcu_read_lock();
-			if (rt6_check_expired(rt)) {
-				rt6_remove_exception_rt(rt);
-				dst = NULL;
-			}
-			rcu_read_unlock();
-		} else {
-			dst_release(dst);
-			dst = NULL;
+	if (rt->rt6i_flags & RTF_CACHE) {
+		rcu_read_lock();
+		if (rt6_check_expired(rt)) {
+			/* counteract the dst_release() in sk_dst_reset() */
+			dst_hold(dst);
+			sk_dst_reset(sk);
+
+			rt6_remove_exception_rt(rt);
 		}
+		rcu_read_unlock();
+		return;
 	}
-	return dst;
+	sk_dst_reset(sk);
 }
 
 static void ip6_link_failure(struct sk_buff *skb)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 475b904fe68b..66e07de2de35 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3910,15 +3910,10 @@ static void xfrm_link_failure(struct sk_buff *skb)
 	/* Impossible. Such dst must be popped before reaches point of failure. */
 }
 
-static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
+static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst)
 {
-	if (dst) {
-		if (dst->obsolete) {
-			dst_release(dst);
-			dst = NULL;
-		}
-	}
-	return dst;
+	if (dst->obsolete)
+		sk_dst_reset(sk);
 }
 
 static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr)
-- 
cgit v1.2.3


From b8c8abefc07b47f0dc9342530b7618237df96724 Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Date: Tue, 28 May 2024 22:30:30 +0200
Subject: ipv4: correctly iterate over the target netns in inet_dump_ifaddr()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A recent change to inet_dump_ifaddr had the function incorrectly iterate
over net rather than tgt_net, resulting in the data coming for the
incorrect network namespace.

Fixes: cdb2f80f1c10 ("inet: use xa_array iterator to implement inet_dump_ifaddr()")
Reported-by: Stéphane Graber <stgraber@stgraber.org>
Closes: https://github.com/lxc/incus/issues/892
Bisected-by: Stéphane Graber <stgraber@stgraber.org>
Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@canonical.com>
Tested-by: Stéphane Graber <stgraber@stgraber.org>
Acked-by: Christian Brauner <brauner@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20240528203030.10839-1-aleksandr.mikhalitsyn@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/devinet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e827da128c5f..f3892ee9dfb3 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1903,7 +1903,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
 
 	cb->seq = inet_base_seq(tgt_net);
 
-	for_each_netdev_dump(net, dev, ctx->ifindex) {
+	for_each_netdev_dump(tgt_net, dev, ctx->ifindex) {
 		in_dev = __in_dev_get_rcu(dev);
 		if (!in_dev)
 			continue;
-- 
cgit v1.2.3


From 278d65ccdadb5f0fa0ceaf7b9cc97b305cd72822 Mon Sep 17 00:00:00 2001
From: Tristram Ha <tristram.ha@microchip.com>
Date: Tue, 28 May 2024 14:34:26 -0700
Subject: net: dsa: microchip: fix RGMII error in KSZ DSA driver

The driver should return RMII interface when XMII is running in RMII mode.

Fixes: 0ab7f6bf1675 ("net: dsa: microchip: ksz9477: use common xmii function")
Signed-off-by: Tristram Ha <tristram.ha@microchip.com>
Acked-by: Arun Ramadoss <arun.ramadoss@microchip.com>
Acked-by: Jerry Ray <jerry.ray@microchip.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/1716932066-3342-1-git-send-email-Tristram.Ha@microchip.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/microchip/ksz_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 1e0085cd9a9a..2818e24e2a51 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -3142,7 +3142,7 @@ phy_interface_t ksz_get_xmii(struct ksz_device *dev, int port, bool gbit)
 		else
 			interface = PHY_INTERFACE_MODE_MII;
 	} else if (val == bitval[P_RMII_SEL]) {
-		interface = PHY_INTERFACE_MODE_RGMII;
+		interface = PHY_INTERFACE_MODE_RMII;
 	} else {
 		interface = PHY_INTERFACE_MODE_RGMII;
 		if (data8 & P_RGMII_ID_EG_ENABLE)
-- 
cgit v1.2.3


From bfd546a552e140b0a4c8a21527c39d6d21addb28 Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Tue, 28 May 2024 15:06:04 -0700
Subject: e1000e: move force SMBUS near the end of enable_ulp function

The commit 861e8086029e ("e1000e: move force SMBUS from enable ulp
function to avoid PHY loss issue") introduces a regression on
PCH_MTP_I219_LM18 (PCIID: 0x8086550A). Without the referred commit, the
ethernet works well after suspend and resume, but after applying the
commit, the ethernet couldn't work anymore after the resume and the
dmesg shows that the NIC link changes to 10Mbps (1000Mbps originally):

    [   43.305084] e1000e 0000:00:1f.6 enp0s31f6: NIC Link is Up 10 Mbps Full Duplex, Flow Control: Rx/Tx

Without the commit, the force SMBUS code will not be executed if
"return 0" or "goto out" is executed in the enable_ulp(), and in my
case, the "goto out" is executed since FWSM_FW_VALID is set. But after
applying the commit, the force SMBUS code will be ran unconditionally.

Here move the force SMBUS code back to enable_ulp() and put it
immediately ahead of hw->phy.ops.release(hw), this could allow the
longest settling time as possible for interface in this function and
doesn't change the original code logic.

The issue was found on a Lenovo laptop with the ethernet hw as below:
00:1f.6 Ethernet controller [0200]: Intel Corporation Device [8086:550a]
(rev 20).

And this patch is verified (cable plug and unplug, system suspend
and resume) on Lenovo laptops with ethernet hw: [8086:550a],
[8086:550b], [8086:15bb], [8086:15be], [8086:1a1f], [8086:1a1c] and
[8086:0dc7].

Fixes: 861e8086029e ("e1000e: move force SMBUS from enable ulp function to avoid PHY loss issue")
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Acked-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
Tested-by: Naama Meir <naamax.meir@linux.intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20240528-net-2024-05-28-intel-net-fixes-v1-1-dc8593d2bbc6@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/e1000e/ich8lan.c | 22 ++++++++++++++++++++++
 drivers/net/ethernet/intel/e1000e/netdev.c  | 18 ------------------
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index f9e94be36e97..2e98a2a0bead 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1225,6 +1225,28 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
 	}
 
 release:
+	/* Switching PHY interface always returns MDI error
+	 * so disable retry mechanism to avoid wasting time
+	 */
+	e1000e_disable_phy_retry(hw);
+
+	/* Force SMBus mode in PHY */
+	ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
+	if (ret_val) {
+		e1000e_enable_phy_retry(hw);
+		hw->phy.ops.release(hw);
+		goto out;
+	}
+	phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
+	e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
+
+	e1000e_enable_phy_retry(hw);
+
+	/* Force SMBus mode in MAC */
+	mac_reg = er32(CTRL_EXT);
+	mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
+	ew32(CTRL_EXT, mac_reg);
+
 	hw->phy.ops.release(hw);
 out:
 	if (ret_val)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 220d62fca55d..da5c59daf8ba 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6623,7 +6623,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
 	struct e1000_hw *hw = &adapter->hw;
 	u32 ctrl, ctrl_ext, rctl, status, wufc;
 	int retval = 0;
-	u16 smb_ctrl;
 
 	/* Runtime suspend should only enable wakeup for link changes */
 	if (runtime)
@@ -6697,23 +6696,6 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
 			if (retval)
 				return retval;
 		}
-
-		/* Force SMBUS to allow WOL */
-		/* Switching PHY interface always returns MDI error
-		 * so disable retry mechanism to avoid wasting time
-		 */
-		e1000e_disable_phy_retry(hw);
-
-		e1e_rphy(hw, CV_SMB_CTRL, &smb_ctrl);
-		smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS;
-		e1e_wphy(hw, CV_SMB_CTRL, smb_ctrl);
-
-		e1000e_enable_phy_retry(hw);
-
-		/* Force SMBus mode in MAC */
-		ctrl_ext = er32(CTRL_EXT);
-		ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS;
-		ew32(CTRL_EXT, ctrl_ext);
 	}
 
 	/* Ensure that the appropriate bits are set in LPI_CTRL
-- 
cgit v1.2.3


From 218ed820d364ddc2b0150951e6b1a1bd1e49469d Mon Sep 17 00:00:00 2001
From: Thinh Tran <thinhtr@linux.ibm.com>
Date: Tue, 28 May 2024 15:06:05 -0700
Subject: i40e: factoring out i40e_suspend/i40e_resume

Two new functions, i40e_io_suspend() and i40e_io_resume(), have been
introduced.  These functions were factored out from the existing
i40e_suspend() and i40e_resume() respectively.  This factoring was
done due to concerns about the logic of the I40E_SUSPENSED state, which
caused the device to be unable to recover.  The functions are now used
in the EEH handling for device suspend/resume callbacks.

The function i40e_enable_mc_magic_wake() has been moved ahead of
i40e_io_suspend() to ensure it is declared before being used.

Tested-by: Robert Thomas <rob.thomas@ibm.com>
Signed-off-by: Thinh Tran <thinhtr@linux.ibm.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20240528-net-2024-05-28-intel-net-fixes-v1-2-dc8593d2bbc6@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 249 +++++++++++++++-------------
 1 file changed, 135 insertions(+), 114 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 1f188c052828..d5f25ea304bf 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -16334,6 +16334,139 @@ unmap:
 	pci_disable_device(pdev);
 }
 
+/**
+ * i40e_enable_mc_magic_wake - enable multicast magic packet wake up
+ * using the mac_address_write admin q function
+ * @pf: pointer to i40e_pf struct
+ **/
+static void i40e_enable_mc_magic_wake(struct i40e_pf *pf)
+{
+	struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf);
+	struct i40e_hw *hw = &pf->hw;
+	u8 mac_addr[6];
+	u16 flags = 0;
+	int ret;
+
+	/* Get current MAC address in case it's an LAA */
+	if (main_vsi && main_vsi->netdev) {
+		ether_addr_copy(mac_addr, main_vsi->netdev->dev_addr);
+	} else {
+		dev_err(&pf->pdev->dev,
+			"Failed to retrieve MAC address; using default\n");
+		ether_addr_copy(mac_addr, hw->mac.addr);
+	}
+
+	/* The FW expects the mac address write cmd to first be called with
+	 * one of these flags before calling it again with the multicast
+	 * enable flags.
+	 */
+	flags = I40E_AQC_WRITE_TYPE_LAA_WOL;
+
+	if (hw->func_caps.flex10_enable && hw->partition_id != 1)
+		flags = I40E_AQC_WRITE_TYPE_LAA_ONLY;
+
+	ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
+	if (ret) {
+		dev_err(&pf->pdev->dev,
+			"Failed to update MAC address registers; cannot enable Multicast Magic packet wake up");
+		return;
+	}
+
+	flags = I40E_AQC_MC_MAG_EN
+			| I40E_AQC_WOL_PRESERVE_ON_PFR
+			| I40E_AQC_WRITE_TYPE_UPDATE_MC_MAG;
+	ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
+	if (ret)
+		dev_err(&pf->pdev->dev,
+			"Failed to enable Multicast Magic Packet wake up\n");
+}
+
+/**
+ * i40e_io_suspend - suspend all IO operations
+ * @pf: pointer to i40e_pf struct
+ *
+ **/
+static int i40e_io_suspend(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+
+	set_bit(__I40E_DOWN, pf->state);
+
+	/* Ensure service task will not be running */
+	del_timer_sync(&pf->service_timer);
+	cancel_work_sync(&pf->service_task);
+
+	/* Client close must be called explicitly here because the timer
+	 * has been stopped.
+	 */
+	i40e_notify_client_of_netdev_close(pf, false);
+
+	if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) &&
+	    pf->wol_en)
+		i40e_enable_mc_magic_wake(pf);
+
+	/* Since we're going to destroy queues during the
+	 * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this
+	 * whole section
+	 */
+	rtnl_lock();
+
+	i40e_prep_for_reset(pf);
+
+	wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
+	wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
+
+	/* Clear the interrupt scheme and release our IRQs so that the system
+	 * can safely hibernate even when there are a large number of CPUs.
+	 * Otherwise hibernation might fail when mapping all the vectors back
+	 * to CPU0.
+	 */
+	i40e_clear_interrupt_scheme(pf);
+
+	rtnl_unlock();
+
+	return 0;
+}
+
+/**
+ * i40e_io_resume - resume IO operations
+ * @pf: pointer to i40e_pf struct
+ *
+ **/
+static int i40e_io_resume(struct i40e_pf *pf)
+{
+	struct device *dev = &pf->pdev->dev;
+	int err;
+
+	/* We need to hold the RTNL lock prior to restoring interrupt schemes,
+	 * since we're going to be restoring queues
+	 */
+	rtnl_lock();
+
+	/* We cleared the interrupt scheme when we suspended, so we need to
+	 * restore it now to resume device functionality.
+	 */
+	err = i40e_restore_interrupt_scheme(pf);
+	if (err) {
+		dev_err(dev, "Cannot restore interrupt scheme: %d\n",
+			err);
+	}
+
+	clear_bit(__I40E_DOWN, pf->state);
+	i40e_reset_and_rebuild(pf, false, true);
+
+	rtnl_unlock();
+
+	/* Clear suspended state last after everything is recovered */
+	clear_bit(__I40E_SUSPENDED, pf->state);
+
+	/* Restart the service task */
+	mod_timer(&pf->service_timer,
+		  round_jiffies(jiffies + pf->service_timer_period));
+
+	return 0;
+}
+
 /**
  * i40e_pci_error_detected - warning that something funky happened in PCI land
  * @pdev: PCI device information struct
@@ -16446,53 +16579,6 @@ static void i40e_pci_error_resume(struct pci_dev *pdev)
 	i40e_handle_reset_warning(pf, false);
 }
 
-/**
- * i40e_enable_mc_magic_wake - enable multicast magic packet wake up
- * using the mac_address_write admin q function
- * @pf: pointer to i40e_pf struct
- **/
-static void i40e_enable_mc_magic_wake(struct i40e_pf *pf)
-{
-	struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf);
-	struct i40e_hw *hw = &pf->hw;
-	u8 mac_addr[6];
-	u16 flags = 0;
-	int ret;
-
-	/* Get current MAC address in case it's an LAA */
-	if (main_vsi && main_vsi->netdev) {
-		ether_addr_copy(mac_addr, main_vsi->netdev->dev_addr);
-	} else {
-		dev_err(&pf->pdev->dev,
-			"Failed to retrieve MAC address; using default\n");
-		ether_addr_copy(mac_addr, hw->mac.addr);
-	}
-
-	/* The FW expects the mac address write cmd to first be called with
-	 * one of these flags before calling it again with the multicast
-	 * enable flags.
-	 */
-	flags = I40E_AQC_WRITE_TYPE_LAA_WOL;
-
-	if (hw->func_caps.flex10_enable && hw->partition_id != 1)
-		flags = I40E_AQC_WRITE_TYPE_LAA_ONLY;
-
-	ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
-	if (ret) {
-		dev_err(&pf->pdev->dev,
-			"Failed to update MAC address registers; cannot enable Multicast Magic packet wake up");
-		return;
-	}
-
-	flags = I40E_AQC_MC_MAG_EN
-			| I40E_AQC_WOL_PRESERVE_ON_PFR
-			| I40E_AQC_WRITE_TYPE_UPDATE_MC_MAG;
-	ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
-	if (ret)
-		dev_err(&pf->pdev->dev,
-			"Failed to enable Multicast Magic Packet wake up\n");
-}
-
 /**
  * i40e_shutdown - PCI callback for shutting down
  * @pdev: PCI device information struct
@@ -16552,48 +16638,11 @@ static void i40e_shutdown(struct pci_dev *pdev)
 static int i40e_suspend(struct device *dev)
 {
 	struct i40e_pf *pf = dev_get_drvdata(dev);
-	struct i40e_hw *hw = &pf->hw;
 
 	/* If we're already suspended, then there is nothing to do */
 	if (test_and_set_bit(__I40E_SUSPENDED, pf->state))
 		return 0;
-
-	set_bit(__I40E_DOWN, pf->state);
-
-	/* Ensure service task will not be running */
-	del_timer_sync(&pf->service_timer);
-	cancel_work_sync(&pf->service_task);
-
-	/* Client close must be called explicitly here because the timer
-	 * has been stopped.
-	 */
-	i40e_notify_client_of_netdev_close(pf, false);
-
-	if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) &&
-	    pf->wol_en)
-		i40e_enable_mc_magic_wake(pf);
-
-	/* Since we're going to destroy queues during the
-	 * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this
-	 * whole section
-	 */
-	rtnl_lock();
-
-	i40e_prep_for_reset(pf);
-
-	wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
-	wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
-
-	/* Clear the interrupt scheme and release our IRQs so that the system
-	 * can safely hibernate even when there are a large number of CPUs.
-	 * Otherwise hibernation might fail when mapping all the vectors back
-	 * to CPU0.
-	 */
-	i40e_clear_interrupt_scheme(pf);
-
-	rtnl_unlock();
-
-	return 0;
+	return i40e_io_suspend(pf);
 }
 
 /**
@@ -16603,39 +16652,11 @@ static int i40e_suspend(struct device *dev)
 static int i40e_resume(struct device *dev)
 {
 	struct i40e_pf *pf = dev_get_drvdata(dev);
-	int err;
 
 	/* If we're not suspended, then there is nothing to do */
 	if (!test_bit(__I40E_SUSPENDED, pf->state))
 		return 0;
-
-	/* We need to hold the RTNL lock prior to restoring interrupt schemes,
-	 * since we're going to be restoring queues
-	 */
-	rtnl_lock();
-
-	/* We cleared the interrupt scheme when we suspended, so we need to
-	 * restore it now to resume device functionality.
-	 */
-	err = i40e_restore_interrupt_scheme(pf);
-	if (err) {
-		dev_err(dev, "Cannot restore interrupt scheme: %d\n",
-			err);
-	}
-
-	clear_bit(__I40E_DOWN, pf->state);
-	i40e_reset_and_rebuild(pf, false, true);
-
-	rtnl_unlock();
-
-	/* Clear suspended state last after everything is recovered */
-	clear_bit(__I40E_SUSPENDED, pf->state);
-
-	/* Restart the service task */
-	mod_timer(&pf->service_timer,
-		  round_jiffies(jiffies + pf->service_timer_period));
-
-	return 0;
+	return i40e_io_resume(pf);
 }
 
 static const struct pci_error_handlers i40e_err_handler = {
-- 
cgit v1.2.3


From c80b6538d35a7a60d874c5a76c3c5a82b6a28fbb Mon Sep 17 00:00:00 2001
From: Thinh Tran <thinhtr@linux.ibm.com>
Date: Tue, 28 May 2024 15:06:06 -0700
Subject: i40e: Fully suspend and resume IO operations in EEH case

When EEH events occurs, the callback functions in the i40e, which are
managed by the EEH driver, will completely suspend and resume all IO
operations.

- In the PCI error detected callback, replaced i40e_prep_for_reset()
  with i40e_io_suspend(). The change is to fully suspend all I/O
  operations
- In the PCI error slot reset callback, replaced pci_enable_device_mem()
  with pci_enable_device(). This change enables both I/O and memory of
  the device.
- In the PCI error resume callback, replaced i40e_handle_reset_warning()
  with i40e_io_resume(). This change allows the system to resume I/O
  operations

Fixes: a5f3d2c17b07 ("powerpc/pseries/pci: Add MSI domains")
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Robert Thomas <rob.thomas@ibm.com>
Signed-off-by: Thinh Tran <thinhtr@linux.ibm.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20240528-net-2024-05-28-intel-net-fixes-v1-3-dc8593d2bbc6@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index d5f25ea304bf..284c3fad5a6e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -11171,6 +11171,8 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
 	ret = i40e_reset(pf);
 	if (!ret)
 		i40e_rebuild(pf, reinit, lock_acquired);
+	else
+		dev_err(&pf->pdev->dev, "%s: i40e_reset() FAILED", __func__);
 }
 
 /**
@@ -16491,7 +16493,7 @@ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev,
 
 	/* shutdown all operations */
 	if (!test_bit(__I40E_SUSPENDED, pf->state))
-		i40e_prep_for_reset(pf);
+		i40e_io_suspend(pf);
 
 	/* Request a slot reset */
 	return PCI_ERS_RESULT_NEED_RESET;
@@ -16513,7 +16515,8 @@ static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev)
 	u32 reg;
 
 	dev_dbg(&pdev->dev, "%s\n", __func__);
-	if (pci_enable_device_mem(pdev)) {
+	/* enable I/O and memory of the device  */
+	if (pci_enable_device(pdev)) {
 		dev_info(&pdev->dev,
 			 "Cannot re-enable PCI device after reset.\n");
 		result = PCI_ERS_RESULT_DISCONNECT;
@@ -16576,7 +16579,7 @@ static void i40e_pci_error_resume(struct pci_dev *pdev)
 	if (test_bit(__I40E_SUSPENDED, pf->state))
 		return;
 
-	i40e_handle_reset_warning(pf, false);
+	i40e_io_resume(pf);
 }
 
 /**
-- 
cgit v1.2.3


From 2a6d8f2de2224ac46df94dc40f43f8b9701f6703 Mon Sep 17 00:00:00 2001
From: Paul Greenwalt <paul.greenwalt@intel.com>
Date: Tue, 28 May 2024 15:06:08 -0700
Subject: ice: fix 200G PHY types to link speed mapping

Commit 24407a01e57c ("ice: Add 200G speed/phy type use") added support
for 200G PHY speeds, but did not include the mapping of 200G PHY types
to link speed. As a result the driver is returning UNKNOWN link speed
when setting 200G ethtool advertised link modes.

To fix this add 200G PHY types to link speed mapping to
ice_get_link_speed_based_on_phy_type().

Fixes: 24407a01e57c ("ice: Add 200G speed/phy type use")
Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20240528-net-2024-05-28-intel-net-fixes-v1-5-dc8593d2bbc6@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/ice/ice_common.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 5649b257e631..24716a3b494c 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -3148,6 +3148,16 @@ ice_get_link_speed_based_on_phy_type(u64 phy_type_low, u64 phy_type_high)
 	case ICE_PHY_TYPE_HIGH_100G_AUI2:
 		speed_phy_type_high = ICE_AQ_LINK_SPEED_100GB;
 		break;
+	case ICE_PHY_TYPE_HIGH_200G_CR4_PAM4:
+	case ICE_PHY_TYPE_HIGH_200G_SR4:
+	case ICE_PHY_TYPE_HIGH_200G_FR4:
+	case ICE_PHY_TYPE_HIGH_200G_LR4:
+	case ICE_PHY_TYPE_HIGH_200G_DR4:
+	case ICE_PHY_TYPE_HIGH_200G_KR4_PAM4:
+	case ICE_PHY_TYPE_HIGH_200G_AUI4_AOC_ACC:
+	case ICE_PHY_TYPE_HIGH_200G_AUI4:
+		speed_phy_type_high = ICE_AQ_LINK_SPEED_200GB;
+		break;
 	default:
 		speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN;
 		break;
-- 
cgit v1.2.3


From a51c9b1c9ab2351e62933357fcad5bfad27f2400 Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Tue, 28 May 2024 15:06:11 -0700
Subject: ice: check for unregistering correct number of devlink params

On module load, the ice driver checks for the lack of a specific PF
capability to determine if it should reduce the number of devlink params
to register.  One situation when this test returns true is when the
driver loads in safe mode.  The same check is not present on the unload
path when devlink params are unregistered.  This results in the driver
triggering a WARN_ON in the kernel devlink code.

The current check and code path uses a reduction in the number of elements
reported in the list of params.  This is fragile and not good for future
maintaining.

Change the parameters to be held in two lists, one always registered and
one dependent on the check.

Add a symmetrical check in the unload path so that the correct parameters
are unregistered as well.

Fixes: 109eb2917284 ("ice: Add tx_scheduling_layers devlink param")
CC: Lukasz Czapnik <lukasz.czapnik@intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20240528-net-2024-05-28-intel-net-fixes-v1-8-dc8593d2bbc6@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/ice/devlink/devlink.c | 31 +++++++++++++++++-------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c
index c4b69655cdf5..704e9ad5144e 100644
--- a/drivers/net/ethernet/intel/ice/devlink/devlink.c
+++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c
@@ -1388,7 +1388,7 @@ enum ice_param_id {
 	ICE_DEVLINK_PARAM_ID_TX_SCHED_LAYERS,
 };
 
-static const struct devlink_param ice_devlink_params[] = {
+static const struct devlink_param ice_dvl_rdma_params[] = {
 	DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
 			      ice_devlink_enable_roce_get,
 			      ice_devlink_enable_roce_set,
@@ -1397,6 +1397,9 @@ static const struct devlink_param ice_devlink_params[] = {
 			      ice_devlink_enable_iw_get,
 			      ice_devlink_enable_iw_set,
 			      ice_devlink_enable_iw_validate),
+};
+
+static const struct devlink_param ice_dvl_sched_params[] = {
 	DEVLINK_PARAM_DRIVER(ICE_DEVLINK_PARAM_ID_TX_SCHED_LAYERS,
 			     "tx_scheduling_layers",
 			     DEVLINK_PARAM_TYPE_U8,
@@ -1464,21 +1467,31 @@ int ice_devlink_register_params(struct ice_pf *pf)
 {
 	struct devlink *devlink = priv_to_devlink(pf);
 	struct ice_hw *hw = &pf->hw;
-	size_t params_size;
+	int status;
 
-	params_size =  ARRAY_SIZE(ice_devlink_params);
+	status = devl_params_register(devlink, ice_dvl_rdma_params,
+				      ARRAY_SIZE(ice_dvl_rdma_params));
+	if (status)
+		return status;
 
-	if (!hw->func_caps.common_cap.tx_sched_topo_comp_mode_en)
-		params_size--;
+	if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en)
+		status = devl_params_register(devlink, ice_dvl_sched_params,
+					      ARRAY_SIZE(ice_dvl_sched_params));
 
-	return devl_params_register(devlink, ice_devlink_params,
-				    params_size);
+	return status;
 }
 
 void ice_devlink_unregister_params(struct ice_pf *pf)
 {
-	devl_params_unregister(priv_to_devlink(pf), ice_devlink_params,
-			       ARRAY_SIZE(ice_devlink_params));
+	struct devlink *devlink = priv_to_devlink(pf);
+	struct ice_hw *hw = &pf->hw;
+
+	devl_params_unregister(devlink, ice_dvl_rdma_params,
+			       ARRAY_SIZE(ice_dvl_rdma_params));
+
+	if (hw->func_caps.common_cap.tx_sched_topo_comp_mode_en)
+		devl_params_unregister(devlink, ice_dvl_sched_params,
+				       ARRAY_SIZE(ice_dvl_sched_params));
 }
 
 #define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024)
-- 
cgit v1.2.3


From 2dc8b1e7177d4f49f492ce648440caf2de0c3616 Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayagr@amazon.com>
Date: Tue, 28 May 2024 20:09:12 +0300
Subject: net: ena: Fix redundant device NUMA node override

The driver overrides the NUMA node id of the device regardless of
whether it knows its correct value (often setting it to -1 even though
the node id is advertised in 'struct device'). This can lead to
suboptimal configurations.

This patch fixes this behavior and makes the shared memory allocation
functions use the NUMA node id advertised by the underlying device.

Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)")
Signed-off-by: Shay Agroskin <shayagr@amazon.com>
Link: https://lore.kernel.org/r/20240528170912.1204417-1-shayagr@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/amazon/ena/ena_com.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 2d8a66ea82fa..713a595370bf 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -312,7 +312,6 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
 			      struct ena_com_io_sq *io_sq)
 {
 	size_t size;
-	int dev_node = 0;
 
 	memset(&io_sq->desc_addr, 0x0, sizeof(io_sq->desc_addr));
 
@@ -325,12 +324,9 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
 	size = io_sq->desc_entry_size * io_sq->q_depth;
 
 	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) {
-		dev_node = dev_to_node(ena_dev->dmadev);
-		set_dev_node(ena_dev->dmadev, ctx->numa_node);
 		io_sq->desc_addr.virt_addr =
 			dma_alloc_coherent(ena_dev->dmadev, size, &io_sq->desc_addr.phys_addr,
 					   GFP_KERNEL);
-		set_dev_node(ena_dev->dmadev, dev_node);
 		if (!io_sq->desc_addr.virt_addr) {
 			io_sq->desc_addr.virt_addr =
 				dma_alloc_coherent(ena_dev->dmadev, size,
@@ -354,10 +350,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
 		size = (size_t)io_sq->bounce_buf_ctrl.buffer_size *
 			io_sq->bounce_buf_ctrl.buffers_num;
 
-		dev_node = dev_to_node(ena_dev->dmadev);
-		set_dev_node(ena_dev->dmadev, ctx->numa_node);
 		io_sq->bounce_buf_ctrl.base_buffer = devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
-		set_dev_node(ena_dev->dmadev, dev_node);
 		if (!io_sq->bounce_buf_ctrl.base_buffer)
 			io_sq->bounce_buf_ctrl.base_buffer =
 				devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
@@ -397,7 +390,6 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
 			      struct ena_com_io_cq *io_cq)
 {
 	size_t size;
-	int prev_node = 0;
 
 	memset(&io_cq->cdesc_addr, 0x0, sizeof(io_cq->cdesc_addr));
 
@@ -409,11 +401,8 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
 
 	size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth;
 
-	prev_node = dev_to_node(ena_dev->dmadev);
-	set_dev_node(ena_dev->dmadev, ctx->numa_node);
 	io_cq->cdesc_addr.virt_addr =
 		dma_alloc_coherent(ena_dev->dmadev, size, &io_cq->cdesc_addr.phys_addr, GFP_KERNEL);
-	set_dev_node(ena_dev->dmadev, prev_node);
 	if (!io_cq->cdesc_addr.virt_addr) {
 		io_cq->cdesc_addr.virt_addr =
 			dma_alloc_coherent(ena_dev->dmadev, size, &io_cq->cdesc_addr.phys_addr,
-- 
cgit v1.2.3


From b3dc6e8003b500861fa307e9a3400c52e78e4d3a Mon Sep 17 00:00:00 2001
From: Yue Haibing <yuehaibing@huawei.com>
Date: Wed, 29 May 2024 17:56:33 +0800
Subject: ipvlan: Dont Use skb->sk in ipvlan_process_v{4,6}_outbound

Raw packet from PF_PACKET socket ontop of an IPv6-backed ipvlan device will
hit WARN_ON_ONCE() in sk_mc_loop() through sch_direct_xmit() path.

WARNING: CPU: 2 PID: 0 at net/core/sock.c:775 sk_mc_loop+0x2d/0x70
Modules linked in: sch_netem ipvlan rfkill cirrus drm_shmem_helper sg drm_kms_helper
CPU: 2 PID: 0 Comm: swapper/2 Kdump: loaded Not tainted 6.9.0+ #279
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
RIP: 0010:sk_mc_loop+0x2d/0x70
Code: fa 0f 1f 44 00 00 65 0f b7 15 f7 96 a3 4f 31 c0 66 85 d2 75 26 48 85 ff 74 1c
RSP: 0018:ffffa9584015cd78 EFLAGS: 00010212
RAX: 0000000000000011 RBX: ffff91e585793e00 RCX: 0000000002c6a001
RDX: 0000000000000000 RSI: 0000000000000040 RDI: ffff91e589c0f000
RBP: ffff91e5855bd100 R08: 0000000000000000 R09: 3d00545216f43d00
R10: ffff91e584fdcc50 R11: 00000060dd8616f4 R12: ffff91e58132d000
R13: ffff91e584fdcc68 R14: ffff91e5869ce800 R15: ffff91e589c0f000
FS:  0000000000000000(0000) GS:ffff91e898100000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f788f7c44c0 CR3: 0000000008e1a000 CR4: 00000000000006f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<IRQ>
 ? __warn (kernel/panic.c:693)
 ? sk_mc_loop (net/core/sock.c:760)
 ? report_bug (lib/bug.c:201 lib/bug.c:219)
 ? handle_bug (arch/x86/kernel/traps.c:239)
 ? exc_invalid_op (arch/x86/kernel/traps.c:260 (discriminator 1))
 ? asm_exc_invalid_op (./arch/x86/include/asm/idtentry.h:621)
 ? sk_mc_loop (net/core/sock.c:760)
 ip6_finish_output2 (net/ipv6/ip6_output.c:83 (discriminator 1))
 ? nf_hook_slow (net/netfilter/core.c:626)
 ip6_finish_output (net/ipv6/ip6_output.c:222)
 ? __pfx_ip6_finish_output (net/ipv6/ip6_output.c:215)
 ipvlan_xmit_mode_l3 (drivers/net/ipvlan/ipvlan_core.c:602) ipvlan
 ipvlan_start_xmit (drivers/net/ipvlan/ipvlan_main.c:226) ipvlan
 dev_hard_start_xmit (net/core/dev.c:3594)
 sch_direct_xmit (net/sched/sch_generic.c:343)
 __qdisc_run (net/sched/sch_generic.c:416)
 net_tx_action (net/core/dev.c:5286)
 handle_softirqs (kernel/softirq.c:555)
 __irq_exit_rcu (kernel/softirq.c:589)
 sysvec_apic_timer_interrupt (arch/x86/kernel/apic/apic.c:1043)

The warning triggers as this:
packet_sendmsg
   packet_snd //skb->sk is packet sk
      __dev_queue_xmit
         __dev_xmit_skb //q->enqueue is not NULL
             __qdisc_run
               sch_direct_xmit
                 dev_hard_start_xmit
                   ipvlan_start_xmit
                      ipvlan_xmit_mode_l3 //l3 mode
                        ipvlan_process_outbound //vepa flag
                          ipvlan_process_v6_outbound
                            ip6_local_out
                                __ip6_finish_output
                                  ip6_finish_output2 //multicast packet
                                    sk_mc_loop //sk->sk_family is AF_PACKET

Call ip{6}_local_out() with NULL sk in ipvlan as other tunnels to fix this.

Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.")
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20240529095633.613103-1-yuehaibing@huawei.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 2d5b021b4ea6..fef4eff7753a 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -439,7 +439,7 @@ static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb)
 
 	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 
-	err = ip_local_out(net, skb->sk, skb);
+	err = ip_local_out(net, NULL, skb);
 	if (unlikely(net_xmit_eval(err)))
 		DEV_STATS_INC(dev, tx_errors);
 	else
@@ -494,7 +494,7 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
 
 	memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 
-	err = ip6_local_out(dev_net(dev), skb->sk, skb);
+	err = ip6_local_out(dev_net(dev), NULL, skb);
 	if (unlikely(net_xmit_eval(err)))
 		DEV_STATS_INC(dev, tx_errors);
 	else
-- 
cgit v1.2.3


From 700fe6fd093d08c6da2bda8efe00479b0e617327 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 30 May 2024 12:10:43 +0200
Subject: ALSA: seq: Fix yet another spot for system message conversion

We fixed the incorrect UMP type for system messages in the recent
commit, but it missed one place in system_ev_to_ump_midi1().
Fix it now.

Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events")
Fixes: c2bb79613fed ("ALSA: seq: Fix incorrect UMP type for system messages")
Link: https://lore.kernel.org/r/20240530101044.17524-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/seq/seq_ump_convert.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c
index add70b4c2885..f0f332017e66 100644
--- a/sound/core/seq/seq_ump_convert.c
+++ b/sound/core/seq/seq_ump_convert.c
@@ -729,6 +729,7 @@ static int system_ev_to_ump_midi1(const struct snd_seq_event *event,
 				  union snd_ump_midi1_msg *data,
 				  unsigned char status)
 {
+	data->system.type = UMP_MSG_TYPE_SYSTEM; // override
 	data->system.status = status;
 	return 1;
 }
-- 
cgit v1.2.3


From 13c7c941e72908b8cce5a84b45a7b5e485ca12ed Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 29 May 2024 09:35:47 -0700
Subject: netdev: add qstat for csum complete

Recent commit 0cfe71f45f42 ("netdev: add queue stats") added
a lot of useful stats, but only those immediately needed by virtio.
Presumably virtio does not support CHECKSUM_COMPLETE,
so statistic for that form of checksumming wasn't included.
Other drivers will definitely need it, in fact we expect it
to be needed in net-next soon (mlx5). So let's add the definition
of the counter for CHECKSUM_COMPLETE to uAPI in net already,
so that the counters are in a more natural order (all subsequent
counters have not been present in any released kernel, yet).

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Joe Damato <jdamato@fastly.com>
Fixes: 0cfe71f45f42 ("netdev: add queue stats")
Link: https://lore.kernel.org/r/20240529163547.3693194-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/netlink/specs/netdev.yaml | 4 ++++
 include/uapi/linux/netdev.h             | 1 +
 tools/include/uapi/linux/netdev.h       | 1 +
 3 files changed, 6 insertions(+)

diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index 11a32373365a..959755be4d7f 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -349,6 +349,10 @@ attribute-sets:
           Number of packets dropped due to transient lack of resources, such as
           buffer space, host descriptors etc.
         type: uint
+      -
+        name: rx-csum-complete
+        doc: Number of packets that were marked as CHECKSUM_COMPLETE.
+        type: uint
       -
         name: rx-csum-unnecessary
         doc: Number of packets that were marked as CHECKSUM_UNNECESSARY.
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index a8188202413e..43742ac5b00d 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -148,6 +148,7 @@ enum {
 	NETDEV_A_QSTATS_RX_ALLOC_FAIL,
 	NETDEV_A_QSTATS_RX_HW_DROPS,
 	NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS,
+	NETDEV_A_QSTATS_RX_CSUM_COMPLETE,
 	NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY,
 	NETDEV_A_QSTATS_RX_CSUM_NONE,
 	NETDEV_A_QSTATS_RX_CSUM_BAD,
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index a8188202413e..43742ac5b00d 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -148,6 +148,7 @@ enum {
 	NETDEV_A_QSTATS_RX_ALLOC_FAIL,
 	NETDEV_A_QSTATS_RX_HW_DROPS,
 	NETDEV_A_QSTATS_RX_HW_DROP_OVERRUNS,
+	NETDEV_A_QSTATS_RX_CSUM_COMPLETE,
 	NETDEV_A_QSTATS_RX_CSUM_UNNECESSARY,
 	NETDEV_A_QSTATS_RX_CSUM_NONE,
 	NETDEV_A_QSTATS_RX_CSUM_BAD,
-- 
cgit v1.2.3


From 49cb894d567980235b6e64d5e69950ff77debd8c Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Date: Thu, 30 May 2024 14:19:14 +0300
Subject: ASoC: SOF: ipc4-topology: Add support for NHLT with 16-bit only DMIC
 blob

The ACPI NHLT table always had 32-bit DMIC blob even if 16-bit was also
present and taken as a 'rule' which obviously got broken and there is at
least one device on the market which ships with only 16-bit DMIC
configuration blob.
This corner case has never been supported and it is going to need topology
updates for DMIC copier to support multiple formats.

As for the kernel side: if the copier supports multiple formats and the
preferred 32-bit DMIC blob is not found then we will try to get a 16-bit
DMIC configuration and look for a 16-bit copier config.

Fixes: f9209644ae76 ("ASoC: SOF: ipc4-topology: Correct DAI copier config and NHLT blob request")
Link: https://github.com/thesofproject/linux/issues/4973
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Link: https://msgid.link/r/20240530111918.21974-2-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/ipc4-topology.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index beff10989324..521b4dcba601 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -1483,6 +1483,8 @@ snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_sof_dai *dai
 					   dir, dev_type);
 
 	if (!cfg) {
+		bool get_new_blob = false;
+
 		if (format_change) {
 			/*
 			 * The 32-bit blob was not found in NHLT table, try to
@@ -1490,7 +1492,20 @@ snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_sof_dai *dai
 			 */
 			bit_depth = params_width(params);
 			format_change = false;
+			get_new_blob = true;
+		} else if (linktype == SOF_DAI_INTEL_DMIC && !single_format) {
+			/*
+			 * The requested 32-bit blob (no format change for the
+			 * blob request) was not found in NHLT table, try to
+			 * look for 16-bit blob if the copier supports multiple
+			 * formats
+			 */
+			bit_depth = 16;
+			format_change = true;
+			get_new_blob = true;
+		}
 
+		if (get_new_blob) {
 			cfg = intel_nhlt_get_endpoint_blob(sdev->dev, ipc4_data->nhlt,
 							   dai_index, nhlt_type,
 							   bit_depth, bit_depth,
@@ -1513,8 +1528,8 @@ out:
 
 	if (format_change) {
 		/*
-		 * Update the params to reflect that we have loaded 32-bit blob
-		 * instead of the 16-bit.
+		 * Update the params to reflect that different blob was loaded
+		 * instead of the requested bit depth (16 -> 32 or 32 -> 16).
 		 * This information is going to be used by the caller to find
 		 * matching copier format on the dai side.
 		 */
@@ -1522,7 +1537,11 @@ out:
 
 		m = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
 		snd_mask_none(m);
-		snd_mask_set_format(m, SNDRV_PCM_FORMAT_S32_LE);
+		if (bit_depth == 16)
+			snd_mask_set_format(m, SNDRV_PCM_FORMAT_S16_LE);
+		else
+			snd_mask_set_format(m, SNDRV_PCM_FORMAT_S32_LE);
+
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 2a865c9c3fb0289a95f1cb51b42d248736ff45cb Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Date: Thu, 30 May 2024 14:19:15 +0300
Subject: ASoC: SOF: ipc4-topology: Print out the channel count in
 sof_ipc4_dbg_audio_format

Print out the number of channels for the format explicitly instead of
having the reader to understand how to interpret the ch_map and ch_cfg
values.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://msgid.link/r/20240530111918.21974-3-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/ipc4-topology.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index 521b4dcba601..2e0234b865ad 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -195,9 +195,10 @@ static void sof_ipc4_dbg_audio_format(struct device *dev, struct sof_ipc4_pin_fo
 	for (i = 0; i < num_formats; i++) {
 		struct sof_ipc4_audio_format *fmt = &pin_fmt[i].audio_fmt;
 		dev_dbg(dev,
-			"Pin index #%d: %uHz, %ubit (ch_map %#x ch_cfg %u interleaving_style %u fmt_cfg %#x) buffer size %d\n",
-			pin_fmt[i].pin_index, fmt->sampling_frequency, fmt->bit_depth, fmt->ch_map,
-			fmt->ch_cfg, fmt->interleaving_style, fmt->fmt_cfg,
+			"Pin index #%d: %uHz, %ubit, %luch (ch_map %#x ch_cfg %u interleaving_style %u fmt_cfg %#x) buffer size %d\n",
+			pin_fmt[i].pin_index, fmt->sampling_frequency, fmt->bit_depth,
+			SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(fmt->fmt_cfg),
+			fmt->ch_map, fmt->ch_cfg, fmt->interleaving_style, fmt->fmt_cfg,
 			pin_fmt[i].buffer_size);
 	}
 }
-- 
cgit v1.2.3


From 3b64fd2f83f203f5a34faed3dadf6464313f827d Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Date: Thu, 30 May 2024 14:19:16 +0300
Subject: ASoC: SOF: ipc4-topology/pcm: Rename
 sof_ipc4_copier_is_single_format()

Rename the sof_ipc4_copier_is_single_format() to
sof_ipc4_copier_is_single_bitdepth() to clear the confusion of the use of
'format' when we are querying information on the bit depth.
Format is used to describe a combination of parameters (rate, channels,
sample format / bit depth).
Rename the flags used to store the result at the same time.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Link: https://msgid.link/r/20240530111918.21974-4-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/ipc4-pcm.c      | 12 ++++++------
 sound/soc/sof/ipc4-topology.c | 40 ++++++++++++++++++++--------------------
 sound/soc/sof/ipc4-topology.h |  6 +++---
 3 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c
index 307bee63756b..4df2be3d39eb 100644
--- a/sound/soc/sof/ipc4-pcm.c
+++ b/sound/soc/sof/ipc4-pcm.c
@@ -650,7 +650,7 @@ static int sof_ipc4_pcm_dai_link_fixup(struct snd_soc_pcm_runtime *rtd,
 	struct snd_soc_dai *cpu_dai = snd_soc_rtd_to_cpu(rtd, 0);
 	struct sof_ipc4_audio_format *ipc4_fmt;
 	struct sof_ipc4_copier *ipc4_copier;
-	bool single_fmt = false;
+	bool single_bitdepth = false;
 	u32 valid_bits = 0;
 	int dir, ret;
 
@@ -682,18 +682,18 @@ static int sof_ipc4_pcm_dai_link_fixup(struct snd_soc_pcm_runtime *rtd,
 				return 0;
 
 			if (dir == SNDRV_PCM_STREAM_PLAYBACK) {
-				if (sof_ipc4_copier_is_single_format(sdev,
+				if (sof_ipc4_copier_is_single_bitdepth(sdev,
 					available_fmt->output_pin_fmts,
 					available_fmt->num_output_formats)) {
 					ipc4_fmt = &available_fmt->output_pin_fmts->audio_fmt;
-					single_fmt = true;
+					single_bitdepth = true;
 				}
 			} else {
-				if (sof_ipc4_copier_is_single_format(sdev,
+				if (sof_ipc4_copier_is_single_bitdepth(sdev,
 					available_fmt->input_pin_fmts,
 					available_fmt->num_input_formats)) {
 					ipc4_fmt = &available_fmt->input_pin_fmts->audio_fmt;
-					single_fmt = true;
+					single_bitdepth = true;
 				}
 			}
 		}
@@ -703,7 +703,7 @@ static int sof_ipc4_pcm_dai_link_fixup(struct snd_soc_pcm_runtime *rtd,
 	if (ret)
 		return ret;
 
-	if (single_fmt) {
+	if (single_bitdepth) {
 		snd_mask_none(fmt);
 		valid_bits = SOF_IPC4_AUDIO_FORMAT_CFG_V_BIT_DEPTH(ipc4_fmt->fmt_cfg);
 		dev_dbg(component->dev, "Set %s to %d bit format\n", dai->name, valid_bits);
diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index 2e0234b865ad..afb7908766a3 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -1423,7 +1423,7 @@ static int snd_sof_get_hw_config_params(struct snd_sof_dev *sdev, struct snd_sof
 
 static int
 snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
-			       bool single_format,
+			       bool single_bitdepth,
 			       struct snd_pcm_hw_params *params, u32 dai_index,
 			       u32 linktype, u8 dir, u32 **dst, u32 *len)
 {
@@ -1446,7 +1446,7 @@ snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_sof_dai *dai
 		 * Look for 32-bit blob first instead of 16-bit if copier
 		 * supports multiple formats
 		 */
-		if (bit_depth == 16 && !single_format) {
+		if (bit_depth == 16 && !single_bitdepth) {
 			dev_dbg(sdev->dev, "Looking for 32-bit blob first for DMIC\n");
 			format_change = true;
 			bit_depth = 32;
@@ -1494,7 +1494,7 @@ snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_sof_dai *dai
 			bit_depth = params_width(params);
 			format_change = false;
 			get_new_blob = true;
-		} else if (linktype == SOF_DAI_INTEL_DMIC && !single_format) {
+		} else if (linktype == SOF_DAI_INTEL_DMIC && !single_bitdepth) {
 			/*
 			 * The requested 32-bit blob (no format change for the
 			 * blob request) was not found in NHLT table, try to
@@ -1550,7 +1550,7 @@ out:
 #else
 static int
 snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
-			       bool single_format,
+			       bool single_bitdepth,
 			       struct snd_pcm_hw_params *params, u32 dai_index,
 			       u32 linktype, u8 dir, u32 **dst, u32 *len)
 {
@@ -1558,9 +1558,9 @@ snd_sof_get_nhlt_endpoint_data(struct snd_sof_dev *sdev, struct snd_sof_dai *dai
 }
 #endif
 
-bool sof_ipc4_copier_is_single_format(struct snd_sof_dev *sdev,
-				      struct sof_ipc4_pin_format *pin_fmts,
-				      u32 pin_fmts_size)
+bool sof_ipc4_copier_is_single_bitdepth(struct snd_sof_dev *sdev,
+					struct sof_ipc4_pin_format *pin_fmts,
+					u32 pin_fmts_size)
 {
 	struct sof_ipc4_audio_format *fmt;
 	u32 valid_bits;
@@ -1591,7 +1591,7 @@ sof_ipc4_prepare_dai_copier(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
 	struct snd_pcm_hw_params dai_params = *params;
 	struct sof_ipc4_copier_data *copier_data;
 	struct sof_ipc4_copier *ipc4_copier;
-	bool single_format;
+	bool single_bitdepth;
 	int ret;
 
 	ipc4_copier = dai->private;
@@ -1605,12 +1605,12 @@ sof_ipc4_prepare_dai_copier(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
 	 * format lookup
 	 */
 	if (dir == SNDRV_PCM_STREAM_PLAYBACK) {
-		single_format = sof_ipc4_copier_is_single_format(sdev,
+		single_bitdepth = sof_ipc4_copier_is_single_bitdepth(sdev,
 						available_fmt->output_pin_fmts,
 						available_fmt->num_output_formats);
 
 		/* Update the dai_params with the only supported format */
-		if (single_format) {
+		if (single_bitdepth) {
 			ret = sof_ipc4_update_hw_params(sdev, &dai_params,
 					&available_fmt->output_pin_fmts[0].audio_fmt,
 					BIT(SNDRV_PCM_HW_PARAM_FORMAT));
@@ -1618,12 +1618,12 @@ sof_ipc4_prepare_dai_copier(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
 				return ret;
 		}
 	} else {
-		single_format = sof_ipc4_copier_is_single_format(sdev,
+		single_bitdepth = sof_ipc4_copier_is_single_bitdepth(sdev,
 						available_fmt->input_pin_fmts,
 						available_fmt->num_input_formats);
 
 		/* Update the dai_params with the only supported format */
-		if (single_format) {
+		if (single_bitdepth) {
 			ret = sof_ipc4_update_hw_params(sdev, &dai_params,
 					&available_fmt->input_pin_fmts[0].audio_fmt,
 					BIT(SNDRV_PCM_HW_PARAM_FORMAT));
@@ -1632,7 +1632,7 @@ sof_ipc4_prepare_dai_copier(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
 		}
 	}
 
-	ret = snd_sof_get_nhlt_endpoint_data(sdev, dai, single_format,
+	ret = snd_sof_get_nhlt_endpoint_data(sdev, dai, single_bitdepth,
 					     &dai_params,
 					     ipc4_copier->dai_index,
 					     ipc4_copier->dai_type, dir,
@@ -1667,7 +1667,7 @@ sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget,
 	u32 out_ref_rate, out_ref_channels;
 	u32 deep_buffer_dma_ms = 0;
 	int output_fmt_index;
-	bool single_output_format;
+	bool single_output_bitdepth;
 	int i;
 
 	dev_dbg(sdev->dev, "copier %s, type %d", swidget->widget->name, swidget->id);
@@ -1804,9 +1804,9 @@ sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget,
 		return ret;
 
 	/* set the reference params for output format selection */
-	single_output_format = sof_ipc4_copier_is_single_format(sdev,
-								available_fmt->output_pin_fmts,
-								available_fmt->num_output_formats);
+	single_output_bitdepth = sof_ipc4_copier_is_single_bitdepth(sdev,
+					available_fmt->output_pin_fmts,
+					available_fmt->num_output_formats);
 	switch (swidget->id) {
 	case snd_soc_dapm_aif_in:
 	case snd_soc_dapm_dai_out:
@@ -1818,7 +1818,7 @@ sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget,
 		out_ref_rate = in_fmt->sampling_frequency;
 		out_ref_channels = SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(in_fmt->fmt_cfg);
 
-		if (!single_output_format)
+		if (!single_output_bitdepth)
 			out_ref_valid_bits =
 				SOF_IPC4_AUDIO_FORMAT_CFG_V_BIT_DEPTH(in_fmt->fmt_cfg);
 		break;
@@ -1827,7 +1827,7 @@ sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget,
 	case snd_soc_dapm_dai_in:
 		out_ref_rate = params_rate(fe_params);
 		out_ref_channels = params_channels(fe_params);
-		if (!single_output_format) {
+		if (!single_output_bitdepth) {
 			out_ref_valid_bits = sof_ipc4_get_valid_bits(sdev, fe_params);
 			if (out_ref_valid_bits < 0)
 				return out_ref_valid_bits;
@@ -1845,7 +1845,7 @@ sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget,
 	 * if the output format is the same across all available output formats, choose
 	 * that as the reference.
 	 */
-	if (single_output_format) {
+	if (single_output_bitdepth) {
 		struct sof_ipc4_audio_format *out_fmt;
 
 		out_fmt = &available_fmt->output_pin_fmts[0].audio_fmt;
diff --git a/sound/soc/sof/ipc4-topology.h b/sound/soc/sof/ipc4-topology.h
index 4488762f6a71..f4dc499c0ffe 100644
--- a/sound/soc/sof/ipc4-topology.h
+++ b/sound/soc/sof/ipc4-topology.h
@@ -476,7 +476,7 @@ struct sof_ipc4_process {
 	u32 init_config;
 };
 
-bool sof_ipc4_copier_is_single_format(struct snd_sof_dev *sdev,
-				      struct sof_ipc4_pin_format *pin_fmts,
-				      u32 pin_fmts_size);
+bool sof_ipc4_copier_is_single_bitdepth(struct snd_sof_dev *sdev,
+					struct sof_ipc4_pin_format *pin_fmts,
+					u32 pin_fmts_size);
 #endif
-- 
cgit v1.2.3


From 2fcad03eaba1b86e6b829f73a9e75e681b7f3106 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Date: Thu, 30 May 2024 14:19:17 +0300
Subject: ASoC: SOF: ipc4-topology: Improve readability of
 sof_ipc4_prepare_dai_copier()

Remove the duplicated code paths to check for single bit depth and to
update the params with storing the parameters needed by the function and
have a single code section.

No functional change but the code is easier to follow.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Link: https://msgid.link/r/20240530111918.21974-5-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/ipc4-topology.c | 42 ++++++++++++++++++------------------------
 1 file changed, 18 insertions(+), 24 deletions(-)

diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index afb7908766a3..645252789cfe 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -1590,8 +1590,10 @@ sof_ipc4_prepare_dai_copier(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
 	struct sof_ipc4_available_audio_format *available_fmt;
 	struct snd_pcm_hw_params dai_params = *params;
 	struct sof_ipc4_copier_data *copier_data;
+	struct sof_ipc4_pin_format *pin_fmts;
 	struct sof_ipc4_copier *ipc4_copier;
 	bool single_bitdepth;
+	u32 num_pin_fmts;
 	int ret;
 
 	ipc4_copier = dai->private;
@@ -1605,31 +1607,23 @@ sof_ipc4_prepare_dai_copier(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
 	 * format lookup
 	 */
 	if (dir == SNDRV_PCM_STREAM_PLAYBACK) {
-		single_bitdepth = sof_ipc4_copier_is_single_bitdepth(sdev,
-						available_fmt->output_pin_fmts,
-						available_fmt->num_output_formats);
-
-		/* Update the dai_params with the only supported format */
-		if (single_bitdepth) {
-			ret = sof_ipc4_update_hw_params(sdev, &dai_params,
-					&available_fmt->output_pin_fmts[0].audio_fmt,
-					BIT(SNDRV_PCM_HW_PARAM_FORMAT));
-			if (ret)
-				return ret;
-		}
+		pin_fmts = available_fmt->output_pin_fmts;
+		num_pin_fmts = available_fmt->num_output_formats;
 	} else {
-		single_bitdepth = sof_ipc4_copier_is_single_bitdepth(sdev,
-						available_fmt->input_pin_fmts,
-						available_fmt->num_input_formats);
-
-		/* Update the dai_params with the only supported format */
-		if (single_bitdepth) {
-			ret = sof_ipc4_update_hw_params(sdev, &dai_params,
-					&available_fmt->input_pin_fmts[0].audio_fmt,
-					BIT(SNDRV_PCM_HW_PARAM_FORMAT));
-			if (ret)
-				return ret;
-		}
+		pin_fmts = available_fmt->input_pin_fmts;
+		num_pin_fmts = available_fmt->num_input_formats;
+	}
+
+	single_bitdepth = sof_ipc4_copier_is_single_bitdepth(sdev, pin_fmts,
+							     num_pin_fmts);
+
+	/* Update the dai_params with the only supported format */
+	if (single_bitdepth) {
+		ret = sof_ipc4_update_hw_params(sdev, &dai_params,
+						&pin_fmts[0].audio_fmt,
+						BIT(SNDRV_PCM_HW_PARAM_FORMAT));
+		if (ret)
+			return ret;
 	}
 
 	ret = snd_sof_get_nhlt_endpoint_data(sdev, dai, single_bitdepth,
-- 
cgit v1.2.3


From b65456b7b379e20ab225a4e906dc4a0c98fddd7a Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Date: Thu, 30 May 2024 14:19:18 +0300
Subject: ASoC: SOF: ipc4-topology: Adjust the params based on DAI formats

Currently we only check the bit depth value among to DAI formats, but other
parameters might be constant, like number of channels and/or rate.

In capture we use the fe params as a reference to find the format and blob
which should be used, but in the path we can have components which can
handle expanding/narrowing number of channels or do a resample.

In these cases the topology is expected to have 'fixed' parameter for
channels/rates/bit depth and the conversion to the fe format is going to
be done within the path.

In practice this patch fixes issues like:
All DMIC formats are fixed four channels
We have a component which converts the four channel to stereo
FE is opened with 2 channel

Even if we have the correct bit depth format and blob (for four channel) we
will still be looking for stereo configurations, which will fail.

Note: the adjustment of params have switched order with the checking of
single bit depth (needed for the NHLT blob fallback support). This change
is non function, just that if the sof_ipc4_narrow_params_to_format() would
fail, there is no point of checking the single bit depth.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Link: https://msgid.link/r/20240530111918.21974-6-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/ipc4-topology.c | 71 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 57 insertions(+), 14 deletions(-)

diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index 645252789cfe..1bea4bab958b 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -1583,6 +1583,55 @@ bool sof_ipc4_copier_is_single_bitdepth(struct snd_sof_dev *sdev,
 	return true;
 }
 
+static int
+sof_ipc4_adjust_params_to_dai_format(struct snd_sof_dev *sdev,
+				     struct snd_pcm_hw_params *params,
+				     struct sof_ipc4_pin_format *pin_fmts,
+				     u32 pin_fmts_size)
+{
+	u32 params_mask = BIT(SNDRV_PCM_HW_PARAM_RATE) |
+			  BIT(SNDRV_PCM_HW_PARAM_CHANNELS) |
+			  BIT(SNDRV_PCM_HW_PARAM_FORMAT);
+	struct sof_ipc4_audio_format *fmt;
+	u32 rate, channels, valid_bits;
+	int i;
+
+	fmt = &pin_fmts[0].audio_fmt;
+	rate = fmt->sampling_frequency;
+	channels = SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(fmt->fmt_cfg);
+	valid_bits = SOF_IPC4_AUDIO_FORMAT_CFG_V_BIT_DEPTH(fmt->fmt_cfg);
+
+	/* check if parameters in topology defined formats are the same */
+	for (i = 1; i < pin_fmts_size; i++) {
+		u32 val;
+
+		fmt = &pin_fmts[i].audio_fmt;
+
+		if (params_mask & BIT(SNDRV_PCM_HW_PARAM_RATE)) {
+			val = fmt->sampling_frequency;
+			if (val != rate)
+				params_mask &= ~BIT(SNDRV_PCM_HW_PARAM_RATE);
+		}
+		if (params_mask & BIT(SNDRV_PCM_HW_PARAM_CHANNELS)) {
+			val = SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(fmt->fmt_cfg);
+			if (val != channels)
+				params_mask &= ~BIT(SNDRV_PCM_HW_PARAM_CHANNELS);
+		}
+		if (params_mask & BIT(SNDRV_PCM_HW_PARAM_FORMAT)) {
+			val = SOF_IPC4_AUDIO_FORMAT_CFG_V_BIT_DEPTH(fmt->fmt_cfg);
+			if (val != valid_bits)
+				params_mask &= ~BIT(SNDRV_PCM_HW_PARAM_FORMAT);
+		}
+	}
+
+	if (params_mask)
+		return sof_ipc4_update_hw_params(sdev, params,
+						 &pin_fmts[0].audio_fmt,
+						 params_mask);
+
+	return 0;
+}
+
 static int
 sof_ipc4_prepare_dai_copier(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
 			    struct snd_pcm_hw_params *params, int dir)
@@ -1601,10 +1650,9 @@ sof_ipc4_prepare_dai_copier(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
 	available_fmt = &ipc4_copier->available_fmt;
 
 	/*
-	 * If the copier on the DAI side supports only single bit depth then
-	 * this depth (format) should be used to look for the NHLT blob (if
-	 * needed) and in case of capture this should be used for the input
-	 * format lookup
+	 * Fixup the params based on the format parameters of the DAI. If any
+	 * of the RATE, CHANNELS, bit depth is static among the formats then
+	 * narrow the params to only allow that specific parameter value.
 	 */
 	if (dir == SNDRV_PCM_STREAM_PLAYBACK) {
 		pin_fmts = available_fmt->output_pin_fmts;
@@ -1614,18 +1662,13 @@ sof_ipc4_prepare_dai_copier(struct snd_sof_dev *sdev, struct snd_sof_dai *dai,
 		num_pin_fmts = available_fmt->num_input_formats;
 	}
 
+	ret = sof_ipc4_adjust_params_to_dai_format(sdev, &dai_params, pin_fmts,
+						   num_pin_fmts);
+	if (ret)
+		return ret;
+
 	single_bitdepth = sof_ipc4_copier_is_single_bitdepth(sdev, pin_fmts,
 							     num_pin_fmts);
-
-	/* Update the dai_params with the only supported format */
-	if (single_bitdepth) {
-		ret = sof_ipc4_update_hw_params(sdev, &dai_params,
-						&pin_fmts[0].audio_fmt,
-						BIT(SNDRV_PCM_HW_PARAM_FORMAT));
-		if (ret)
-			return ret;
-	}
-
 	ret = snd_sof_get_nhlt_endpoint_data(sdev, dai, single_bitdepth,
 					     &dai_params,
 					     ipc4_copier->dai_index,
-- 
cgit v1.2.3


From 12870ae3818e39ea65bf710f645972277b634f72 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <nathanl@linux.ibm.com>
Date: Fri, 24 May 2024 14:29:54 -0500
Subject: powerpc/pseries/lparcfg: drop error message from guest name lookup

It's not an error or exceptional situation when the hosting
environment does not expose a name for the LP/guest via RTAS or the
device tree. This happens with qemu when run without the '-name'
option. The message also lacks a newline. Remove it.

Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Fixes: eddaa9a40275 ("powerpc/pseries: read the lpar name from the firmware")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/20240524-lparcfg-updates-v2-1-62e2e9d28724@linux.ibm.com
---
 arch/powerpc/platforms/pseries/lparcfg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 6e7029640c0c..62da20f9700a 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -371,8 +371,8 @@ static int read_dt_lpar_name(struct seq_file *m)
 
 static void read_lpar_name(struct seq_file *m)
 {
-	if (read_rtas_lpar_name(m) && read_dt_lpar_name(m))
-		pr_err_once("Error can't get the LPAR name");
+	if (read_rtas_lpar_name(m))
+		read_dt_lpar_name(m);
 }
 
 #define SPLPAR_MAXLENGTH 1026*(sizeof(char))
-- 
cgit v1.2.3


From 2d43cc701b96f910f50915ac4c2a0cae5deb734c Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 29 May 2024 22:30:28 +1000
Subject: powerpc/uaccess: Fix build errors seen with GCC 13/14

Building ppc64le_defconfig with GCC 14 fails with assembler errors:

    CC      fs/readdir.o
  /tmp/ccdQn0mD.s: Assembler messages:
  /tmp/ccdQn0mD.s:212: Error: operand out of domain (18 is not a multiple of 4)
  /tmp/ccdQn0mD.s:226: Error: operand out of domain (18 is not a multiple of 4)
  ... [6 lines]
  /tmp/ccdQn0mD.s:1699: Error: operand out of domain (18 is not a multiple of 4)

A snippet of the asm shows:

  # ../fs/readdir.c:210:         unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
         ld 9,0(29)       # MEM[(u64 *)name_38(D) + _88 * 1], MEM[(u64 *)name_38(D) + _88 * 1]
  # 210 "../fs/readdir.c" 1
         1:      std 9,18(8)     # put_user       # *__pus_addr_52, MEM[(u64 *)name_38(D) + _88 * 1]

The 'std' instruction requires a 4-byte aligned displacement because
it is a DS-form instruction, and as the assembler says, 18 is not a
multiple of 4.

A similar error is seen with GCC 13 and CONFIG_UBSAN_SIGNED_WRAP=y.

The fix is to change the constraint on the memory operand to put_user(),
from "m" which is a general memory reference to "YZ".

The "Z" constraint is documented in the GCC manual PowerPC machine
constraints, and specifies a "memory operand accessed with indexed or
indirect addressing". "Y" is not documented in the manual but specifies
a "memory operand for a DS-form instruction". Using both allows the
compiler to generate a DS-form "std" or X-form "stdx" as appropriate.

The change has to be conditional on CONFIG_PPC_KERNEL_PREFIXED because
the "Y" constraint does not guarantee 4-byte alignment when prefixed
instructions are enabled.

Unfortunately clang doesn't support the "Y" constraint so that has to be
behind an ifdef.

Although the build error is only seen with GCC 13/14, that appears
to just be luck. The constraint has been incorrect since it was first
added.

Fixes: c20beffeec3c ("powerpc/uaccess: Use flexible addressing with __put_user()/__get_user()")
Cc: stable@vger.kernel.org # v5.10+
Suggested-by: Kewen Lin <linkw@gcc.gnu.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/20240529123029.146953-1-mpe@ellerman.id.au
---
 arch/powerpc/include/asm/uaccess.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index de10437fd206..4cba724c8899 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -92,9 +92,25 @@ __pu_failed:							\
 		: label)
 #endif
 
+#ifdef CONFIG_CC_IS_CLANG
+#define DS_FORM_CONSTRAINT "Z<>"
+#else
+#define DS_FORM_CONSTRAINT "YZ<>"
+#endif
+
 #ifdef __powerpc64__
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
 #define __put_user_asm2_goto(x, ptr, label)			\
 	__put_user_asm_goto(x, ptr, label, "std")
+#else
+#define __put_user_asm2_goto(x, addr, label)			\
+	asm goto ("1: std%U1%X1 %0,%1	# put_user\n"		\
+		EX_TABLE(1b, %l2)				\
+		:						\
+		: "r" (x), DS_FORM_CONSTRAINT (*addr)		\
+		:						\
+		: label)
+#endif // CONFIG_PPC_KERNEL_PREFIXED
 #else /* __powerpc64__ */
 #define __put_user_asm2_goto(x, addr, label)			\
 	asm goto(					\
-- 
cgit v1.2.3


From 50934945d54238d2d6d8db4b7c1d4c90d2696c57 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 29 May 2024 22:30:29 +1000
Subject: powerpc/uaccess: Use YZ asm constraint for ld

The 'ld' instruction requires a 4-byte aligned displacement because it
is a DS-form instruction. But the "m" asm constraint doesn't enforce
that.

Add a special case of __get_user_asm2_goto() so that the "YZ" constraint
can be used for "ld".

The "Z" constraint is documented in the GCC manual PowerPC machine
constraints, and specifies a "memory operand accessed with indexed or
indirect addressing". "Y" is not documented in the manual but specifies
a "memory operand for a DS-form instruction". Using both allows the
compiler to generate a DS-form "ld" or X-form "ldx" as appropriate.

The change has to be conditional on CONFIG_PPC_KERNEL_PREFIXED because
the "Y" constraint does not guarantee 4-byte alignment when prefixed
instructions are enabled.

No build errors have been reported due to this, but the possibility is
there depending on compiler code generation decisions.

Fixes: c20beffeec3c ("powerpc/uaccess: Use flexible addressing with __put_user()/__get_user()")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/20240529123029.146953-2-mpe@ellerman.id.au
---
 arch/powerpc/include/asm/uaccess.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 4cba724c8899..fd594bf6c6a9 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -181,8 +181,19 @@ do {								\
 #endif
 
 #ifdef __powerpc64__
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
 #define __get_user_asm2_goto(x, addr, label)			\
 	__get_user_asm_goto(x, addr, label, "ld")
+#else
+#define __get_user_asm2_goto(x, addr, label)			\
+	asm_goto_output(					\
+		"1:	ld%U1%X1 %0, %1	# get_user\n"		\
+		EX_TABLE(1b, %l2)				\
+		: "=r" (x)					\
+		: DS_FORM_CONSTRAINT (*addr)			\
+		:						\
+		: label)
+#endif // CONFIG_PPC_KERNEL_PREFIXED
 #else /* __powerpc64__ */
 #define __get_user_asm2_goto(x, addr, label)			\
 	asm_goto_output(					\
-- 
cgit v1.2.3


From be2fc65d66e0406cc9d39d40becaecdf4ee765f3 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel.holland@sifive.com>
Date: Wed, 29 May 2024 09:28:50 -0700
Subject: powerpc: Limit ARCH_HAS_KERNEL_FPU_SUPPORT to PPC64

When building a 32-bit kernel, some toolchains do not allow mixing soft
float and hard float object files:

    LD      vmlinux.o
  powerpc64le-unknown-linux-musl-ld: lib/test_fpu_impl.o uses hard float, arch/powerpc/kernel/udbg.o uses soft float
  powerpc64le-unknown-linux-musl-ld: failed to merge target specific data of file lib/test_fpu_impl.o
  make[2]: *** [scripts/Makefile.vmlinux_o:62: vmlinux.o] Error 1
  make[1]: *** [Makefile:1152: vmlinux_o] Error 2
  make: *** [Makefile:240: __sub-make] Error 2

This is not an issue when building a 64-bit kernel. To unbreak the
build, limit ARCH_HAS_KERNEL_FPU_SUPPORT to 64-bit kernels. This is okay
because the only real user of this option, amdgpu, was previously
limited to PPC64 anyway; see commit a28e4b672f04 ("drm/amd/display: use
ARCH_HAS_KERNEL_FPU_SUPPORT").

Fixes: 01db473e1aa3 ("powerpc: implement ARCH_HAS_KERNEL_FPU_SUPPORT")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202405250851.Z4daYSWG-lkp@intel.com/
Reported-by: Guenter Roeck <linux@roeck-us.net>
Closes: https://lore.kernel.org/lkml/eeffaec3-df63-4e55-ab7a-064a65c00efa@roeck-us.net/
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/20240529162852.1209-1-samuel.holland@sifive.com
---
 arch/powerpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3c968f2f4ac4..c88c6d46a5bc 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -137,7 +137,7 @@ config PPC
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_HUGEPD			if HUGETLB_PAGE
 	select ARCH_HAS_KCOV
-	select ARCH_HAS_KERNEL_FPU_SUPPORT	if PPC_FPU
+	select ARCH_HAS_KERNEL_FPU_SUPPORT	if PPC64 && PPC_FPU
 	select ARCH_HAS_MEMBARRIER_CALLBACKS
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_MEMREMAP_COMPAT_ALIGN	if PPC_64S_HASH_MMU
-- 
cgit v1.2.3


From 34bf6bae3286a58762711cfbce2cf74ecd42e1b5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 28 May 2024 22:21:31 +0200
Subject: x86/topology/amd: Evaluate SMT in CPUID leaf 0x8000001e only on
 family 0x17 and greater

The new AMD/HYGON topology parser evaluates the SMT information in CPUID leaf
0x8000001e unconditionally while the original code restricted it to CPUs with
family 0x17 and greater.

This breaks family 0x15 CPUs which advertise that leaf and have a non-zero
value in the SMT section. The machine boots, but the scheduler complains loudly
about the mismatch of the core IDs:

  WARNING: CPU: 1 PID: 0 at kernel/sched/core.c:6482 sched_cpu_starting+0x183/0x250
  WARNING: CPU: 0 PID: 1 at kernel/sched/topology.c:2408 build_sched_domains+0x76b/0x12b0

Add the condition back to cure it.

  [ bp: Make it actually build because grandpa is not concerned with
    trivial stuff. :-P ]

Fixes: f7fb3b2dd92c ("x86/cpu: Provide an AMD/HYGON specific topology parser")
Closes: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/56
Reported-by: Tim Teichmann <teichmanntim@outlook.de>
Reported-by: Christian Heusel <christian@heusel.eu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Tested-by: Tim Teichmann <teichmanntim@outlook.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/7skhx6mwe4hxiul64v6azhlxnokheorksqsdbp7qw6g2jduf6c@7b5pvomauugk
---
 arch/x86/kernel/cpu/topology_amd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c
index d419deed6a48..7d476fa697ca 100644
--- a/arch/x86/kernel/cpu/topology_amd.c
+++ b/arch/x86/kernel/cpu/topology_amd.c
@@ -84,9 +84,9 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_topoext)
 
 	/*
 	 * If leaf 0xb is available, then the domain shifts are set
-	 * already and nothing to do here.
+	 * already and nothing to do here. Only valid for family >= 0x17.
 	 */
-	if (!has_topoext) {
+	if (!has_topoext && tscan->c->x86 >= 0x17) {
 		/*
 		 * Leaf 0x80000008 set the CORE domain shift already.
 		 * Update the SMT domain, but do not propagate it.
-- 
cgit v1.2.3


From e112311615a24e1618a591c73506571dc304eb8d Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Thu, 30 May 2024 07:23:39 -0700
Subject: io_uring/rw: Free iovec before cleaning async data

kmemleak shows that there is a memory leak in io_uring read operation,
where a buffer is allocated at iovec import, but never de-allocated.

The memory is allocated at io_async_rw->free_iovec, but, then
io_async_rw is kfreed, taking the allocated memory with it. I saw this
happening when the read operation fails with -11 (EAGAIN).

This is the kmemleak splat.

    unreferenced object 0xffff8881da591c00 (size 256):
...
      backtrace (crc 7a15bdee):
	[<00000000256f2de4>] __kmalloc+0x2d6/0x410
	[<000000007a9f5fc7>] iovec_from_user.part.0+0xc6/0x160
	[<00000000cecdf83a>] __import_iovec+0x50/0x220
	[<00000000d1d586a2>] __io_import_iovec+0x13d/0x220
	[<0000000054ee9bd2>] io_prep_rw+0x186/0x340
	[<00000000a9c0372d>] io_prep_rwv+0x31/0x120
	[<000000001d1170b9>] io_prep_readv+0xe/0x30
	[<0000000070b8eb67>] io_submit_sqes+0x1bd/0x780
	[<00000000812496d4>] __do_sys_io_uring_enter+0x3ed/0x5b0
	[<0000000081499602>] do_syscall_64+0x5d/0x170
	[<00000000de1c5a4d>] entry_SYSCALL_64_after_hwframe+0x76/0x7e

This occurs because the async data cleanup functions are not set for
read/write operations. As a result, the potentially allocated iovec in
the rw async data is not freed before the async data is released,
leading to a memory leak.

With this following patch, kmemleak does not show the leaked memory
anymore, and all liburing tests pass.

Fixes: a9165b83c193 ("io_uring/rw: always setup io_async_rw for read/write requests")
Signed-off-by: Breno Leitao <leitao@debian.org>
Link: https://lore.kernel.org/r/20240530142340.1248216-1-leitao@debian.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/opdef.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 2de5cca9504e..2e3b7b16effb 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -516,10 +516,12 @@ const struct io_cold_def io_cold_defs[] = {
 	},
 	[IORING_OP_READ_FIXED] = {
 		.name			= "READ_FIXED",
+		.cleanup		= io_readv_writev_cleanup,
 		.fail			= io_rw_fail,
 	},
 	[IORING_OP_WRITE_FIXED] = {
 		.name			= "WRITE_FIXED",
+		.cleanup		= io_readv_writev_cleanup,
 		.fail			= io_rw_fail,
 	},
 	[IORING_OP_POLL_ADD] = {
@@ -582,10 +584,12 @@ const struct io_cold_def io_cold_defs[] = {
 	},
 	[IORING_OP_READ] = {
 		.name			= "READ",
+		.cleanup		= io_readv_writev_cleanup,
 		.fail			= io_rw_fail,
 	},
 	[IORING_OP_WRITE] = {
 		.name			= "WRITE",
+		.cleanup		= io_readv_writev_cleanup,
 		.fail			= io_rw_fail,
 	},
 	[IORING_OP_FADVISE] = {
@@ -692,6 +696,7 @@ const struct io_cold_def io_cold_defs[] = {
 	},
 	[IORING_OP_READ_MULTISHOT] = {
 		.name			= "READ_MULTISHOT",
+		.cleanup		= io_readv_writev_cleanup,
 	},
 	[IORING_OP_WAITID] = {
 		.name			= "WAITID",
-- 
cgit v1.2.3


From 328af04b1ac279f17c09ddd5958aa6ec47131c80 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Mon, 27 May 2024 23:43:38 +0300
Subject: media: mei: csi: Put the IPU device reference

The mei csi's probe function obtains a reference to the IPU device but
never puts that reference. Do that now.

Fixes: 33116eb12c6b ("media: ivsc: csi: Use IPU bridge")
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 drivers/media/pci/intel/ivsc/mei_csi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/media/pci/intel/ivsc/mei_csi.c b/drivers/media/pci/intel/ivsc/mei_csi.c
index 89b582a221ab..53151d016188 100644
--- a/drivers/media/pci/intel/ivsc/mei_csi.c
+++ b/drivers/media/pci/intel/ivsc/mei_csi.c
@@ -677,6 +677,7 @@ static int mei_csi_probe(struct mei_cl_device *cldev,
 		return -ENODEV;
 
 	ret = ipu_bridge_init(&ipu->dev, ipu_bridge_parse_ssdb);
+	put_device(&ipu->dev);
 	if (ret < 0)
 		return ret;
 	if (WARN_ON(!dev_fwnode(dev)))
-- 
cgit v1.2.3


From cc864821c7e8b921ebbfb21b17c92f8b3ea3d7ff Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Mon, 27 May 2024 23:13:27 +0300
Subject: media: mei: csi: Warn less verbosely of a missing device fwnode

The check for having device fwnode was meant to be a sanity check but this
also happens if the ACPI DSDT has graph port nodes on sensor device(s) but
not on the IVSC device. Use a more meaningful warning message to tell
about this.

Fixes: 33116eb12c6b ("media: ivsc: csi: Use IPU bridge")
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 drivers/media/pci/intel/ivsc/mei_csi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/media/pci/intel/ivsc/mei_csi.c b/drivers/media/pci/intel/ivsc/mei_csi.c
index 53151d016188..f04a89584334 100644
--- a/drivers/media/pci/intel/ivsc/mei_csi.c
+++ b/drivers/media/pci/intel/ivsc/mei_csi.c
@@ -680,8 +680,10 @@ static int mei_csi_probe(struct mei_cl_device *cldev,
 	put_device(&ipu->dev);
 	if (ret < 0)
 		return ret;
-	if (WARN_ON(!dev_fwnode(dev)))
+	if (!dev_fwnode(dev)) {
+		dev_err(dev, "mei-csi probed without device fwnode!\n");
 		return -ENXIO;
+	}
 
 	csi = devm_kzalloc(dev, sizeof(struct mei_csi), GFP_KERNEL);
 	if (!csi)
-- 
cgit v1.2.3


From 54880795b494e6557b47fe3cf3f21f2f54c2aa68 Mon Sep 17 00:00:00 2001
From: Bingbu Cao <bingbu.cao@intel.com>
Date: Wed, 29 May 2024 11:44:32 +0800
Subject: media: intel/ipu6: update the maximum supported csi2 port number to 6

IPU6EP on Meteor Lake SoC supports maximum 6 csi2 ports instead of 4.

Fixes: 25fedc021985 ("media: intel/ipu6: add Intel IPU6 PCI device driver")
Signed-off-by: Bingbu Cao <bingbu.cao@intel.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 drivers/media/pci/intel/ipu6/ipu6.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/pci/intel/ipu6/ipu6.c b/drivers/media/pci/intel/ipu6/ipu6.c
index f587f609259d..bbd646378ab3 100644
--- a/drivers/media/pci/intel/ipu6/ipu6.c
+++ b/drivers/media/pci/intel/ipu6/ipu6.c
@@ -285,7 +285,7 @@ EXPORT_SYMBOL_NS_GPL(ipu6_configure_spc, INTEL_IPU6);
 #define IPU6_ISYS_CSI2_NPORTS		4
 #define IPU6SE_ISYS_CSI2_NPORTS		4
 #define IPU6_TGL_ISYS_CSI2_NPORTS	8
-#define IPU6EP_MTL_ISYS_CSI2_NPORTS	4
+#define IPU6EP_MTL_ISYS_CSI2_NPORTS	6
 
 static void ipu6_internal_pdata_init(struct ipu6_device *isp)
 {
-- 
cgit v1.2.3


From ffb9072bce200a4d004006e8b40c366933cf517b Mon Sep 17 00:00:00 2001
From: Bingbu Cao <bingbu.cao@intel.com>
Date: Wed, 29 May 2024 14:43:21 +0800
Subject: media: intel/ipu6: add csi2 port sanity check in notifier bound

Invalid csi2 port will break the isys notifier bound ops as it is
trying to access an invalid csi2 sub-device instance based on the
port. It will trigger a mc warning, and it will cause the sensor
driver to unbound an inexistent isys csi2 and crash. Adding a
csi2 port sanity check, return error to avoid such case.

Fixes: f50c4ca0a820 ("media: intel/ipu6: add the main input system driver")
Signed-off-by: Bingbu Cao <bingbu.cao@intel.com>
[Sakari Ailus: Fix spelling of "nports" field.]
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
---
 drivers/media/pci/intel/ipu6/ipu6-isys.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/media/pci/intel/ipu6/ipu6-isys.c b/drivers/media/pci/intel/ipu6/ipu6-isys.c
index 1998b72ac07d..8b9b77719bb1 100644
--- a/drivers/media/pci/intel/ipu6/ipu6-isys.c
+++ b/drivers/media/pci/intel/ipu6/ipu6-isys.c
@@ -678,6 +678,12 @@ static int isys_notifier_bound(struct v4l2_async_notifier *notifier,
 		container_of(asc, struct sensor_async_sd, asc);
 	int ret;
 
+	if (s_asd->csi2.port >= isys->pdata->ipdata->csi2.nports) {
+		dev_err(&isys->adev->auxdev.dev, "invalid csi2 port %u\n",
+			s_asd->csi2.port);
+		return -EINVAL;
+	}
+
 	ret = ipu_bridge_instantiate_vcm(sd->dev);
 	if (ret) {
 		dev_err(&isys->adev->auxdev.dev, "instantiate vcm failed\n");
-- 
cgit v1.2.3


From 2a38e4ca302280fdcce370ba2bee79bac16c4587 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Fri, 17 May 2024 13:05:34 -0700
Subject: x86/cpu: Provide default cache line size if not enumerated
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

tl;dr: CPUs with CPUID.80000008H but without CPUID.01H:EDX[CLFSH]
will end up reporting cache_line_size()==0 and bad things happen.
Fill in a default on those to avoid the problem.

Long Story:

The kernel dies a horrible death if c->x86_cache_alignment (aka.
cache_line_size() is 0.  Normally, this value is populated from
c->x86_clflush_size.

Right now the code is set up to get c->x86_clflush_size from two
places.  First, modern CPUs get it from CPUID.  Old CPUs that don't
have leaf 0x80000008 (or CPUID at all) just get some sane defaults
from the kernel in get_cpu_address_sizes().

The vast majority of CPUs that have leaf 0x80000008 also get
->x86_clflush_size from CPUID.  But there are oddballs.

Intel Quark CPUs[1] and others[2] have leaf 0x80000008 but don't set
CPUID.01H:EDX[CLFSH], so they skip over filling in ->x86_clflush_size:

	cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
	if (cap0 & (1<<19))
		c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;

So they: land in get_cpu_address_sizes() and see that CPUID has level
0x80000008 and jump into the side of the if() that does not fill in
c->x86_clflush_size.  That assigns a 0 to c->x86_cache_alignment, and
hilarity ensues in code like:

        buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
                         GFP_KERNEL);

To fix this, always provide a sane value for ->x86_clflush_size.

Big thanks to Andy Shevchenko for finding and reporting this and also
providing a first pass at a fix. But his fix was only partial and only
worked on the Quark CPUs.  It would not, for instance, have worked on
the QEMU config.

1. https://raw.githubusercontent.com/InstLatx64/InstLatx64/master/GenuineIntel/GenuineIntel0000590_Clanton_03_CPUID.txt
2. You can also get this behavior if you use "-cpu 486,+clzero"
   in QEMU.

[ dhansen: remove 'vp_bits_from_cpuid' reference in changelog
	   because bpetkov brutally murdered it recently. ]

Fixes: fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach")
Reported-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Tested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Jörn Heusipp <osmanx@heusipp.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/20240516173928.3960193-1-andriy.shevchenko@linux.intel.com/
Link: https://lore.kernel.org/lkml/5e31cad3-ad4d-493e-ab07-724cfbfaba44@heusipp.de/
Link: https://lore.kernel.org/all/20240517200534.8EC5F33E%40davehans-spike.ostc.intel.com
---
 arch/x86/kernel/cpu/common.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2b170da84f97..e31293c9609f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1075,6 +1075,10 @@ void get_cpu_address_sizes(struct cpuinfo_x86 *c)
 
 		c->x86_virt_bits = (eax >> 8) & 0xff;
 		c->x86_phys_bits = eax & 0xff;
+
+		/* Provide a sane default if not enumerated: */
+		if (!c->x86_clflush_size)
+			c->x86_clflush_size = 32;
 	}
 
 	c->x86_cache_bits = c->x86_phys_bits;
-- 
cgit v1.2.3


From 027a44fedd55fbdf1d45603894634acd960ad04b Mon Sep 17 00:00:00 2001
From: Peter Colberg <peter.colberg@intel.com>
Date: Tue, 21 May 2024 14:12:46 -0400
Subject: hwmon: (intel-m10-bmc-hwmon) Fix multiplier for N6000 board power
 sensor

The Intel N6000 BMC outputs the board power value in milliwatt, whereas
the hwmon sysfs interface must provide power values in microwatt.

Fixes: e1983220ae14 ("hwmon: intel-m10-bmc-hwmon: Add N6000 sensors")
Signed-off-by: Peter Colberg <peter.colberg@intel.com>
Reviewed-by: Matthew Gerlach <matthew.gerlach@linux.intel.com>
Link: https://lore.kernel.org/r/20240521181246.683833-1-peter.colberg@intel.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/intel-m10-bmc-hwmon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/intel-m10-bmc-hwmon.c b/drivers/hwmon/intel-m10-bmc-hwmon.c
index 6500ca548f9c..ca2dff158925 100644
--- a/drivers/hwmon/intel-m10-bmc-hwmon.c
+++ b/drivers/hwmon/intel-m10-bmc-hwmon.c
@@ -429,7 +429,7 @@ static const struct m10bmc_sdata n6000bmc_curr_tbl[] = {
 };
 
 static const struct m10bmc_sdata n6000bmc_power_tbl[] = {
-	{ 0x724, 0x0, 0x0, 0x0, 0x0, 1, "Board Power" },
+	{ 0x724, 0x0, 0x0, 0x0, 0x0, 1000, "Board Power" },
 };
 
 static const struct hwmon_channel_info * const n6000bmc_hinfo[] = {
-- 
cgit v1.2.3


From 52a2c70c3ec555e670a34dd1ab958986451d2dd2 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Thu, 30 May 2024 08:20:14 -0700
Subject: hwmon: (shtc1) Fix property misspelling

The property name is "sensirion,low-precision", not
"sensicon,low-precision".

Cc: Chris Ruehl <chris.ruehl@gtsys.com.hk>
Fixes: be7373b60df5 ("hwmon: shtc1: add support for device tree bindings")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/shtc1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/shtc1.c b/drivers/hwmon/shtc1.c
index 1f96e94967ee..439dd3dba5fc 100644
--- a/drivers/hwmon/shtc1.c
+++ b/drivers/hwmon/shtc1.c
@@ -238,7 +238,7 @@ static int shtc1_probe(struct i2c_client *client)
 
 	if (np) {
 		data->setup.blocking_io = of_property_read_bool(np, "sensirion,blocking-io");
-		data->setup.high_precision = !of_property_read_bool(np, "sensicon,low-precision");
+		data->setup.high_precision = !of_property_read_bool(np, "sensirion,low-precision");
 	} else {
 		if (client->dev.platform_data)
 			data->setup = *(struct shtc1_platform_data *)dev->platform_data;
-- 
cgit v1.2.3


From 41011e2de3480f9adb6420b35b67628b2d903355 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 28 May 2024 11:06:31 +0100
Subject: KVM: arm64: nv: Fix relative priorities of exceptions generated by
 ERETAx

ERETAx can fail in multiple ways:

(1) ELR_EL2 points lalaland
(2) we get a PAC failure
(3) SPSR_EL2 has the wrong mode

(1) is easy, as we just let the CPU do its thing and deliver an
Instruction Abort. However, (2) and (3) are interesting, because
the PAC failure priority is way below that of the Illegal Execution
State exception.

Which means that if we have detected a PAC failure (and that we have
FPACCOMBINE), we must be careful to give priority to the Illegal
Execution State exception, should one be pending.

Solving this involves hoisting the SPSR calculation earlier and
testing for the IL bit before injecting the FPAC exception.

In the extreme case of a ERETAx returning to an invalid mode *and*
failing its PAC check, we end up with an Instruction Abort (due
to the new PC being mangled by the failed Auth) *and* PSTATE.IL
being set. Which matches the requirements of the architecture.

Whilst we're at it, remove a stale comment that states the obvious
and only confuses the reader.

Fixes: 213b3d1ea161 ("KVM: arm64: nv: Handle ERETA[AB] instructions")
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20240528100632.1831995-2-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/emulate-nested.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 72d733c74a38..54090967a335 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2181,16 +2181,23 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
 	if (forward_traps(vcpu, HCR_NV))
 		return;
 
+	spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2);
+	spsr = kvm_check_illegal_exception_return(vcpu, spsr);
+
 	/* Check for an ERETAx */
 	esr = kvm_vcpu_get_esr(vcpu);
 	if (esr_iss_is_eretax(esr) && !kvm_auth_eretax(vcpu, &elr)) {
 		/*
-		 * Oh no, ERETAx failed to authenticate.  If we have
-		 * FPACCOMBINE, deliver an exception right away.  If we
-		 * don't, then let the mangled ELR value trickle down the
+		 * Oh no, ERETAx failed to authenticate.
+		 *
+		 * If we have FPACCOMBINE and we don't have a pending
+		 * Illegal Execution State exception (which has priority
+		 * over FPAC), deliver an exception right away.
+		 *
+		 * Otherwise, let the mangled ELR value trickle down the
 		 * ERET handling, and the guest will have a little surprise.
 		 */
-		if (kvm_has_pauth(vcpu->kvm, FPACCOMBINE)) {
+		if (kvm_has_pauth(vcpu->kvm, FPACCOMBINE) && !(spsr & PSR_IL_BIT)) {
 			esr &= ESR_ELx_ERET_ISS_ERETA;
 			esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_FPAC);
 			kvm_inject_nested_sync(vcpu, esr);
@@ -2201,17 +2208,11 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
 	preempt_disable();
 	kvm_arch_vcpu_put(vcpu);
 
-	spsr = __vcpu_sys_reg(vcpu, SPSR_EL2);
-	spsr = kvm_check_illegal_exception_return(vcpu, spsr);
 	if (!esr_iss_is_eretax(esr))
 		elr = __vcpu_sys_reg(vcpu, ELR_EL2);
 
 	trace_kvm_nested_eret(vcpu, elr, spsr);
 
-	/*
-	 * Note that the current exception level is always the virtual EL2,
-	 * since we set HCR_EL2.NV bit only when entering the virtual EL2.
-	 */
 	*vcpu_pc(vcpu) = elr;
 	*vcpu_cpsr(vcpu) = spsr;
 
-- 
cgit v1.2.3


From 47eb2d68d10208e6a9e89d10b66018e8d6ca0623 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 28 May 2024 11:06:32 +0100
Subject: KVM: arm64: nv: Expose BTI and CSV_frac to a guest hypervisor

Now that we expose PAC to NV guests, we can also expose BTI (as
the two as joined at the hip, due to some of the PAC instructions
being landing pads).

While we're at it, also propagate CSV_frac, which requires no
particular emulation.

Fixes: f4f6a95bac49 ("KVM: arm64: nv: Advertise support for PAuth")
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Link: https://lore.kernel.org/r/20240528100632.1831995-3-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/nested.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 6813c7c7f00a..bae8536cbf00 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -58,8 +58,10 @@ static u64 limit_nv_id_reg(u32 id, u64 val)
 		break;
 
 	case SYS_ID_AA64PFR1_EL1:
-		/* Only support SSBS */
-		val &= NV_FTR(PFR1, SSBS);
+		/* Only support BTI, SSBS, CSV2_frac */
+		val &= (NV_FTR(PFR1, BT)	|
+			NV_FTR(PFR1, SSBS)	|
+			NV_FTR(PFR1, CSV2_frac));
 		break;
 
 	case SYS_ID_AA64MMFR0_EL1:
-- 
cgit v1.2.3


From a638b0461b58aa3205cd9d5f14d6f703d795b4af Mon Sep 17 00:00:00 2001
From: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
Date: Thu, 23 May 2024 11:43:23 +0300
Subject: riscv: prevent pt_regs corruption for secondary idle threads

Top of the kernel thread stack should be reserved for pt_regs. However
this is not the case for the idle threads of the secondary boot harts.
Their stacks overlap with their pt_regs, so both may get corrupted.

Similar issue has been fixed for the primary hart, see c7cdd96eca28
("riscv: prevent stack corruption by reserving task_pt_regs(p) early").
However that fix was not propagated to the secondary harts. The problem
has been noticed in some CPU hotplug tests with V enabled. The function
smp_callin stored several registers on stack, corrupting top of pt_regs
structure including status field. As a result, kernel attempted to save
or restore inexistent V context.

Fixes: 9a2451f18663 ("RISC-V: Avoid using per cpu array for ordered booting")
Fixes: 2875fe056156 ("RISC-V: Add cpu_ops and modify default booting method")
Signed-off-by: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/r/20240523084327.2013211-1-geomatsi@gmail.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/cpu_ops_sbi.c      | 2 +-
 arch/riscv/kernel/cpu_ops_spinwait.c | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c
index 1cc7df740edd..e6fbaaf54956 100644
--- a/arch/riscv/kernel/cpu_ops_sbi.c
+++ b/arch/riscv/kernel/cpu_ops_sbi.c
@@ -72,7 +72,7 @@ static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle)
 	/* Make sure tidle is updated */
 	smp_mb();
 	bdata->task_ptr = tidle;
-	bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE;
+	bdata->stack_ptr = task_pt_regs(tidle);
 	/* Make sure boot data is updated */
 	smp_mb();
 	hsm_data = __pa(bdata);
diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c
index 613872b0a21a..24869eb88908 100644
--- a/arch/riscv/kernel/cpu_ops_spinwait.c
+++ b/arch/riscv/kernel/cpu_ops_spinwait.c
@@ -34,8 +34,7 @@ static void cpu_update_secondary_bootdata(unsigned int cpuid,
 
 	/* Make sure tidle is updated */
 	smp_mb();
-	WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid],
-		   task_stack_page(tidle) + THREAD_SIZE);
+	WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], task_pt_regs(tidle));
 	WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle);
 }
 
-- 
cgit v1.2.3


From 7bed51617401dab2be930b13ed5aacf581f7c8ef Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao@linutronix.de>
Date: Sun, 26 May 2024 13:01:04 +0200
Subject: riscv: enable HAVE_ARCH_HUGE_VMAP for XIP kernel

HAVE_ARCH_HUGE_VMAP also works on XIP kernel, so remove its dependency on
!XIP_KERNEL.

This also fixes a boot problem for XIP kernel introduced by the commit in
"Fixes:". This commit used huge page mapping for vmemmap, but huge page
vmap was not enabled for XIP kernel.

Fixes: ff172d4818ad ("riscv: Use hugepage mappings for vmemmap")
Signed-off-by: Nam Cao <namcao@linutronix.de>
Cc: <stable@vger.kernel.org>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/r/20240526110104.470429-1-namcao@linutronix.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index b94176e25be1..0525ee2d63c7 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -106,7 +106,7 @@ config RISCV
 	select HAS_IOPORT if MMU
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
-	select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT && !XIP_KERNEL
+	select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL
 	select HAVE_ARCH_KASAN if MMU && 64BIT
-- 
cgit v1.2.3


From 982a7eb97be685d1129c06671aed4c26d6919af4 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@rivosinc.com>
Date: Fri, 24 May 2024 11:56:00 -0700
Subject: Documentation: RISC-V: uabi: Only scalar misaligned loads are
 supported

We're stuck supporting scalar misaligned loads in userspace because they
were part of the ISA at the time we froze the uABI.  That wasn't the
case for vector misaligned accesses, so depending on them
unconditionally is a userspace bug.  All extant vector hardware traps on
these misaligned accesses.

Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20240524185600.5919-1-palmer@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 Documentation/arch/riscv/uabi.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/arch/riscv/uabi.rst b/Documentation/arch/riscv/uabi.rst
index 54d199dce78b..2b420bab0527 100644
--- a/Documentation/arch/riscv/uabi.rst
+++ b/Documentation/arch/riscv/uabi.rst
@@ -65,4 +65,6 @@ the extension, or may have deliberately removed it from the listing.
 Misaligned accesses
 -------------------
 
-Misaligned accesses are supported in userspace, but they may perform poorly.
+Misaligned scalar accesses are supported in userspace, but they may perform
+poorly.  Misaligned vector accesses are only supported if the Zicclsm extension
+is supported.
-- 
cgit v1.2.3


From 1d84afaf02524d2558e8ca3ca169be2ef720380b Mon Sep 17 00:00:00 2001
From: Alexandre Ghiti <alexghiti@rivosinc.com>
Date: Thu, 30 May 2024 16:55:46 +0200
Subject: riscv: Fix fully ordered LR/SC xchg[8|16]() implementations

The fully ordered versions of xchg[8|16]() using LR/SC lack the
necessary memory barriers to guarantee the order.

Fix this by matching what is already implemented in the fully ordered
versions of cmpxchg() using LR/SC.

Suggested-by: Andrea Parri <parri.andrea@gmail.com>
Reported-by: Andrea Parri <parri.andrea@gmail.com>
Closes: https://lore.kernel.org/linux-riscv/ZlYbupL5XgzgA0MX@andrea/T/#u
Fixes: a8ed2b7a2c13 ("riscv/cmpxchg: Implement xchg for variables of size 1 and 2")
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Link: https://lore.kernel.org/r/20240530145546.394248-1-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/cmpxchg.h | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index ddb002ed89de..808b4c78462e 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -10,7 +10,7 @@
 
 #include <asm/fence.h>
 
-#define __arch_xchg_masked(prepend, append, r, p, n)			\
+#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n)		\
 ({									\
 	u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);			\
 	ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;	\
@@ -25,7 +25,7 @@
 	       "0:	lr.w %0, %2\n"					\
 	       "	and  %1, %0, %z4\n"				\
 	       "	or   %1, %1, %z3\n"				\
-	       "	sc.w %1, %1, %2\n"				\
+	       "	sc.w" sc_sfx " %1, %1, %2\n"			\
 	       "	bnez %1, 0b\n"					\
 	       append							\
 	       : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))	\
@@ -46,7 +46,8 @@
 		: "memory");						\
 })
 
-#define _arch_xchg(ptr, new, sfx, prepend, append)			\
+#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend,			\
+		   sc_append, swap_append)				\
 ({									\
 	__typeof__(ptr) __ptr = (ptr);					\
 	__typeof__(*(__ptr)) __new = (new);				\
@@ -55,15 +56,15 @@
 	switch (sizeof(*__ptr)) {					\
 	case 1:								\
 	case 2:								\
-		__arch_xchg_masked(prepend, append,			\
+		__arch_xchg_masked(sc_sfx, prepend, sc_append,		\
 				   __ret, __ptr, __new);		\
 		break;							\
 	case 4:								\
-		__arch_xchg(".w" sfx, prepend, append,			\
+		__arch_xchg(".w" swap_sfx, prepend, swap_append,	\
 			      __ret, __ptr, __new);			\
 		break;							\
 	case 8:								\
-		__arch_xchg(".d" sfx, prepend, append,			\
+		__arch_xchg(".d" swap_sfx, prepend, swap_append,	\
 			      __ret, __ptr, __new);			\
 		break;							\
 	default:							\
@@ -73,16 +74,17 @@
 })
 
 #define arch_xchg_relaxed(ptr, x)					\
-	_arch_xchg(ptr, x, "", "", "")
+	_arch_xchg(ptr, x, "", "", "", "", "")
 
 #define arch_xchg_acquire(ptr, x)					\
-	_arch_xchg(ptr, x, "", "", RISCV_ACQUIRE_BARRIER)
+	_arch_xchg(ptr, x, "", "", "",					\
+		   RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)
 
 #define arch_xchg_release(ptr, x)					\
-	_arch_xchg(ptr, x, "", RISCV_RELEASE_BARRIER, "")
+	_arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")
 
 #define arch_xchg(ptr, x)						\
-	_arch_xchg(ptr, x, ".aqrl", "", "")
+	_arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")
 
 #define xchg32(ptr, x)							\
 ({									\
-- 
cgit v1.2.3


From cb708ab9f584f159798b60853edcf0c8b67ce295 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Tue, 28 May 2024 19:29:38 -0700
Subject: selftests/futex: pass _GNU_SOURCE without a value to the compiler

It's slightly better to set _GNU_SOURCE in the source code, but if one
must do it via the compiler invocation, then the best way to do so is
this:

    $(CC) -D_GNU_SOURCE=

...because otherwise, if this form is used:

    $(CC) -D_GNU_SOURCE

...then that leads the compiler to set a value, as if you had passed in:

    $(CC) -D_GNU_SOURCE=1

That, in turn, leads to warnings under both gcc and clang, like this:

    futex_requeue_pi.c:20: warning: "_GNU_SOURCE" redefined

Fix this by using the "-D_GNU_SOURCE=" form.

Reviewed-by: Edward Liaw <edliaw@google.com>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/futex/functional/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index a392d0917b4e..994fa3468f17 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 INCLUDES := -I../include -I../../ $(KHDR_INCLUDES)
-CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES) $(KHDR_INCLUDES)
+CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE= -pthread $(INCLUDES) $(KHDR_INCLUDES)
 LDLIBS := -lpthread -lrt
 
 LOCAL_HDRS := \
-- 
cgit v1.2.3


From 18414a4a2eabb0281d12d374c92874327e0e3fe3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 30 May 2024 13:35:50 -0600
Subject: io_uring/net: assign kmsg inq/flags before buffer selection

syzbot reports that recv is using an uninitialized value:

=====================================================
BUG: KMSAN: uninit-value in io_req_cqe_overflow io_uring/io_uring.c:810 [inline]
BUG: KMSAN: uninit-value in io_req_complete_post io_uring/io_uring.c:937 [inline]
BUG: KMSAN: uninit-value in io_issue_sqe+0x1f1b/0x22c0 io_uring/io_uring.c:1763
 io_req_cqe_overflow io_uring/io_uring.c:810 [inline]
 io_req_complete_post io_uring/io_uring.c:937 [inline]
 io_issue_sqe+0x1f1b/0x22c0 io_uring/io_uring.c:1763
 io_wq_submit_work+0xa17/0xeb0 io_uring/io_uring.c:1860
 io_worker_handle_work+0xc04/0x2000 io_uring/io-wq.c:597
 io_wq_worker+0x447/0x1410 io_uring/io-wq.c:651
 ret_from_fork+0x6d/0x90 arch/x86/kernel/process.c:147
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244

Uninit was stored to memory at:
 io_req_set_res io_uring/io_uring.h:215 [inline]
 io_recv_finish+0xf10/0x1560 io_uring/net.c:861
 io_recv+0x12ec/0x1ea0 io_uring/net.c:1175
 io_issue_sqe+0x429/0x22c0 io_uring/io_uring.c:1751
 io_wq_submit_work+0xa17/0xeb0 io_uring/io_uring.c:1860
 io_worker_handle_work+0xc04/0x2000 io_uring/io-wq.c:597
 io_wq_worker+0x447/0x1410 io_uring/io-wq.c:651
 ret_from_fork+0x6d/0x90 arch/x86/kernel/process.c:147
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244

Uninit was created at:
 slab_post_alloc_hook mm/slub.c:3877 [inline]
 slab_alloc_node mm/slub.c:3918 [inline]
 __do_kmalloc_node mm/slub.c:4038 [inline]
 __kmalloc+0x6e4/0x1060 mm/slub.c:4052
 kmalloc include/linux/slab.h:632 [inline]
 io_alloc_async_data+0xc0/0x220 io_uring/io_uring.c:1662
 io_msg_alloc_async io_uring/net.c:166 [inline]
 io_recvmsg_prep_setup io_uring/net.c:725 [inline]
 io_recvmsg_prep+0xbe8/0x1a20 io_uring/net.c:806
 io_init_req io_uring/io_uring.c:2135 [inline]
 io_submit_sqe io_uring/io_uring.c:2182 [inline]
 io_submit_sqes+0x1135/0x2f10 io_uring/io_uring.c:2335
 __do_sys_io_uring_enter io_uring/io_uring.c:3246 [inline]
 __se_sys_io_uring_enter+0x40f/0x3c80 io_uring/io_uring.c:3183
 __x64_sys_io_uring_enter+0x11f/0x1a0 io_uring/io_uring.c:3183
 x64_sys_call+0x2c0/0x3b50 arch/x86/include/generated/asm/syscalls_64.h:427
 do_syscall_x64 arch/x86/entry/common.c:52 [inline]
 do_syscall_64+0xcf/0x1e0 arch/x86/entry/common.c:83
 entry_SYSCALL_64_after_hwframe+0x77/0x7f

which appears to be io_recv_finish() reading kmsg->msg.msg_inq to decide
if it needs to set IORING_CQE_F_SOCK_NONEMPTY or not. If the recv is
entered with buffer selection, but no buffer is available, then we jump
error path which calls io_recv_finish() without having assigned
kmsg->msg_inq. This might cause an errant setting of the NONEMPTY flag
for a request get gets errored with -ENOBUFS.

Reported-by: syzbot+b1647099e82b3b349fbf@syzkaller.appspotmail.com
Fixes: 4a3223f7bfda ("io_uring/net: switch io_recv() to using io_async_msghdr")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/net.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/io_uring/net.c b/io_uring/net.c
index 0a48596429d9..7c98c4d50946 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1127,6 +1127,9 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 		flags |= MSG_DONTWAIT;
 
 retry_multishot:
+	kmsg->msg.msg_inq = -1;
+	kmsg->msg.msg_flags = 0;
+
 	if (io_do_buffer_select(req)) {
 		ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
 		if (unlikely(ret))
@@ -1134,9 +1137,6 @@ retry_multishot:
 		sr->buf = NULL;
 	}
 
-	kmsg->msg.msg_inq = -1;
-	kmsg->msg.msg_flags = 0;
-
 	if (flags & MSG_WAITALL)
 		min_ret = iov_iter_count(&kmsg->msg.msg_iter);
 
-- 
cgit v1.2.3


From b164316808ec5de391c3e7b0148ec937d32d280d Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Thu, 30 May 2024 14:40:32 +0900
Subject: null_blk: Do not allow runt zone with zone capacity smaller then zone
 size

A zoned device with a smaller last zone together with a zone capacity
smaller than the zone size does make any sense as that does not
correspond to any possible setup for a real device:
1) For ZNS and zoned UFS devices, all zones are always the same size.
2) For SMR HDDs, all zones always have the same capacity.
In other words, if we have a smaller last runt zone, then this zone
capacity should always be equal to the zone size.

Add a check in null_init_zoned_dev() to prevent a configuration to have
both a smaller zone size and a zone capacity smaller than the zone size.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Niklas Cassel <cassel@kernel.org>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20240530054035.491497-2-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk/zoned.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 79c8e5e99f7f..f118d304f310 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -74,6 +74,17 @@ int null_init_zoned_dev(struct nullb_device *dev,
 		return -EINVAL;
 	}
 
+	/*
+	 * If a smaller zone capacity was requested, do not allow a smaller last
+	 * zone at the same time as such zone configuration does not correspond
+	 * to any real zoned device.
+	 */
+	if (dev->zone_capacity != dev->zone_size &&
+	    dev->size & (dev->zone_size - 1)) {
+		pr_err("A smaller last zone is not allowed with zone capacity smaller than zone size.\n");
+		return -EINVAL;
+	}
+
 	zone_capacity_sects = mb_to_sects(dev->zone_capacity);
 	dev_capacity_sects = mb_to_sects(dev->size);
 	dev->zone_size_sects = mb_to_sects(dev->zone_size);
-- 
cgit v1.2.3


From cd6399936869b4a042dd1270078cbf2bb871a407 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Thu, 30 May 2024 14:40:33 +0900
Subject: block: Fix validation of zoned device with a runt zone

Commit ecfe43b11b02 ("block: Remember zone capacity when revalidating
zones") introduced checks to ensure that the capacity of the zones of
a zoned device is constant for all zones. However, this check ignores
the possibility that a zoned device has a smaller last zone with a size
not equal to the capacity of other zones. Such device correspond in
practice to an SMR drive with a smaller last zone and all zones with a
capacity equal to the zone size, leading to the last zone capacity being
different than the capacity of other zones.

Correctly handle such device by fixing the check for the constant zone
capacity in blk_revalidate_seq_zone() using the new helper function
disk_zone_is_last(). This helper function is also used in
blk_revalidate_zone_cb() when checking the zone size.

Fixes: ecfe43b11b02 ("block: Remember zone capacity when revalidating zones")
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Niklas Cassel <cassel@kernel.org>
Link: https://lore.kernel.org/r/20240530054035.491497-3-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-zoned.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 03aa4eead39e..402a50a1ac4d 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -450,6 +450,11 @@ static inline bool disk_zone_is_conv(struct gendisk *disk, sector_t sector)
 	return test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap);
 }
 
+static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone)
+{
+	return zone->start + zone->len >= get_capacity(disk);
+}
+
 static bool disk_insert_zone_wplug(struct gendisk *disk,
 				   struct blk_zone_wplug *zwplug)
 {
@@ -1693,11 +1698,13 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
 
 	/*
 	 * Remember the capacity of the first sequential zone and check
-	 * if it is constant for all zones.
+	 * if it is constant for all zones, ignoring the last zone as it can be
+	 * smaller.
 	 */
 	if (!args->zone_capacity)
 		args->zone_capacity = zone->capacity;
-	if (zone->capacity != args->zone_capacity) {
+	if (!disk_zone_is_last(disk, zone) &&
+	    zone->capacity != args->zone_capacity) {
 		pr_warn("%s: Invalid variable zone capacity\n",
 			disk->disk_name);
 		return -ENODEV;
@@ -1732,7 +1739,6 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
 {
 	struct blk_revalidate_zone_args *args = data;
 	struct gendisk *disk = args->disk;
-	sector_t capacity = get_capacity(disk);
 	sector_t zone_sectors = disk->queue->limits.chunk_sectors;
 	int ret;
 
@@ -1743,7 +1749,7 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
 		return -ENODEV;
 	}
 
-	if (zone->start >= capacity || !zone->len) {
+	if (zone->start >= get_capacity(disk) || !zone->len) {
 		pr_warn("%s: Invalid zone start %llu, length %llu\n",
 			disk->disk_name, zone->start, zone->len);
 		return -ENODEV;
@@ -1753,7 +1759,7 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
 	 * All zones must have the same size, with the exception on an eventual
 	 * smaller last zone.
 	 */
-	if (zone->start + zone->len < capacity) {
+	if (!disk_zone_is_last(disk, zone)) {
 		if (zone->len != zone_sectors) {
 			pr_warn("%s: Invalid zoned device with non constant zone size\n",
 				disk->disk_name);
-- 
cgit v1.2.3


From 29459c3eaa5c6261fbe0dea7bdeb9b48d35d862a Mon Sep 17 00:00:00 2001
From: Damien Le Moal <dlemoal@kernel.org>
Date: Thu, 30 May 2024 14:40:34 +0900
Subject: block: Fix zone write plugging handling of devices with a runt zone

A zoned device may have a last sequential write required zone that is
smaller than other zones. However, all tests to check if a zone write
plug write offset exceeds the zone capacity use the same capacity
value stored in the gendisk zone_capacity field. This is incorrect for a
zoned device with a last runt (smaller) zone.

Add the new field last_zone_capacity to struct gendisk to store the
capacity of the last zone of the device. blk_revalidate_seq_zone() and
blk_revalidate_conv_zone() are both modified to get this value when
disk_zone_is_last() returns true. Similarly to zone_capacity, the value
is first stored using the last_zone_capacity field of struct
blk_revalidate_zone_args. Once zone revalidation of all zones is done,
this is used to set the gendisk last_zone_capacity field.

The checks to determine if a zone is full or if a sector offset in a
zone exceeds the zone capacity in disk_should_remove_zone_wplug(),
disk_zone_wplug_abort_unaligned(), blk_zone_write_plug_init_request(),
and blk_zone_wplug_prepare_bio() are modified to use the new helper
functions disk_zone_is_full() and disk_zone_wplug_is_full().
disk_zone_is_full() uses the zone index to determine if the zone being
tested is the last one of the disk and uses the either the disk
zone_capacity or last_zone_capacity accordingly.

Fixes: dd291d77cc90 ("block: Introduce zone write plugging")
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Niklas Cassel <cassel@kernel.org>
Link: https://lore.kernel.org/r/20240530054035.491497-4-dlemoal@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-zoned.c      | 35 +++++++++++++++++++++++++++--------
 include/linux/blkdev.h |  1 +
 2 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 402a50a1ac4d..52abebf56027 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -455,6 +455,20 @@ static bool disk_zone_is_last(struct gendisk *disk, struct blk_zone *zone)
 	return zone->start + zone->len >= get_capacity(disk);
 }
 
+static bool disk_zone_is_full(struct gendisk *disk,
+			      unsigned int zno, unsigned int offset_in_zone)
+{
+	if (zno < disk->nr_zones - 1)
+		return offset_in_zone >= disk->zone_capacity;
+	return offset_in_zone >= disk->last_zone_capacity;
+}
+
+static bool disk_zone_wplug_is_full(struct gendisk *disk,
+				    struct blk_zone_wplug *zwplug)
+{
+	return disk_zone_is_full(disk, zwplug->zone_no, zwplug->wp_offset);
+}
+
 static bool disk_insert_zone_wplug(struct gendisk *disk,
 				   struct blk_zone_wplug *zwplug)
 {
@@ -548,7 +562,7 @@ static inline bool disk_should_remove_zone_wplug(struct gendisk *disk,
 		return false;
 
 	/* We can remove zone write plugs for zones that are empty or full. */
-	return !zwplug->wp_offset || zwplug->wp_offset >= disk->zone_capacity;
+	return !zwplug->wp_offset || disk_zone_wplug_is_full(disk, zwplug);
 }
 
 static void disk_remove_zone_wplug(struct gendisk *disk,
@@ -669,13 +683,12 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
 static void disk_zone_wplug_abort_unaligned(struct gendisk *disk,
 					    struct blk_zone_wplug *zwplug)
 {
-	unsigned int zone_capacity = disk->zone_capacity;
 	unsigned int wp_offset = zwplug->wp_offset;
 	struct bio_list bl = BIO_EMPTY_LIST;
 	struct bio *bio;
 
 	while ((bio = bio_list_pop(&zwplug->bio_list))) {
-		if (wp_offset >= zone_capacity ||
+		if (disk_zone_is_full(disk, zwplug->zone_no, wp_offset) ||
 		    (bio_op(bio) != REQ_OP_ZONE_APPEND &&
 		     bio_offset_from_zone_start(bio) != wp_offset)) {
 			blk_zone_wplug_bio_io_error(zwplug, bio);
@@ -914,7 +927,6 @@ void blk_zone_write_plug_init_request(struct request *req)
 	sector_t req_back_sector = blk_rq_pos(req) + blk_rq_sectors(req);
 	struct request_queue *q = req->q;
 	struct gendisk *disk = q->disk;
-	unsigned int zone_capacity = disk->zone_capacity;
 	struct blk_zone_wplug *zwplug =
 		disk_get_zone_wplug(disk, blk_rq_pos(req));
 	unsigned long flags;
@@ -938,7 +950,7 @@ void blk_zone_write_plug_init_request(struct request *req)
 	 * into the back of the request.
 	 */
 	spin_lock_irqsave(&zwplug->lock, flags);
-	while (zwplug->wp_offset < zone_capacity) {
+	while (!disk_zone_wplug_is_full(disk, zwplug)) {
 		bio = bio_list_peek(&zwplug->bio_list);
 		if (!bio)
 			break;
@@ -984,7 +996,7 @@ static bool blk_zone_wplug_prepare_bio(struct blk_zone_wplug *zwplug,
 	 * We know such BIO will fail, and that would potentially overflow our
 	 * write pointer offset beyond the end of the zone.
 	 */
-	if (zwplug->wp_offset >= disk->zone_capacity)
+	if (disk_zone_wplug_is_full(disk, zwplug))
 		goto err;
 
 	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
@@ -1561,6 +1573,7 @@ void disk_free_zone_resources(struct gendisk *disk)
 	kfree(disk->conv_zones_bitmap);
 	disk->conv_zones_bitmap = NULL;
 	disk->zone_capacity = 0;
+	disk->last_zone_capacity = 0;
 	disk->nr_zones = 0;
 }
 
@@ -1605,6 +1618,7 @@ struct blk_revalidate_zone_args {
 	unsigned long	*conv_zones_bitmap;
 	unsigned int	nr_zones;
 	unsigned int	zone_capacity;
+	unsigned int	last_zone_capacity;
 	sector_t	sector;
 };
 
@@ -1622,6 +1636,7 @@ static int disk_update_zone_resources(struct gendisk *disk,
 
 	disk->nr_zones = args->nr_zones;
 	disk->zone_capacity = args->zone_capacity;
+	disk->last_zone_capacity = args->last_zone_capacity;
 	swap(disk->conv_zones_bitmap, args->conv_zones_bitmap);
 	if (disk->conv_zones_bitmap)
 		nr_conv_zones = bitmap_weight(disk->conv_zones_bitmap,
@@ -1673,6 +1688,9 @@ static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
 		return -ENODEV;
 	}
 
+	if (disk_zone_is_last(disk, zone))
+		args->last_zone_capacity = zone->capacity;
+
 	if (!disk_need_zone_resources(disk))
 		return 0;
 
@@ -1703,8 +1721,9 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx,
 	 */
 	if (!args->zone_capacity)
 		args->zone_capacity = zone->capacity;
-	if (!disk_zone_is_last(disk, zone) &&
-	    zone->capacity != args->zone_capacity) {
+	if (disk_zone_is_last(disk, zone)) {
+		args->last_zone_capacity = zone->capacity;
+	} else if (zone->capacity != args->zone_capacity) {
 		pr_warn("%s: Invalid variable zone capacity\n",
 			disk->disk_name);
 		return -ENODEV;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index aefdda9f4ec7..24c36929920b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -186,6 +186,7 @@ struct gendisk {
 	 */
 	unsigned int		nr_zones;
 	unsigned int		zone_capacity;
+	unsigned int		last_zone_capacity;
 	unsigned long		*conv_zones_bitmap;
 	unsigned int            zone_wplugs_hash_bits;
 	spinlock_t              zone_wplugs_lock;
-- 
cgit v1.2.3


From d53b681ce9ca7db5ef4ecb8d2cf465ae4a031264 Mon Sep 17 00:00:00 2001
From: Chanwoo Lee <cw9316.lee@samsung.com>
Date: Fri, 24 May 2024 10:59:04 +0900
Subject: scsi: ufs: mcq: Fix error output and clean up ufshcd_mcq_abort()

An error unrelated to ufshcd_try_to_abort_task is being logged and can
cause confusion. Modify ufshcd_mcq_abort() to print the result of the abort
failure. For readability, return immediately instead of 'goto'.

Fixes: f1304d442077 ("scsi: ufs: mcq: Added ufshcd_mcq_abort()")
Signed-off-by: Chanwoo Lee <cw9316.lee@samsung.com>
Link: https://lore.kernel.org/r/20240524015904.1116005-1-cw9316.lee@samsung.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufs-mcq.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index 005d63ab1f44..8944548c30fa 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -634,20 +634,20 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
 	struct ufshcd_lrb *lrbp = &hba->lrb[tag];
 	struct ufs_hw_queue *hwq;
 	unsigned long flags;
-	int err = FAILED;
+	int err;
 
 	if (!ufshcd_cmd_inflight(lrbp->cmd)) {
 		dev_err(hba->dev,
 			"%s: skip abort. cmd at tag %d already completed.\n",
 			__func__, tag);
-		goto out;
+		return FAILED;
 	}
 
 	/* Skip task abort in case previous aborts failed and report failure */
 	if (lrbp->req_abort_skip) {
 		dev_err(hba->dev, "%s: skip abort. tag %d failed earlier\n",
 			__func__, tag);
-		goto out;
+		return FAILED;
 	}
 
 	hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd));
@@ -659,7 +659,7 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
 		 */
 		dev_err(hba->dev, "%s: cmd found in sq. hwq=%d, tag=%d\n",
 			__func__, hwq->id, tag);
-		goto out;
+		return FAILED;
 	}
 
 	/*
@@ -667,18 +667,17 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd)
 	 * in the completion queue either. Query the device to see if
 	 * the command is being processed in the device.
 	 */
-	if (ufshcd_try_to_abort_task(hba, tag)) {
+	err = ufshcd_try_to_abort_task(hba, tag);
+	if (err) {
 		dev_err(hba->dev, "%s: device abort failed %d\n", __func__, err);
 		lrbp->req_abort_skip = true;
-		goto out;
+		return FAILED;
 	}
 
-	err = SUCCESS;
 	spin_lock_irqsave(&hwq->cq_lock, flags);
 	if (ufshcd_cmd_inflight(lrbp->cmd))
 		ufshcd_release_scsi_cmd(hba, lrbp);
 	spin_unlock_irqrestore(&hwq->cq_lock, flags);
 
-out:
-	return err;
+	return SUCCESS;
 }
-- 
cgit v1.2.3


From 935df1bd40d43c4ee91838c42a20e9af751885cc Mon Sep 17 00:00:00 2001
From: "Rob Herring (Arm)" <robh@kernel.org>
Date: Wed, 29 May 2024 14:59:20 -0500
Subject: of/irq: Factor out parsing of interrupt-map parent phandle+args from
 of_irq_parse_raw()

Factor out the parsing of interrupt-map interrupt parent phandle and its
arg cells to a separate function, of_irq_parse_imap_parent(), so that it
can be used in other parsing scenarios (e.g. fw_devlink).

There was a refcount leak on non-matching entries when iterating thru
"interrupt-map" which is fixed.

Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Anup Patel <apatel@ventanamicro.com>
Link: https://lore.kernel.org/r/20240529-dt-interrupt-map-fix-v2-1-ef86dc5bcd2a@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 drivers/of/irq.c        | 125 ++++++++++++++++++++++++++++--------------------
 drivers/of/of_private.h |   3 ++
 2 files changed, 77 insertions(+), 51 deletions(-)

diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 174900072c18..462375b293e4 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -25,6 +25,8 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 
+#include "of_private.h"
+
 /**
  * irq_of_parse_and_map - Parse and map an interrupt into linux virq space
  * @dev: Device node of the device whose interrupt is to be mapped
@@ -96,6 +98,57 @@ static const char * const of_irq_imap_abusers[] = {
 	NULL,
 };
 
+const __be32 *of_irq_parse_imap_parent(const __be32 *imap, int len, struct of_phandle_args *out_irq)
+{
+	u32 intsize, addrsize;
+	struct device_node *np;
+
+	/* Get the interrupt parent */
+	if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
+		np = of_node_get(of_irq_dflt_pic);
+	else
+		np = of_find_node_by_phandle(be32_to_cpup(imap));
+	imap++;
+
+	/* Check if not found */
+	if (!np) {
+		pr_debug(" -> imap parent not found !\n");
+		return NULL;
+	}
+
+	/* Get #interrupt-cells and #address-cells of new parent */
+	if (of_property_read_u32(np, "#interrupt-cells",
+					&intsize)) {
+		pr_debug(" -> parent lacks #interrupt-cells!\n");
+		of_node_put(np);
+		return NULL;
+	}
+	if (of_property_read_u32(np, "#address-cells",
+					&addrsize))
+		addrsize = 0;
+
+	pr_debug(" -> intsize=%d, addrsize=%d\n",
+		intsize, addrsize);
+
+	/* Check for malformed properties */
+	if (WARN_ON(addrsize + intsize > MAX_PHANDLE_ARGS)
+		|| (len < (addrsize + intsize))) {
+		of_node_put(np);
+		return NULL;
+	}
+
+	pr_debug(" -> imaplen=%d\n", len);
+
+	imap += addrsize + intsize;
+
+	out_irq->np = np;
+	for (int i = 0; i < intsize; i++)
+		out_irq->args[i] = be32_to_cpup(imap - intsize + i);
+	out_irq->args_count = intsize;
+
+	return imap;
+}
+
 /**
  * of_irq_parse_raw - Low level interrupt tree parsing
  * @addr:	address specifier (start of "reg" property of the device) in be32 format
@@ -112,12 +165,12 @@ static const char * const of_irq_imap_abusers[] = {
  */
 int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
 {
-	struct device_node *ipar, *tnode, *old = NULL, *newpar = NULL;
+	struct device_node *ipar, *tnode, *old = NULL;
 	__be32 initial_match_array[MAX_PHANDLE_ARGS];
 	const __be32 *match_array = initial_match_array;
-	const __be32 *tmp, *imap, *imask, dummy_imask[] = { [0 ... MAX_PHANDLE_ARGS] = cpu_to_be32(~0) };
-	u32 intsize = 1, addrsize, newintsize = 0, newaddrsize = 0;
-	int imaplen, match, i, rc = -EINVAL;
+	const __be32 *tmp, dummy_imask[] = { [0 ... MAX_PHANDLE_ARGS] = cpu_to_be32(~0) };
+	u32 intsize = 1, addrsize;
+	int i, rc = -EINVAL;
 
 #ifdef DEBUG
 	of_print_phandle_args("of_irq_parse_raw: ", out_irq);
@@ -176,6 +229,9 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
 
 	/* Now start the actual "proper" walk of the interrupt tree */
 	while (ipar != NULL) {
+		int imaplen, match;
+		const __be32 *imap, *oldimap, *imask;
+		struct device_node *newpar;
 		/*
 		 * Now check if cursor is an interrupt-controller and
 		 * if it is then we are done, unless there is an
@@ -216,7 +272,7 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
 
 		/* Parse interrupt-map */
 		match = 0;
-		while (imaplen > (addrsize + intsize + 1) && !match) {
+		while (imaplen > (addrsize + intsize + 1)) {
 			/* Compare specifiers */
 			match = 1;
 			for (i = 0; i < (addrsize + intsize); i++, imaplen--)
@@ -224,48 +280,17 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
 
 			pr_debug(" -> match=%d (imaplen=%d)\n", match, imaplen);
 
-			/* Get the interrupt parent */
-			if (of_irq_workarounds & OF_IMAP_NO_PHANDLE)
-				newpar = of_node_get(of_irq_dflt_pic);
-			else
-				newpar = of_find_node_by_phandle(be32_to_cpup(imap));
-			imap++;
-			--imaplen;
-
-			/* Check if not found */
-			if (newpar == NULL) {
-				pr_debug(" -> imap parent not found !\n");
-				goto fail;
-			}
-
-			if (!of_device_is_available(newpar))
-				match = 0;
-
-			/* Get #interrupt-cells and #address-cells of new
-			 * parent
-			 */
-			if (of_property_read_u32(newpar, "#interrupt-cells",
-						 &newintsize)) {
-				pr_debug(" -> parent lacks #interrupt-cells!\n");
-				goto fail;
-			}
-			if (of_property_read_u32(newpar, "#address-cells",
-						 &newaddrsize))
-				newaddrsize = 0;
-
-			pr_debug(" -> newintsize=%d, newaddrsize=%d\n",
-			    newintsize, newaddrsize);
-
-			/* Check for malformed properties */
-			if (WARN_ON(newaddrsize + newintsize > MAX_PHANDLE_ARGS)
-			    || (imaplen < (newaddrsize + newintsize))) {
-				rc = -EFAULT;
+			oldimap = imap;
+			imap = of_irq_parse_imap_parent(oldimap, imaplen, out_irq);
+			if (!imap)
 				goto fail;
-			}
 
-			imap += newaddrsize + newintsize;
-			imaplen -= newaddrsize + newintsize;
+			match &= of_device_is_available(out_irq->np);
+			if (match)
+				break;
 
+			of_node_put(out_irq->np);
+			imaplen -= imap - oldimap;
 			pr_debug(" -> imaplen=%d\n", imaplen);
 		}
 		if (!match) {
@@ -287,11 +312,11 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
 		 * Successfully parsed an interrupt-map translation; copy new
 		 * interrupt specifier into the out_irq structure
 		 */
-		match_array = imap - newaddrsize - newintsize;
-		for (i = 0; i < newintsize; i++)
-			out_irq->args[i] = be32_to_cpup(imap - newintsize + i);
-		out_irq->args_count = intsize = newintsize;
-		addrsize = newaddrsize;
+		match_array = oldimap + 1;
+
+		newpar = out_irq->np;
+		intsize = out_irq->args_count;
+		addrsize = (imap - match_array) - intsize;
 
 		if (ipar == newpar) {
 			pr_debug("%pOF interrupt-map entry to self\n", ipar);
@@ -300,7 +325,6 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
 
 	skiplevel:
 		/* Iterate again with new parent */
-		out_irq->np = newpar;
 		pr_debug(" -> new parent: %pOF\n", newpar);
 		of_node_put(ipar);
 		ipar = newpar;
@@ -310,7 +334,6 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
 
  fail:
 	of_node_put(ipar);
-	of_node_put(newpar);
 
 	return rc;
 }
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index 94fc0aa07af9..04aa2a91f851 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -159,6 +159,9 @@ extern void __of_sysfs_remove_bin_file(struct device_node *np,
 extern int of_bus_n_addr_cells(struct device_node *np);
 extern int of_bus_n_size_cells(struct device_node *np);
 
+const __be32 *of_irq_parse_imap_parent(const __be32 *imap, int len,
+				       struct of_phandle_args *out_irq);
+
 struct bus_dma_region;
 #if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_HAS_DMA)
 int of_dma_get_range(struct device_node *np,
-- 
cgit v1.2.3


From e7985f43609c782132f8f5794ee6cc4cdb66ca75 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 29 May 2024 14:59:21 -0500
Subject: of: property: Fix fw_devlink handling of interrupt-map

Commit d976c6f4b32c ("of: property: Add fw_devlink support for
interrupt-map property") tried to do what it says on the tin,
but failed on a couple of points:

- it confuses bytes and cells. Not a huge deal, except when it
  comes to pointer arithmetic

- it doesn't really handle anything but interrupt-maps that have
  their parent #address-cells set to 0

The combinations of the two leads to some serious fun on my M1
box, with plenty of WARN-ON() firing all over the shop, and
amusing values being generated for interrupt specifiers.

Having 2 versions of parsing code for "interrupt-map" was a bad
idea. Now that the common parsing parts have been refactored
into of_irq_parse_imap_parent(), rework the code here to use it
instead and fix the pointer arithmetic.

Note that the dependency will be a bit different than the original code
when the interrupt-map points to another interrupt-map. In this case,
the original code would resolve to the final interrupt controller. Now
the dependency is the parent interrupt-map (which itself should have a
dependency to the parent). It is possible that a node with an
interrupt-map has no driver.

Fixes: d976c6f4b32c ("of: property: Add fw_devlink support for interrupt-map property")
Signed-off-by: Marc Zyngier <maz@kernel.org>
Co-developed-by: Rob Herring (Arm) <robh@kernel.org>
Cc: Saravana Kannan <saravanak@google.com>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Anup Patel <apatel@ventanamicro.com>
Link: https://lore.kernel.org/r/20240529-dt-interrupt-map-fix-v2-2-ef86dc5bcd2a@kernel.org
Signed-off-by: Rob Herring (Arm) <robh@kernel.org>
---
 drivers/of/property.c | 30 ++++++++++--------------------
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/drivers/of/property.c b/drivers/of/property.c
index 1c83e68f805b..164d77cb9445 100644
--- a/drivers/of/property.c
+++ b/drivers/of/property.c
@@ -1306,10 +1306,10 @@ static struct device_node *parse_interrupts(struct device_node *np,
 static struct device_node *parse_interrupt_map(struct device_node *np,
 					       const char *prop_name, int index)
 {
-	const __be32 *imap, *imap_end, *addr;
+	const __be32 *imap, *imap_end;
 	struct of_phandle_args sup_args;
 	u32 addrcells, intcells;
-	int i, imaplen;
+	int imaplen;
 
 	if (!IS_ENABLED(CONFIG_OF_IRQ))
 		return NULL;
@@ -1322,33 +1322,23 @@ static struct device_node *parse_interrupt_map(struct device_node *np,
 	addrcells = of_bus_n_addr_cells(np);
 
 	imap = of_get_property(np, "interrupt-map", &imaplen);
-	if (!imap || imaplen <= (addrcells + intcells))
+	imaplen /= sizeof(*imap);
+	if (!imap)
 		return NULL;
-	imap_end = imap + imaplen;
 
-	while (imap < imap_end) {
-		addr = imap;
-		imap += addrcells;
+	imap_end = imap + imaplen;
 
-		sup_args.np = np;
-		sup_args.args_count = intcells;
-		for (i = 0; i < intcells; i++)
-			sup_args.args[i] = be32_to_cpu(imap[i]);
-		imap += intcells;
+	for (int i = 0; imap + addrcells + intcells + 1 < imap_end; i++) {
+		imap += addrcells + intcells;
 
-		/*
-		 * Upon success, the function of_irq_parse_raw() returns
-		 * interrupt controller DT node pointer in sup_args.np.
-		 */
-		if (of_irq_parse_raw(addr, &sup_args))
+		imap = of_irq_parse_imap_parent(imap, imap_end - imap, &sup_args);
+		if (!imap)
 			return NULL;
 
-		if (!index)
+		if (i == index)
 			return sup_args.np;
 
 		of_node_put(sup_args.np);
-		imap += sup_args.args_count + 1;
-		index--;
 	}
 
 	return NULL;
-- 
cgit v1.2.3


From 0a751df4566c86e5a24f2a03290dad3d0f215692 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Thu, 30 May 2024 09:45:47 -0400
Subject: blk-throttle: Fix incorrect display of io.max

Commit bf20ab538c81 ("blk-throttle: remove CONFIG_BLK_DEV_THROTTLING_LOW")
attempts to revert the code change introduced by commit cd5ab1b0fcb4
("blk-throttle: add .low interface").  However, it leaves behind the
bps_conf[] and iops_conf[] fields in the throtl_grp structure which
aren't set anywhere in the new blk-throttle.c code but are still being
used by tg_prfill_limit() to display the limits in io.max. Now io.max
always displays the following values if a block queue is used:

	<m>:<n> rbps=0 wbps=0 riops=0 wiops=0

Fix this problem by removing bps_conf[] and iops_conf[] and use bps[]
and iops[] instead to complete the revert.

Fixes: bf20ab538c81 ("blk-throttle: remove CONFIG_BLK_DEV_THROTTLING_LOW")
Reported-by: Justin Forbes <jforbes@redhat.com>
Closes: https://github.com/containers/podman/issues/22701#issuecomment-2120627789
Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20240530134547.970075-1-longman@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-throttle.c | 24 ++++++++++++------------
 block/blk-throttle.h |  8 ++------
 2 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 0be180f9a789..c1bf73f8c75d 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1399,32 +1399,32 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
 	bps_dft = U64_MAX;
 	iops_dft = UINT_MAX;
 
-	if (tg->bps_conf[READ] == bps_dft &&
-	    tg->bps_conf[WRITE] == bps_dft &&
-	    tg->iops_conf[READ] == iops_dft &&
-	    tg->iops_conf[WRITE] == iops_dft)
+	if (tg->bps[READ] == bps_dft &&
+	    tg->bps[WRITE] == bps_dft &&
+	    tg->iops[READ] == iops_dft &&
+	    tg->iops[WRITE] == iops_dft)
 		return 0;
 
 	seq_printf(sf, "%s", dname);
-	if (tg->bps_conf[READ] == U64_MAX)
+	if (tg->bps[READ] == U64_MAX)
 		seq_printf(sf, " rbps=max");
 	else
-		seq_printf(sf, " rbps=%llu", tg->bps_conf[READ]);
+		seq_printf(sf, " rbps=%llu", tg->bps[READ]);
 
-	if (tg->bps_conf[WRITE] == U64_MAX)
+	if (tg->bps[WRITE] == U64_MAX)
 		seq_printf(sf, " wbps=max");
 	else
-		seq_printf(sf, " wbps=%llu", tg->bps_conf[WRITE]);
+		seq_printf(sf, " wbps=%llu", tg->bps[WRITE]);
 
-	if (tg->iops_conf[READ] == UINT_MAX)
+	if (tg->iops[READ] == UINT_MAX)
 		seq_printf(sf, " riops=max");
 	else
-		seq_printf(sf, " riops=%u", tg->iops_conf[READ]);
+		seq_printf(sf, " riops=%u", tg->iops[READ]);
 
-	if (tg->iops_conf[WRITE] == UINT_MAX)
+	if (tg->iops[WRITE] == UINT_MAX)
 		seq_printf(sf, " wiops=max");
 	else
-		seq_printf(sf, " wiops=%u", tg->iops_conf[WRITE]);
+		seq_printf(sf, " wiops=%u", tg->iops[WRITE]);
 
 	seq_printf(sf, "\n");
 	return 0;
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index 393c3d134b96..4d9ef5abdf21 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -95,15 +95,11 @@ struct throtl_grp {
 	bool has_rules_bps[2];
 	bool has_rules_iops[2];
 
-	/* internally used bytes per second rate limits */
+	/* bytes per second rate limits */
 	uint64_t bps[2];
-	/* user configured bps limits */
-	uint64_t bps_conf[2];
 
-	/* internally used IOPS limits */
+	/* IOPS limits */
 	unsigned int iops[2];
-	/* user configured IOPS limits */
-	unsigned int iops_conf[2];
 
 	/* Number of bytes dispatched in current slice */
 	uint64_t bytes_disp[2];
-- 
cgit v1.2.3


From 2d707b4e37f9b0c37b8b2392f91b04c5b63ea538 Mon Sep 17 00:00:00 2001
From: Yong-Xuan Wang <yongxuan.wang@sifive.com>
Date: Mon, 15 Apr 2024 14:49:04 +0800
Subject: RISC-V: KVM: No need to use mask when hart-index-bit is 0

When the maximum hart number within groups is 1, hart-index-bit is set to
0. Consequently, there is no need to restore the hart ID from IMSIC
addresses and hart-index-bit settings. Currently, QEMU and kvmtool do not
pass correct hart-index-bit values when the maximum hart number is a
power of 2, thereby avoiding this issue. Corresponding patches for QEMU
and kvmtool will also be dispatched.

Fixes: 89d01306e34d ("RISC-V: KVM: Implement device interface for AIA irqchip")
Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Link: https://lore.kernel.org/r/20240415064905.25184-1-yongxuan.wang@sifive.com
Signed-off-by: Anup Patel <anup@brainfault.org>
---
 arch/riscv/kvm/aia_device.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c
index 0eb689351b7d..5cd407c6a8e4 100644
--- a/arch/riscv/kvm/aia_device.c
+++ b/arch/riscv/kvm/aia_device.c
@@ -237,10 +237,11 @@ static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr)
 
 static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr)
 {
-	u32 hart, group = 0;
+	u32 hart = 0, group = 0;
 
-	hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
-		GENMASK_ULL(aia->nr_hart_bits - 1, 0);
+	if (aia->nr_hart_bits)
+		hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
+		       GENMASK_ULL(aia->nr_hart_bits - 1, 0);
 	if (aia->nr_group_bits)
 		group = (addr >> aia->nr_group_shift) &
 			GENMASK_ULL(aia->nr_group_bits - 1, 0);
-- 
cgit v1.2.3


From c66f3b40b17d3dfc4b6abb5efde8e71c46971821 Mon Sep 17 00:00:00 2001
From: Quan Zhou <zhouquan@iscas.ac.cn>
Date: Thu, 23 May 2024 10:13:34 +0800
Subject: RISC-V: KVM: Fix incorrect reg_subtype labels in
 kvm_riscv_vcpu_set_reg_isa_ext function

In the function kvm_riscv_vcpu_set_reg_isa_ext, the original code
used incorrect reg_subtype labels KVM_REG_RISCV_SBI_MULTI_EN/DIS.
These have been corrected to KVM_REG_RISCV_ISA_MULTI_EN/DIS respectively.
Although they are numerically equivalent, the actual processing
will not result in errors, but it may lead to ambiguous code semantics.

Fixes: 613029442a4b ("RISC-V: KVM: Extend ONE_REG to enable/disable multiple ISA extensions")
Signed-off-by: Quan Zhou <zhouquan@iscas.ac.cn>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Link: https://lore.kernel.org/r/ff1c6771a67d660db94372ac9aaa40f51e5e0090.1716429371.git.zhouquan@iscas.ac.cn
Signed-off-by: Anup Patel <anup@brainfault.org>
---
 arch/riscv/kvm/vcpu_onereg.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c
index c676275ea0a0..62874fbca29f 100644
--- a/arch/riscv/kvm/vcpu_onereg.c
+++ b/arch/riscv/kvm/vcpu_onereg.c
@@ -724,9 +724,9 @@ static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
 	switch (reg_subtype) {
 	case KVM_REG_RISCV_ISA_SINGLE:
 		return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val);
-	case KVM_REG_RISCV_SBI_MULTI_EN:
+	case KVM_REG_RISCV_ISA_MULTI_EN:
 		return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true);
-	case KVM_REG_RISCV_SBI_MULTI_DIS:
+	case KVM_REG_RISCV_ISA_MULTI_DIS:
 		return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false);
 	default:
 		return -ENOENT;
-- 
cgit v1.2.3


From 310fa3ec2859f1c094e6e9b5d2e1ca51738c409a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 31 May 2024 09:51:07 +0200
Subject: ALSA: seq: ump: Fix swapped song position pointer data

At converting between the legacy event and UMP, the parameters for
MIDI Song Position Pointer are incorrectly stored.  It should have
been LSB -> MSB order while it stored in MSB -> LSB order.
This patch corrects the ordering.

Fixes: e9e02819a98a ("ALSA: seq: Automatic conversion of UMP events")
Link: https://lore.kernel.org/r/20240531075110.3250-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/seq/seq_ump_convert.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c
index f0f332017e66..171fb75267af 100644
--- a/sound/core/seq/seq_ump_convert.c
+++ b/sound/core/seq/seq_ump_convert.c
@@ -157,7 +157,7 @@ static void ump_system_to_one_param_ev(const union snd_ump_midi1_msg *val,
 static void ump_system_to_songpos_ev(const union snd_ump_midi1_msg *val,
 				     struct snd_seq_event *ev)
 {
-	ev->data.control.value = (val->system.parm1 << 7) | val->system.parm2;
+	ev->data.control.value = (val->system.parm2 << 7) | val->system.parm1;
 }
 
 /* Encoders for 0xf0 - 0xff */
@@ -754,8 +754,8 @@ static int system_2p_ev_to_ump_midi1(const struct snd_seq_event *event,
 {
 	data->system.type = UMP_MSG_TYPE_SYSTEM; // override
 	data->system.status = status;
-	data->system.parm1 = (event->data.control.value >> 7) & 0x7f;
-	data->system.parm2 = event->data.control.value & 0x7f;
+	data->system.parm1 = event->data.control.value & 0x7f;
+	data->system.parm2 = (event->data.control.value >> 7) & 0x7f;
 	return 1;
 }
 
-- 
cgit v1.2.3


From 0a44078f2b72abcdda47581c942bd5d0468ec50b Mon Sep 17 00:00:00 2001
From: Jeff Johnson <quic_jjohnson@quicinc.com>
Date: Thu, 30 May 2024 13:12:03 -0700
Subject: perf/x86/rapl: Add missing MODULE_DESCRIPTION() line

Fix the warning from 'make C=1 W=1':

  WARNING: modpost: missing MODULE_DESCRIPTION() in arch/x86/events/rapl.o

Signed-off-by: Jeff Johnson <quic_jjohnson@quicinc.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Link: https://lore.kernel.org/r/20240530-md-arch-x86-events-v1-1-e45ffa8af99f@quicinc.com
---
 arch/x86/events/rapl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 46e673585560..0c5e7a7c43ac 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -64,6 +64,7 @@
 #include "perf_event.h"
 #include "probe.h"
 
+MODULE_DESCRIPTION("Support Intel/AMD RAPL energy consumption counters");
 MODULE_LICENSE("GPL");
 
 /*
-- 
cgit v1.2.3


From dc8e5dfb52d56e955ad09174330252710845b8d2 Mon Sep 17 00:00:00 2001
From: Jeff Johnson <quic_jjohnson@quicinc.com>
Date: Thu, 30 May 2024 13:42:51 -0700
Subject: perf/x86/intel: Add missing MODULE_DESCRIPTION() lines

Fix the 'make W=1 C=1' warnings:

  WARNING: modpost: missing MODULE_DESCRIPTION() in arch/x86/events/intel/intel-uncore.o
  WARNING: modpost: missing MODULE_DESCRIPTION() in arch/x86/events/intel/intel-cstate.o

Signed-off-by: Jeff Johnson <quic_jjohnson@quicinc.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Link: https://lore.kernel.org/r/20240530-md-arch-x86-events-intel-v1-1-8252194ed20a@quicinc.com
---
 arch/x86/events/intel/cstate.c | 1 +
 arch/x86/events/intel/uncore.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index e64eaa8dda5a..9d6e8f13d13a 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -114,6 +114,7 @@
 #include "../perf_event.h"
 #include "../probe.h"
 
+MODULE_DESCRIPTION("Support for Intel cstate performance events");
 MODULE_LICENSE("GPL");
 
 #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format)		\
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 419c517b8594..c68f5b39952b 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -34,6 +34,7 @@ static struct event_constraint uncore_constraint_fixed =
 struct event_constraint uncore_constraint_empty =
 	EVENT_CONSTRAINT(0, 0, 0);
 
+MODULE_DESCRIPTION("Support for Intel uncore performance events");
 MODULE_LICENSE("GPL");
 
 int uncore_pcibus_to_dieid(struct pci_bus *bus)
-- 
cgit v1.2.3


From d40605a6823577a6c40fad6fb1f10a40ea0389d7 Mon Sep 17 00:00:00 2001
From: Phil Auld <pauld@redhat.com>
Date: Thu, 30 May 2024 14:15:48 -0400
Subject: sched/x86: Export 'percpu arch_freq_scale'

Commit:

  7bc263840bc3 ("sched/topology: Consolidate and clean up access to a CPU's max compute capacity")

removed rq->cpu_capacity_orig in favor of using arch_scale_freq_capacity()
calls. Export the underlying percpu symbol on x86 so that external trace
point helper modules can be made to work again.

Signed-off-by: Phil Auld <pauld@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20240530181548.2039216-1-pauld@redhat.com
---
 arch/x86/kernel/cpu/aperfmperf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index f9a8c7b7943f..b3fa61d45352 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -345,6 +345,7 @@ static DECLARE_WORK(disable_freq_invariance_work,
 		    disable_freq_invariance_workfn);
 
 DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
+EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);
 
 static void scale_freq_tick(u64 acnt, u64 mcnt)
 {
-- 
cgit v1.2.3


From 539d33b5783804f22a62bd62ff463dfd1cef4265 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <linux@treblig.org>
Date: Thu, 16 May 2024 14:37:24 +0100
Subject: drm/komeda: remove unused struct 'gamma_curve_segment'

'gamma_curve_segment' looks like it has never been used.
Remove it.

Signed-off-by: Dr. David Alan Gilbert <linux@treblig.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Acked-by: Liviu Dudau <liviu.dudau@arm.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240516133724.251750-1-linux@treblig.org
Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
---
 drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.c b/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.c
index d8e449e6ebda..50cb8f7ee6b2 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_color_mgmt.c
@@ -72,11 +72,6 @@ struct gamma_curve_sector {
 	u32 segment_width;
 };
 
-struct gamma_curve_segment {
-	u32 start;
-	u32 end;
-};
-
 static struct gamma_curve_sector sector_tbl[] = {
 	{ 0,    4,  4   },
 	{ 16,   4,  4   },
-- 
cgit v1.2.3


From 86aaa7e9d641c1ad1035ed2df88b8d0b48c86b30 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Thu, 30 May 2024 23:28:17 +0200
Subject: ata: libata-core: Add ATA_HORKAGE_NOLPM for Crucial CT240BX500SSD1

Commit 7627a0edef54 ("ata: ahci: Drop low power policy board type")
dropped the board_ahci_low_power board type, and instead enables LPM if:
-The AHCI controller reports that it supports LPM (Partial/Slumber), and
-CONFIG_SATA_MOBILE_LPM_POLICY != 0, and
-The port is not defined as external in the per port PxCMD register, and
-The port is not defined as hotplug capable in the per port PxCMD
 register.

Partial and Slumber LPM states can either be initiated by HIPM or DIPM.

For HIPM (host initiated power management) to get enabled, both the AHCI
controller and the drive have to report that they support HIPM.

For DIPM (device initiated power management) to get enabled, only the
drive has to report that it supports DIPM. However, the HBA will reject
device requests to enter LPM states which the HBA does not support.

The problem is that Crucial CT240BX500SSD1 drives do not handle low power
modes correctly. The problem was most likely not seen before because no
one had used this drive with a AHCI controller with LPM enabled.

Add a quirk so that we do not enable LPM for this drive, since we see
command timeouts if we do (even though the drive claims to support DIPM).

Fixes: 7627a0edef54 ("ata: ahci: Drop low power policy board type")
Cc: stable@vger.kernel.org
Reported-by: Aarrayy <lp610mh@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218832
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
 drivers/ata/libata-core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 4f35aab81a0a..b0ce621fe2a1 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4136,8 +4136,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	{ "PIONEER BD-RW   BDR-207M",	NULL,	ATA_HORKAGE_NOLPM },
 	{ "PIONEER BD-RW   BDR-205",	NULL,	ATA_HORKAGE_NOLPM },
 
-	/* Crucial BX100 SSD 500GB has broken LPM support */
+	/* Crucial devices with broken LPM support */
 	{ "CT500BX100SSD1",		NULL,	ATA_HORKAGE_NOLPM },
+	{ "CT240BX500SSD1",		NULL,	ATA_HORKAGE_NOLPM },
 
 	/* 512GB MX100 with MU01 firmware has both queued TRIM and LPM issues */
 	{ "Crucial_CT512MX100*",	"MU01",	ATA_HORKAGE_NO_NCQ_TRIM |
-- 
cgit v1.2.3


From 473880369304cfd4445720cdd8bae4c6f1e16e60 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Thu, 30 May 2024 23:32:44 +0200
Subject: ata: libata-core: Add ATA_HORKAGE_NOLPM for AMD Radeon S3 SSD

Commit 7627a0edef54 ("ata: ahci: Drop low power policy board type")
dropped the board_ahci_low_power board type, and instead enables LPM if:
-The AHCI controller reports that it supports LPM (Partial/Slumber), and
-CONFIG_SATA_MOBILE_LPM_POLICY != 0, and
-The port is not defined as external in the per port PxCMD register, and
-The port is not defined as hotplug capable in the per port PxCMD
 register.

Partial and Slumber LPM states can either be initiated by HIPM or DIPM.

For HIPM (host initiated power management) to get enabled, both the AHCI
controller and the drive have to report that they support HIPM.

For DIPM (device initiated power management) to get enabled, only the
drive has to report that it supports DIPM. However, the HBA will reject
device requests to enter LPM states which the HBA does not support.

The problem is that AMD Radeon S3 SSD drives do not handle low power modes
correctly. The problem was most likely not seen before because no one
had used this drive with a AHCI controller with LPM enabled.

Add a quirk so that we do not enable LPM for this drive, since we see
command timeouts if we do (even though the drive claims to support both
HIPM and DIPM).

Fixes: 7627a0edef54 ("ata: ahci: Drop low power policy board type")
Cc: stable@vger.kernel.org
Reported-by: Doru Iorgulescu <doru.iorgulescu1@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218832
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
 drivers/ata/libata-core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index b0ce621fe2a1..4ea859dc381a 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4156,6 +4156,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 						ATA_HORKAGE_ZERO_AFTER_TRIM |
 						ATA_HORKAGE_NOLPM },
 
+	/* AMD Radeon devices with broken LPM support */
+	{ "R3SL240G",			NULL,	ATA_HORKAGE_NOLPM },
+
 	/* These specific Samsung models/firmware-revs do not handle LPM well */
 	{ "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM },
 	{ "SAMSUNG SSD PM830 mSATA *",  "CXM13D1Q", ATA_HORKAGE_NOLPM },
-- 
cgit v1.2.3


From 3cb648c4dd3e8dde800fb3659250ed11f2d9efa5 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <cassel@kernel.org>
Date: Thu, 30 May 2024 23:27:04 +0200
Subject: ata: libata-core: Add ATA_HORKAGE_NOLPM for Apacer AS340

Commit 7627a0edef54 ("ata: ahci: Drop low power policy board type")
dropped the board_ahci_low_power board type, and instead enables LPM if:
-The AHCI controller reports that it supports LPM (Partial/Slumber), and
-CONFIG_SATA_MOBILE_LPM_POLICY != 0, and
-The port is not defined as external in the per port PxCMD register, and
-The port is not defined as hotplug capable in the per port PxCMD
 register.

Partial and Slumber LPM states can either be initiated by HIPM or DIPM.

For HIPM (host initiated power management) to get enabled, both the AHCI
controller and the drive have to report that they support HIPM.

For DIPM (device initiated power management) to get enabled, only the
drive has to report that it supports DIPM. However, the HBA will reject
device requests to enter LPM states which the HBA does not support.

The problem is that Apacer AS340 drives do not handle low power modes
correctly. The problem was most likely not seen before because no one
had used this drive with a AHCI controller with LPM enabled.

Add a quirk so that we do not enable LPM for this drive, since we see
command timeouts if we do (even though the drive claims to support DIPM).

Fixes: 7627a0edef54 ("ata: ahci: Drop low power policy board type")
Cc: stable@vger.kernel.org
Reported-by: Tim Teichmann <teichmanntim@outlook.de>
Closes: https://lore.kernel.org/linux-ide/87bk4pbve8.ffs@tglx/
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
 drivers/ata/libata-core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 4ea859dc381a..e1bf8a19b3c8 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4159,6 +4159,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	/* AMD Radeon devices with broken LPM support */
 	{ "R3SL240G",			NULL,	ATA_HORKAGE_NOLPM },
 
+	/* Apacer models with LPM issues */
+	{ "Apacer AS340*",		NULL,	ATA_HORKAGE_NOLPM },
+
 	/* These specific Samsung models/firmware-revs do not handle LPM well */
 	{ "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM },
 	{ "SAMSUNG SSD PM830 mSATA *",  "CXM13D1Q", ATA_HORKAGE_NOLPM },
-- 
cgit v1.2.3


From 0f42bdf59b4e428485aa922bef871bfa6cc505e0 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Fri, 31 May 2024 18:43:37 +0900
Subject: selftests/tracing: Fix event filter test to retry up to 10 times

Commit eb50d0f250e9 ("selftests/ftrace: Choose target function for filter
test from samples") choose the target function from samples, but sometimes
this test failes randomly because the target function does not hit at the
next time. So retry getting samples up to 10 times.

Fixes: eb50d0f250e9 ("selftests/ftrace: Choose target function for filter test from samples")
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 .../ftrace/test.d/filter/event-filter-function.tc    | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
index 3f74c09c56b6..118247b8dd84 100644
--- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
+++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
@@ -10,7 +10,6 @@ fail() { #msg
 }
 
 sample_events() {
-    echo > trace
     echo 1 > events/kmem/kmem_cache_free/enable
     echo 1 > tracing_on
     ls > /dev/null
@@ -22,6 +21,7 @@ echo 0 > tracing_on
 echo 0 > events/enable
 
 echo "Get the most frequently calling function"
+echo > trace
 sample_events
 
 target_func=`cat trace | grep -o 'call_site=\([^+]*\)' | sed 's/call_site=//' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'`
@@ -32,7 +32,16 @@ echo > trace
 
 echo "Test event filter function name"
 echo "call_site.function == $target_func" > events/kmem/kmem_cache_free/filter
+
+sample_events
+max_retry=10
+while [ `grep kmem_cache_free trace| wc -l` -eq 0 ]; do
 sample_events
+max_retry=$((max_retry - 1))
+if [ $max_retry -eq 0 ]; then
+	exit_fail
+fi
+done
 
 hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l`
 misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l`
@@ -49,7 +58,16 @@ address=`grep " ${target_func}\$" /proc/kallsyms | cut -d' ' -f1`
 
 echo "Test event filter function address"
 echo "call_site.function == 0x$address" > events/kmem/kmem_cache_free/filter
+echo > trace
+sample_events
+max_retry=10
+while [ `grep kmem_cache_free trace| wc -l` -eq 0 ]; do
 sample_events
+max_retry=$((max_retry - 1))
+if [ $max_retry -eq 0 ]; then
+	exit_fail
+fi
+done
 
 hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l`
 misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l`
-- 
cgit v1.2.3


From 88da52ccd66e65f2e63a6c35c9dff55d448ef4dc Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Thu, 16 May 2024 20:19:34 +0200
Subject: landlock: Fix d_parent walk
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The WARN_ON_ONCE() in collect_domain_accesses() can be triggered when
trying to link a root mount point.  This cannot work in practice because
this directory is mounted, but the VFS check is done after the call to
security_path_link().

Do not use source directory's d_parent when the source directory is the
mount point.

Cc: Günther Noack <gnoack@google.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: stable@vger.kernel.org
Reported-by: syzbot+bf4903dc7e12b18ebc87@syzkaller.appspotmail.com
Fixes: b91c3e4ea756 ("landlock: Add support for file reparenting with LANDLOCK_ACCESS_FS_REFER")
Closes: https://lore.kernel.org/r/000000000000553d3f0618198200@google.com
Link: https://lore.kernel.org/r/20240516181935.1645983-2-mic@digikod.net
[mic: Fix commit message]
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 security/landlock/fs.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/security/landlock/fs.c b/security/landlock/fs.c
index 22d8b7c28074..7877a64cc6b8 100644
--- a/security/landlock/fs.c
+++ b/security/landlock/fs.c
@@ -1110,6 +1110,7 @@ static int current_check_refer_path(struct dentry *const old_dentry,
 	bool allow_parent1, allow_parent2;
 	access_mask_t access_request_parent1, access_request_parent2;
 	struct path mnt_dir;
+	struct dentry *old_parent;
 	layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {},
 		     layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {};
 
@@ -1157,9 +1158,17 @@ static int current_check_refer_path(struct dentry *const old_dentry,
 	mnt_dir.mnt = new_dir->mnt;
 	mnt_dir.dentry = new_dir->mnt->mnt_root;
 
+	/*
+	 * old_dentry may be the root of the common mount point and
+	 * !IS_ROOT(old_dentry) at the same time (e.g. with open_tree() and
+	 * OPEN_TREE_CLONE).  We do not need to call dget(old_parent) because
+	 * we keep a reference to old_dentry.
+	 */
+	old_parent = (old_dentry == mnt_dir.dentry) ? old_dentry :
+						      old_dentry->d_parent;
+
 	/* new_dir->dentry is equal to new_dentry->d_parent */
-	allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry,
-						old_dentry->d_parent,
+	allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry, old_parent,
 						&layer_masks_parent1);
 	allow_parent2 = collect_domain_accesses(
 		dom, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2);
-- 
cgit v1.2.3


From 0055f53aac80fd938bf7cdfad7ad414ca6c0e198 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Thu, 16 May 2024 20:19:35 +0200
Subject: selftests/landlock: Add layout1.refer_mount_root
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add tests to check error codes when linking or renaming a mount root
directory.  This previously triggered a kernel warning, but it is fixed
with the previous commit.

Cc: Günther Noack <gnoack@google.com>
Cc: Paul Moore <paul@paul-moore.com>
Link: https://lore.kernel.org/r/20240516181935.1645983-3-mic@digikod.net
Signed-off-by: Mickaël Salaün <mic@digikod.net>
---
 tools/testing/selftests/landlock/fs_test.c | 45 ++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 6b5a9ff88c3d..7d063c652be1 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -35,6 +35,7 @@
  * See https://sourceware.org/glibc/wiki/Synchronizing_Headers.
  */
 #include <linux/fs.h>
+#include <linux/mount.h>
 
 #include "common.h"
 
@@ -47,6 +48,13 @@ int renameat2(int olddirfd, const char *oldpath, int newdirfd,
 }
 #endif
 
+#ifndef open_tree
+int open_tree(int dfd, const char *filename, unsigned int flags)
+{
+	return syscall(__NR_open_tree, dfd, filename, flags);
+}
+#endif
+
 #ifndef RENAME_EXCHANGE
 #define RENAME_EXCHANGE (1 << 1)
 #endif
@@ -2400,6 +2408,43 @@ TEST_F_FORK(layout1, refer_denied_by_default4)
 				layer_dir_s1d1_refer);
 }
 
+/*
+ * Tests walking through a denied root mount.
+ */
+TEST_F_FORK(layout1, refer_mount_root_deny)
+{
+	const struct landlock_ruleset_attr ruleset_attr = {
+		.handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_DIR,
+	};
+	int root_fd, ruleset_fd;
+
+	/* Creates a mount object from a non-mount point. */
+	set_cap(_metadata, CAP_SYS_ADMIN);
+	root_fd =
+		open_tree(AT_FDCWD, dir_s1d1,
+			  AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	clear_cap(_metadata, CAP_SYS_ADMIN);
+	ASSERT_LE(0, root_fd);
+
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	ASSERT_LE(0, ruleset_fd);
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+	ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+	EXPECT_EQ(0, close(ruleset_fd));
+
+	/* Link denied by Landlock: EACCES. */
+	EXPECT_EQ(-1, linkat(root_fd, ".", root_fd, "does_not_exist", 0));
+	EXPECT_EQ(EACCES, errno);
+
+	/* renameat2() always returns EBUSY. */
+	EXPECT_EQ(-1, renameat2(root_fd, ".", root_fd, "does_not_exist", 0));
+	EXPECT_EQ(EBUSY, errno);
+
+	EXPECT_EQ(0, close(root_fd));
+}
+
 TEST_F_FORK(layout1, reparent_link)
 {
 	const struct rule layer1[] = {
-- 
cgit v1.2.3


From 518549c120e671c4906f77d1802b97e9b23f673a Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Wed, 29 May 2024 18:16:56 -0500
Subject: cifs: fix creating sockets when using sfu mount options

When running fstest generic/423 with sfu mount option, it
was being skipped due to inability to create sockets:

  generic/423  [not run] cifs does not support mknod/mkfifo

which can also be easily reproduced with their af_unix tool:

  ./src/af_unix /mnt1/socket-two bind: Operation not permitted

Fix sfu mount option to allow creating and reporting sockets.

Cc: stable@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cifspdu.h | 2 +-
 fs/smb/client/inode.c   | 4 ++++
 fs/smb/client/smb2ops.c | 3 +++
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h
index c46d418c1c0c..a2072ab9e586 100644
--- a/fs/smb/client/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -2574,7 +2574,7 @@ typedef struct {
 
 
 struct win_dev {
-	unsigned char type[8]; /* IntxCHR or IntxBLK or LnxFIFO*/
+	unsigned char type[8]; /* IntxCHR or IntxBLK or LnxFIFO or LnxSOCK */
 	__le64 major;
 	__le64 minor;
 } __attribute__((packed));
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 262576573eb5..4a8aa1de9522 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -606,6 +606,10 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path,
 				mnr = le64_to_cpu(*(__le64 *)(pbuf+16));
 				fattr->cf_rdev = MKDEV(mjr, mnr);
 			}
+		} else if (memcmp("LnxSOCK", pbuf, 8) == 0) {
+			cifs_dbg(FYI, "Socket\n");
+			fattr->cf_mode |= S_IFSOCK;
+			fattr->cf_dtype = DT_SOCK;
 		} else if (memcmp("IntxLNK", pbuf, 7) == 0) {
 			cifs_dbg(FYI, "Symlink\n");
 			fattr->cf_mode |= S_IFLNK;
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 4ce6c3121a7e..c8e536540895 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -4997,6 +4997,9 @@ static int __cifs_sfu_make_node(unsigned int xid, struct inode *inode,
 		pdev.major = cpu_to_le64(MAJOR(dev));
 		pdev.minor = cpu_to_le64(MINOR(dev));
 		break;
+	case S_IFSOCK:
+		strscpy(pdev.type, "LnxSOCK");
+		break;
 	case S_IFIFO:
 		strscpy(pdev.type, "LnxFIFO");
 		break;
-- 
cgit v1.2.3


From adb77bba9c664f5d120e0ffb1387e9d7408e1529 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 9 Apr 2024 19:31:52 -0700
Subject: scsi: mpt3sas: Avoid possible run-time warning with long manufacturer
 strings

The prior strscpy() replacement of strncpy() here expected the
manufacture_reply strings to be NUL-terminated, but it is possible
they are not, as the code pattern here shows, e.g., edev->vendor_id
being exactly 1 character larger than manufacture_reply->vendor_id,
and the replaced strncpy() was copying only up to the size of the
source character array. Replace this with memtostr(), which is the
unambiguous way to convert a maybe not-NUL-terminated character array
into a NUL-terminated string.

Fixes: b7e9712a02e8 ("scsi: mpt3sas: Replace deprecated strncpy() with strscpy()")
Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Marco Patalano <mpatalan@redhat.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20240410023155.2100422-3-keescook@chromium.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 drivers/scsi/mpt3sas/mpt3sas_base.c      |  2 +-
 drivers/scsi/mpt3sas/mpt3sas_transport.c | 14 +++++---------
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 258647fc6bdd..1320e06727df 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -4774,7 +4774,7 @@ _base_display_ioc_capabilities(struct MPT3SAS_ADAPTER *ioc)
 	char desc[17] = {0};
 	u32 iounit_pg1_flags;
 
-	strscpy(desc, ioc->manu_pg0.ChipName, sizeof(desc));
+	memtostr(desc, ioc->manu_pg0.ChipName);
 	ioc_info(ioc, "%s: FWVersion(%02d.%02d.%02d.%02d), ChipRevision(0x%02x)\n",
 		 desc,
 		 (ioc->facts.FWVersion.Word & 0xFF000000) >> 24,
diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c
index 76f9a9177198..d84413b77d84 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c
@@ -458,17 +458,13 @@ _transport_expander_report_manufacture(struct MPT3SAS_ADAPTER *ioc,
 			goto out;
 
 		manufacture_reply = data_out + sizeof(struct rep_manu_request);
-		strscpy(edev->vendor_id, manufacture_reply->vendor_id,
-			sizeof(edev->vendor_id));
-		strscpy(edev->product_id, manufacture_reply->product_id,
-			sizeof(edev->product_id));
-		strscpy(edev->product_rev, manufacture_reply->product_rev,
-			sizeof(edev->product_rev));
+		memtostr(edev->vendor_id, manufacture_reply->vendor_id);
+		memtostr(edev->product_id, manufacture_reply->product_id);
+		memtostr(edev->product_rev, manufacture_reply->product_rev);
 		edev->level = manufacture_reply->sas_format & 1;
 		if (edev->level) {
-			strscpy(edev->component_vendor_id,
-				manufacture_reply->component_vendor_id,
-				sizeof(edev->component_vendor_id));
+			memtostr(edev->component_vendor_id,
+				 manufacture_reply->component_vendor_id);
 			tmp = (u8 *)&manufacture_reply->component_id;
 			edev->component_id = tmp[0] << 8 | tmp[1];
 			edev->component_revision_id =
-- 
cgit v1.2.3


From 4e173c825b1914d5b118bbf26f0168102d56ae95 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Fri, 31 May 2024 08:54:52 -0700
Subject: mailmap: update entry for Kees Cook

I'm tired of gmail breaking DKIM. Switch everything over to my
@kernel.org alias instead.

Signed-off-by: Kees Cook <kees@kernel.org>
---
 .mailmap | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/.mailmap b/.mailmap
index 43cd2995dbc2..efd9fa867a8e 100644
--- a/.mailmap
+++ b/.mailmap
@@ -337,10 +337,11 @@ Kalyan Thota <quic_kalyant@quicinc.com> <kalyan_t@codeaurora.org>
 Karthikeyan Periyasamy <quic_periyasa@quicinc.com> <periyasa@codeaurora.org>
 Kathiravan T <quic_kathirav@quicinc.com> <kathirav@codeaurora.org>
 Kay Sievers <kay.sievers@vrfy.org>
-Kees Cook <keescook@chromium.org> <kees.cook@canonical.com>
-Kees Cook <keescook@chromium.org> <keescook@google.com>
-Kees Cook <keescook@chromium.org> <kees@outflux.net>
-Kees Cook <keescook@chromium.org> <kees@ubuntu.com>
+Kees Cook <kees@kernel.org> <kees.cook@canonical.com>
+Kees Cook <kees@kernel.org> <keescook@chromium.org>
+Kees Cook <kees@kernel.org> <keescook@google.com>
+Kees Cook <kees@kernel.org> <kees@outflux.net>
+Kees Cook <kees@kernel.org> <kees@ubuntu.com>
 Keith Busch <kbusch@kernel.org> <keith.busch@intel.com>
 Keith Busch <kbusch@kernel.org> <keith.busch@linux.intel.com>
 Kenneth W Chen <kenneth.w.chen@intel.com>
-- 
cgit v1.2.3


From d551ce15d08114514d489fad63bd275de2aca862 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 28 May 2024 14:02:35 +0200
Subject: mailbox: zynqmp-ipi: drop irq_to_desc() call

irq_to_desc() is not exported to loadable modules, so this driver now
fails to link in some configurations:

ERROR: modpost: "irq_to_desc" [drivers/mailbox/zynqmp-ipi-mailbox.ko] undefined!

I can't see a purpose for this call, since the return value is unused
and probably left over from some code refactoring.

Address the link failure by just removing the line.

Fixes: 6ffb1635341b ("mailbox: zynqmp: handle SGI for shared IPI")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Tanmay Shah <tanmay.shah@amd.com>
Signed-off-by: Jassi Brar <jassisinghbrar@gmail.com>
---
 drivers/mailbox/zynqmp-ipi-mailbox.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mailbox/zynqmp-ipi-mailbox.c b/drivers/mailbox/zynqmp-ipi-mailbox.c
index 7c90bac3de21..4acf5612487c 100644
--- a/drivers/mailbox/zynqmp-ipi-mailbox.c
+++ b/drivers/mailbox/zynqmp-ipi-mailbox.c
@@ -850,7 +850,6 @@ static int xlnx_mbox_init_sgi(struct platform_device *pdev,
 		return ret;
 	}
 
-	irq_to_desc(pdata->virq_sgi);
 	irq_set_status_flags(pdata->virq_sgi, IRQ_PER_CPU);
 
 	/* Setup function for the CPU hot-plug cases */
-- 
cgit v1.2.3


From 0c2f6d04619ec2b53ad4b0b591eafc9389786e86 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 30 May 2024 17:29:18 +0200
Subject: x86/topology/intel: Unlock CPUID before evaluating anything

Intel CPUs have a MSR bit to limit CPUID enumeration to leaf two. If
this bit is set by the BIOS then CPUID evaluation including topology
enumeration does not work correctly as the evaluation code does not try
to analyze any leaf greater than two.

This went unnoticed before because the original topology code just
repeated evaluation several times and managed to overwrite the initial
limited information with the correct one later. The new evaluation code
does it once and therefore ends up with the limited and wrong
information.

Cure this by unlocking CPUID right before evaluating anything which
depends on the maximum CPUID leaf being greater than two instead of
rereading stuff after unlock.

Fixes: 22d63660c35e ("x86/cpu: Use common topology code for Intel")
Reported-by: Peter Schneider <pschneider1968@googlemail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Tested-by: Peter Schneider <pschneider1968@googlemail.com>
Cc: <stable@kernel.org>
Link: https://lore.kernel.org/r/fd3f73dc-a86f-4bcf-9c60-43556a21eb42@googlemail.com
---
 arch/x86/kernel/cpu/common.c |  3 ++-
 arch/x86/kernel/cpu/cpu.h    |  2 ++
 arch/x86/kernel/cpu/intel.c  | 25 ++++++++++++++++---------
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e31293c9609f..d4e539d4e158 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1589,6 +1589,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	if (have_cpuid_p()) {
 		cpu_detect(c);
 		get_cpu_vendor(c);
+		intel_unlock_cpuid_leafs(c);
 		get_cpu_cap(c);
 		setup_force_cpu_cap(X86_FEATURE_CPUID);
 		get_cpu_address_sizes(c);
@@ -1748,7 +1749,7 @@ static void generic_identify(struct cpuinfo_x86 *c)
 	cpu_detect(c);
 
 	get_cpu_vendor(c);
-
+	intel_unlock_cpuid_leafs(c);
 	get_cpu_cap(c);
 
 	get_cpu_address_sizes(c);
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index ea9e07d57c8d..1beccefbaff9 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -61,9 +61,11 @@ extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state;
 
 extern void __init tsx_init(void);
 void tsx_ap_init(void);
+void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c);
 #else
 static inline void tsx_init(void) { }
 static inline void tsx_ap_init(void) { }
+static inline void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) { }
 #endif /* CONFIG_CPU_SUP_INTEL */
 
 extern void init_spectral_chicken(struct cpuinfo_x86 *c);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3c3e7e5695ba..fdf3489d92a4 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -269,19 +269,26 @@ detect_keyid_bits:
 	c->x86_phys_bits -= keyid_bits;
 }
 
+void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c)
+{
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return;
+
+	if (c->x86 < 6 || (c->x86 == 6 && c->x86_model < 0xd))
+		return;
+
+	/*
+	 * The BIOS can have limited CPUID to leaf 2, which breaks feature
+	 * enumeration. Unlock it and update the maximum leaf info.
+	 */
+	if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0)
+		c->cpuid_level = cpuid_eax(0);
+}
+
 static void early_init_intel(struct cpuinfo_x86 *c)
 {
 	u64 misc_enable;
 
-	/* Unmask CPUID levels if masked: */
-	if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
-		if (msr_clear_bit(MSR_IA32_MISC_ENABLE,
-				  MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) {
-			c->cpuid_level = cpuid_eax(0);
-			get_cpu_cap(c);
-		}
-	}
-
 	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
 		(c->x86 == 0x6 && c->x86_model >= 0x0e))
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
-- 
cgit v1.2.3


From 7dc3bfcb4c9cc58970fff6aaa48172cb224d85aa Mon Sep 17 00:00:00 2001
From: Chunguang Xu <chunguang.xu@shopee.com>
Date: Fri, 31 May 2024 17:24:21 +0800
Subject: nvme-fabrics: use reserved tag for reg read/write command

In some scenarios, if too many commands are issued by nvme command in
the same time by user tasks, this may exhaust all tags of admin_q. If
a reset (nvme reset or IO timeout) occurs before these commands finish,
reconnect routine may fail to update nvme regs due to insufficient tags,
which will cause kernel hang forever. In order to workaround this issue,
maybe we can let reg_read32()/reg_read64()/reg_write32() use reserved
tags. This maybe safe for nvmf:

1. For the disable ctrl path,  we will not issue connect command
2. For the enable ctrl / fw activate path, since connect and reg_xx()
   are called serially.

So the reserved tags may still be enough while reg_xx() use reserved tags.

Signed-off-by: Chunguang Xu <chunguang.xu@shopee.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/fabrics.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index c6ad2148c2e0..ceb9c0ed3120 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -180,7 +180,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
 	cmd.prop_get.offset = cpu_to_le32(off);
 
 	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0,
-			NVME_QID_ANY, 0);
+			NVME_QID_ANY, NVME_SUBMIT_RESERVED);
 
 	if (ret >= 0)
 		*val = le64_to_cpu(res.u64);
@@ -226,7 +226,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
 	cmd.prop_get.offset = cpu_to_le32(off);
 
 	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0,
-			NVME_QID_ANY, 0);
+			NVME_QID_ANY, NVME_SUBMIT_RESERVED);
 
 	if (ret >= 0)
 		*val = le64_to_cpu(res.u64);
@@ -271,7 +271,7 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
 	cmd.prop_set.value = cpu_to_le64(val);
 
 	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0,
-			NVME_QID_ANY, 0);
+			NVME_QID_ANY, NVME_SUBMIT_RESERVED);
 	if (unlikely(ret))
 		dev_err(ctrl->device,
 			"Property Set error: %d, offset %#x\n",
-- 
cgit v1.2.3


From 32c75ad4a79259609ee19f749832bc2d99bbdd13 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Fri, 31 May 2024 13:07:53 -0700
Subject: selftests/futex: don't redefine .PHONY targets (all, clean)

The .PHONY targets "all" and "clean"  are both already defined in the
file that is included in the very next line:

    ../lib.mk.

Remove this duplicate code.

Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/futex/Makefile | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 11e157d7533b..78ab2cd111f6 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -3,8 +3,6 @@ SUBDIRS := functional
 
 TEST_PROGS := run.sh
 
-.PHONY: all clean
-
 include ../lib.mk
 
 all:
-- 
cgit v1.2.3


From 4bf15b1c657d22d1d70173e43264e4606dfe75ff Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Fri, 31 May 2024 13:07:54 -0700
Subject: selftests/futex: don't pass a const char* to asprintf(3)

When building with clang, via:

    make LLVM=1 -C tools/testing/selftests

...clang issues this warning:

futex_requeue_pi.c:403:17: warning: passing 'const char **' to parameter
of type 'char **' discards qualifiers in nested pointer types
[-Wincompatible-pointer-types-discards-qualifiers]

This warning fires because test_name is passed into asprintf(3), which
then changes it.

Fix this by simply removing the const qualifier. This is a local
automatic variable in a very short function, so there is not much need
to use the compiler to enforce const-ness at this scope.

[1] https://lore.kernel.org/all/20240329-selftests-libmk-llvm-rfc-v1-1-2f9ed7d1c49f@valentinobst.de/

Fixes: f17d8a87ecb5 ("selftests: fuxex: Report a unique test name per run of futex_requeue_pi")
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/futex/functional/futex_requeue_pi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
index 7f3ca5c78df1..215c6cb539b4 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -360,7 +360,7 @@ out:
 
 int main(int argc, char *argv[])
 {
-	const char *test_name;
+	char *test_name;
 	int c, ret;
 
 	while ((c = getopt(argc, argv, "bchlot:v:")) != -1) {
-- 
cgit v1.2.3


From 99a6087dfdc65303d26ab5fba2dacd8931b82b08 Mon Sep 17 00:00:00 2001
From: Kees Cook <kees@kernel.org>
Date: Fri, 31 May 2024 11:57:07 -0700
Subject: kunit/fortify: Remove __kmalloc_node() test

__kmalloc_node() is considered an "internal" function to the Slab, so
drop it from explicit testing.

Link: https://lore.kernel.org/r/20240531185703.work.588-kees@kernel.org
Signed-off-by: Kees Cook <kees@kernel.org>
---
 lib/fortify_kunit.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c
index 39da5b3bc649..f9cc467334ce 100644
--- a/lib/fortify_kunit.c
+++ b/lib/fortify_kunit.c
@@ -235,9 +235,6 @@ static void fortify_test_alloc_size_##allocator##_dynamic(struct kunit *test) \
 		kmalloc_array_node(alloc_size, 1, gfp, NUMA_NO_NODE),	\
 		kfree(p));						\
 	checker(expected_size, __kmalloc(alloc_size, gfp),		\
-		kfree(p));						\
-	checker(expected_size,						\
-		__kmalloc_node(alloc_size, gfp, NUMA_NO_NODE),		\
 		kfree(p));						\
 									\
 	orig = kmalloc(alloc_size, gfp);				\
-- 
cgit v1.2.3


From b1a1fdd7096dd2d67911b07f8118ff113d815db4 Mon Sep 17 00:00:00 2001
From: Weiwen Hu <huweiwen@linux.alibaba.com>
Date: Thu, 30 May 2024 14:16:46 +0800
Subject: nvme: fix nvme_pr_* status code parsing

Fix the parsing if extra status bits (e.g. MORE) is present.

Fixes: 7fb42780d06c ("nvme: Convert NVMe errors to PR errors")
Signed-off-by: Weiwen Hu <huweiwen@linux.alibaba.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 drivers/nvme/host/pr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c
index e05571b2a1b0..8fa1ffcdaed4 100644
--- a/drivers/nvme/host/pr.c
+++ b/drivers/nvme/host/pr.c
@@ -77,7 +77,7 @@ static int nvme_sc_to_pr_err(int nvme_sc)
 	if (nvme_is_path_error(nvme_sc))
 		return PR_STS_PATH_FAILED;
 
-	switch (nvme_sc) {
+	switch (nvme_sc & 0x7ff) {
 	case NVME_SC_SUCCESS:
 		return PR_STS_SUCCESS;
 	case NVME_SC_RESERVATION_CONFLICT:
-- 
cgit v1.2.3


From 62da3acd28955e7299babebdfcb14243b789e773 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 28 May 2024 15:32:18 -0700
Subject: selftests/bpf: fix inet_csk_accept prototype in
 test_sk_storage_tracing.c

Recent kernel change ([0]) changed inet_csk_accept() prototype. Adapt
progs/test_sk_storage_tracing.c to take that into account.

  [0] 92ef0fd55ac8 ("net: change proto and proto_ops accept type")

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20240528223218.3445297-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
index 02e718f06e0f..40531e56776e 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
@@ -84,7 +84,7 @@ int BPF_PROG(trace_tcp_connect, struct sock *sk)
 }
 
 SEC("fexit/inet_csk_accept")
-int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern,
+int BPF_PROG(inet_csk_accept, struct sock *sk, struct proto_accept_arg *arg,
 	     struct sock *accepted_sk)
 {
 	set_task_info(accepted_sk);
-- 
cgit v1.2.3


From aeb8fe0283d4d3b0f27a87c5f5c938e7324f7d8f Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 31 May 2024 21:45:00 +0200
Subject: bpf: Fix bpf_session_cookie BTF_ID in special_kfunc_set list

The bpf_session_cookie is unavailable for !CONFIG_FPROBE as reported
by Sebastian [1].

To fix that we remove CONFIG_FPROBE ifdef for session kfuncs, which
is fine, because there's filter for session programs.

Then based on bpf_trace.o dependency:
  obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o

we add bpf_session_cookie BTF_ID in special_kfunc_set list dependency
on CONFIG_BPF_EVENTS.

[1] https://lore.kernel.org/bpf/20240531071557.MvfIqkn7@linutronix.de/T/#m71c6d5ec71db2967288cb79acedc15cc5dbfeec5
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Suggested-by: Alexei Starovoitov <ast@kernel.org>
Fixes: 5c919acef8514 ("bpf: Add support for kprobe session cookie")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20240531194500.2967187-1-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c    | 4 ++++
 kernel/trace/bpf_trace.c | 2 --
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 48f3a9acdef3..36ef8e96787e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11128,7 +11128,11 @@ BTF_ID(func, bpf_iter_css_task_new)
 #else
 BTF_ID_UNUSED
 #endif
+#ifdef CONFIG_BPF_EVENTS
 BTF_ID(func, bpf_session_cookie)
+#else
+BTF_ID_UNUSED
+#endif
 
 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
 {
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 6249dac61701..d1daeab1bbc1 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -3517,7 +3517,6 @@ static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
 }
 #endif /* CONFIG_UPROBES */
 
-#ifdef CONFIG_FPROBE
 __bpf_kfunc_start_defs();
 
 __bpf_kfunc bool bpf_session_is_return(void)
@@ -3566,4 +3565,3 @@ static int __init bpf_kprobe_multi_kfuncs_init(void)
 }
 
 late_initcall(bpf_kprobe_multi_kfuncs_init);
-#endif
-- 
cgit v1.2.3


From 7d0b3953f6d832daec10a0d76e2d4db405768a8b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 29 May 2024 16:12:12 -0700
Subject: libbpf: don't close(-1) in multi-uprobe feature detector

Guard close(link_fd) with extra link_fd >= 0 check to prevent close(-1).

Detected by Coverity static analysis.

Fixes: 04d939a2ab22 ("libbpf: detect broken PID filtering logic for multi-uprobe")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20240529231212.768828-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/features.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
index 3df0125ed5fa..50befe125ddc 100644
--- a/tools/lib/bpf/features.c
+++ b/tools/lib/bpf/features.c
@@ -393,7 +393,8 @@ static int probe_uprobe_multi_link(int token_fd)
 	err = -errno; /* close() can clobber errno */
 
 	if (link_fd >= 0 || err != -EBADF) {
-		close(link_fd);
+		if (link_fd >= 0)
+			close(link_fd);
 		close(prog_fd);
 		return 0;
 	}
-- 
cgit v1.2.3


From 7bc4244c882a7d7d79f4afefc50893244eb11d07 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 1 Jun 2024 07:28:21 +0200
Subject: Revert "VT: Use macros to define ioctls"

This reverts commit 8c467f3300591a206fa8dcc6988d768910799872.

Turns out this breaks many architectures as the vt ioctls do not all
match up everywhere due to historical reasons, so the original commit is
invalid for many values.

Reported-by: Nick Bowler <nbowler@draconx.ca>
Reported-by: Arnd Bergmann <arnd@kernel.org>
Reported-by: Jiri Slaby <jirislaby@kernel.org>
Reported-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Alexey Gladkov <legion@kernel.org>
Link: https://lore.kernel.org/r/ad4e561c-1d49-4f25-882c-7a36c6b1b5c0@draconx.ca
Link: https://lore.kernel.org/r/0da9785e-ba44-4718-9d08-4e96c1ba7ab2@kernel.org
Link: https://lore.kernel.org/all/34d848f4-670b-4493-bf21-130ef862521b@xenosoft.de/
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/kd.h | 96 ++++++++++++++++++++++++-------------------------
 1 file changed, 47 insertions(+), 49 deletions(-)

diff --git a/include/uapi/linux/kd.h b/include/uapi/linux/kd.h
index 8ddb2219a84b..6b384065c013 100644
--- a/include/uapi/linux/kd.h
+++ b/include/uapi/linux/kd.h
@@ -5,61 +5,60 @@
 #include <linux/compiler.h>
 
 /* 0x4B is 'K', to avoid collision with termios and vt */
-#define KD_IOCTL_BASE	'K'
 
-#define GIO_FONT	_IO(KD_IOCTL_BASE, 0x60)	/* gets font in expanded form */
-#define PIO_FONT	_IO(KD_IOCTL_BASE, 0x61)	/* use font in expanded form */
+#define GIO_FONT	0x4B60	/* gets font in expanded form */
+#define PIO_FONT	0x4B61	/* use font in expanded form */
 
-#define GIO_FONTX	_IO(KD_IOCTL_BASE, 0x6B)	/* get font using struct consolefontdesc */
-#define PIO_FONTX	_IO(KD_IOCTL_BASE, 0x6C)	/* set font using struct consolefontdesc */
+#define GIO_FONTX	0x4B6B	/* get font using struct consolefontdesc */
+#define PIO_FONTX	0x4B6C	/* set font using struct consolefontdesc */
 struct consolefontdesc {
 	unsigned short charcount;	/* characters in font (256 or 512) */
 	unsigned short charheight;	/* scan lines per character (1-32) */
 	char __user *chardata;		/* font data in expanded form */
 };
 
-#define PIO_FONTRESET	_IO(KD_IOCTL_BASE, 0x6D)	/* reset to default font */
+#define PIO_FONTRESET   0x4B6D	/* reset to default font */
 
-#define GIO_CMAP	_IO(KD_IOCTL_BASE, 0x70)	/* gets colour palette on VGA+ */
-#define PIO_CMAP	_IO(KD_IOCTL_BASE, 0x71)	/* sets colour palette on VGA+ */
+#define GIO_CMAP	0x4B70	/* gets colour palette on VGA+ */
+#define PIO_CMAP	0x4B71	/* sets colour palette on VGA+ */
 
-#define KIOCSOUND	_IO(KD_IOCTL_BASE, 0x2F)	/* start sound generation (0 for off) */
-#define KDMKTONE	_IO(KD_IOCTL_BASE, 0x30)	/* generate tone */
+#define KIOCSOUND	0x4B2F	/* start sound generation (0 for off) */
+#define KDMKTONE	0x4B30	/* generate tone */
 
-#define KDGETLED	_IO(KD_IOCTL_BASE, 0x31)	/* return current led state */
-#define KDSETLED	_IO(KD_IOCTL_BASE, 0x32)	/* set led state [lights, not flags] */
+#define KDGETLED	0x4B31	/* return current led state */
+#define KDSETLED	0x4B32	/* set led state [lights, not flags] */
 #define 	LED_SCR		0x01	/* scroll lock led */
 #define 	LED_NUM		0x02	/* num lock led */
 #define 	LED_CAP		0x04	/* caps lock led */
 
-#define KDGKBTYPE	_IO(KD_IOCTL_BASE, 0x33)	/* get keyboard type */
+#define KDGKBTYPE	0x4B33	/* get keyboard type */
 #define 	KB_84		0x01
 #define 	KB_101		0x02 	/* this is what we always answer */
 #define 	KB_OTHER	0x03
 
-#define KDADDIO		_IO(KD_IOCTL_BASE, 0x34)	/* add i/o port as valid */
-#define KDDELIO		_IO(KD_IOCTL_BASE, 0x35)	/* del i/o port as valid */
-#define KDENABIO	_IO(KD_IOCTL_BASE, 0x36)	/* enable i/o to video board */
-#define KDDISABIO	_IO(KD_IOCTL_BASE, 0x37)	/* disable i/o to video board */
+#define KDADDIO		0x4B34	/* add i/o port as valid */
+#define KDDELIO		0x4B35	/* del i/o port as valid */
+#define KDENABIO	0x4B36	/* enable i/o to video board */
+#define KDDISABIO	0x4B37	/* disable i/o to video board */
 
-#define KDSETMODE	_IO(KD_IOCTL_BASE, 0x3A)	/* set text/graphics mode */
+#define KDSETMODE	0x4B3A	/* set text/graphics mode */
 #define		KD_TEXT		0x00
 #define		KD_GRAPHICS	0x01
 #define		KD_TEXT0	0x02	/* obsolete */
 #define		KD_TEXT1	0x03	/* obsolete */
-#define KDGETMODE	_IO(KD_IOCTL_BASE, 0x3B)	/* get current mode */
+#define KDGETMODE	0x4B3B	/* get current mode */
 
-#define KDMAPDISP	_IO(KD_IOCTL_BASE, 0x3C)	/* map display into address space */
-#define KDUNMAPDISP	_IO(KD_IOCTL_BASE, 0x3D)	/* unmap display from address space */
+#define KDMAPDISP	0x4B3C	/* map display into address space */
+#define KDUNMAPDISP	0x4B3D	/* unmap display from address space */
 
 typedef char scrnmap_t;
 #define		E_TABSZ		256
-#define GIO_SCRNMAP	_IO(KD_IOCTL_BASE, 0x40)	/* get screen mapping from kernel */
-#define PIO_SCRNMAP	_IO(KD_IOCTL_BASE, 0x41)	/* put screen mapping table in kernel */
-#define GIO_UNISCRNMAP	_IO(KD_IOCTL_BASE, 0x69)	/* get full Unicode screen mapping */
-#define PIO_UNISCRNMAP	_IO(KD_IOCTL_BASE, 0x6A)	/* set full Unicode screen mapping */
+#define GIO_SCRNMAP	0x4B40	/* get screen mapping from kernel */
+#define PIO_SCRNMAP	0x4B41	/* put screen mapping table in kernel */
+#define GIO_UNISCRNMAP  0x4B69	/* get full Unicode screen mapping */
+#define PIO_UNISCRNMAP  0x4B6A  /* set full Unicode screen mapping */
 
-#define GIO_UNIMAP	_IO(KD_IOCTL_BASE, 0x66)	/* get unicode-to-font mapping from kernel */
+#define GIO_UNIMAP	0x4B66	/* get unicode-to-font mapping from kernel */
 struct unipair {
 	unsigned short unicode;
 	unsigned short fontpos;
@@ -68,8 +67,8 @@ struct unimapdesc {
 	unsigned short entry_ct;
 	struct unipair __user *entries;
 };
-#define PIO_UNIMAP	_IO(KD_IOCTL_BASE, 0x67)	/* put unicode-to-font mapping in kernel */
-#define PIO_UNIMAPCLR	_IO(KD_IOCTL_BASE, 0x68)	/* clear table, possibly advise hash algorithm */
+#define PIO_UNIMAP	0x4B67	/* put unicode-to-font mapping in kernel */
+#define PIO_UNIMAPCLR	0x4B68	/* clear table, possibly advise hash algorithm */
 struct unimapinit {
 	unsigned short advised_hashsize;  /* 0 if no opinion */
 	unsigned short advised_hashstep;  /* 0 if no opinion */
@@ -84,19 +83,19 @@ struct unimapinit {
 #define		K_MEDIUMRAW	0x02
 #define		K_UNICODE	0x03
 #define		K_OFF		0x04
-#define KDGKBMODE	_IO(KD_IOCTL_BASE, 0x44)	/* gets current keyboard mode */
-#define KDSKBMODE	_IO(KD_IOCTL_BASE, 0x45)	/* sets current keyboard mode */
+#define KDGKBMODE	0x4B44	/* gets current keyboard mode */
+#define KDSKBMODE	0x4B45	/* sets current keyboard mode */
 
 #define		K_METABIT	0x03
 #define		K_ESCPREFIX	0x04
-#define KDGKBMETA	_IO(KD_IOCTL_BASE, 0x62)	/* gets meta key handling mode */
-#define KDSKBMETA	_IO(KD_IOCTL_BASE, 0x63)	/* sets meta key handling mode */
+#define KDGKBMETA	0x4B62	/* gets meta key handling mode */
+#define KDSKBMETA	0x4B63	/* sets meta key handling mode */
 
 #define		K_SCROLLLOCK	0x01
 #define		K_NUMLOCK	0x02
 #define		K_CAPSLOCK	0x04
-#define	KDGKBLED	_IO(KD_IOCTL_BASE, 0x64)	/* get led flags (not lights) */
-#define	KDSKBLED	_IO(KD_IOCTL_BASE, 0x65)	/* set led flags (not lights) */
+#define	KDGKBLED	0x4B64	/* get led flags (not lights) */
+#define	KDSKBLED	0x4B65	/* set led flags (not lights) */
 
 struct kbentry {
 	unsigned char kb_table;
@@ -108,15 +107,15 @@ struct kbentry {
 #define		K_ALTTAB	0x02
 #define		K_ALTSHIFTTAB	0x03
 
-#define KDGKBENT	_IO(KD_IOCTL_BASE, 0x46)	/* gets one entry in translation table */
-#define KDSKBENT	_IO(KD_IOCTL_BASE, 0x47)	/* sets one entry in translation table */
+#define KDGKBENT	0x4B46	/* gets one entry in translation table */
+#define KDSKBENT	0x4B47	/* sets one entry in translation table */
 
 struct kbsentry {
 	unsigned char kb_func;
 	unsigned char kb_string[512];
 };
-#define KDGKBSENT	_IO(KD_IOCTL_BASE, 0x48)	/* gets one function key string entry */
-#define KDSKBSENT	_IO(KD_IOCTL_BASE, 0x49)	/* sets one function key string entry */
+#define KDGKBSENT	0x4B48	/* gets one function key string entry */
+#define KDSKBSENT	0x4B49	/* sets one function key string entry */
 
 struct kbdiacr {
         unsigned char diacr, base, result;
@@ -125,8 +124,8 @@ struct kbdiacrs {
         unsigned int kb_cnt;    /* number of entries in following array */
 	struct kbdiacr kbdiacr[256];    /* MAX_DIACR from keyboard.h */
 };
-#define KDGKBDIACR	_IO(KD_IOCTL_BASE, 0x4A)  /* read kernel accent table */
-#define KDSKBDIACR	_IO(KD_IOCTL_BASE, 0x4B)  /* write kernel accent table */
+#define KDGKBDIACR      0x4B4A  /* read kernel accent table */
+#define KDSKBDIACR      0x4B4B  /* write kernel accent table */
 
 struct kbdiacruc {
 	unsigned int diacr, base, result;
@@ -135,16 +134,16 @@ struct kbdiacrsuc {
         unsigned int kb_cnt;    /* number of entries in following array */
 	struct kbdiacruc kbdiacruc[256];    /* MAX_DIACR from keyboard.h */
 };
-#define KDGKBDIACRUC	_IO(KD_IOCTL_BASE, 0xFA)  /* read kernel accent table - UCS */
-#define KDSKBDIACRUC	_IO(KD_IOCTL_BASE, 0xFB)  /* write kernel accent table - UCS */
+#define KDGKBDIACRUC    0x4BFA  /* read kernel accent table - UCS */
+#define KDSKBDIACRUC    0x4BFB  /* write kernel accent table - UCS */
 
 struct kbkeycode {
 	unsigned int scancode, keycode;
 };
-#define KDGETKEYCODE	_IO(KD_IOCTL_BASE, 0x4C)	/* read kernel keycode table entry */
-#define KDSETKEYCODE	_IO(KD_IOCTL_BASE, 0x4D)	/* write kernel keycode table entry */
+#define KDGETKEYCODE	0x4B4C	/* read kernel keycode table entry */
+#define KDSETKEYCODE	0x4B4D	/* write kernel keycode table entry */
 
-#define KDSIGACCEPT	_IO(KD_IOCTL_BASE, 0x4E)	/* accept kbd generated signals */
+#define KDSIGACCEPT	0x4B4E	/* accept kbd generated signals */
 
 struct kbd_repeat {
 	int delay;	/* in msec; <= 0: don't change */
@@ -152,11 +151,10 @@ struct kbd_repeat {
 			/* earlier this field was misnamed "rate" */
 };
 
-#define KDKBDREP	_IO(KD_IOCTL_BASE, 0x52)	/* set keyboard delay/repeat rate;
-							 * actually used values are returned
-							 */
+#define KDKBDREP        0x4B52  /* set keyboard delay/repeat rate;
+				 * actually used values are returned */
 
-#define KDFONTOP	_IO(KD_IOCTL_BASE, 0x72)	/* font operations */
+#define KDFONTOP	0x4B72	/* font operations */
 
 struct console_font_op {
 	unsigned int op;	/* operation code KD_FONT_OP_* */
-- 
cgit v1.2.3


From ebfb5e8fc8b45040b979f4bf0012a01d0abac8d4 Mon Sep 17 00:00:00 2001
From: Alexis Lothoré <alexis.lothore@bootlin.com>
Date: Tue, 28 May 2024 16:20:28 +0200
Subject: Revert "wifi: wilc1000: convert list management to RCU"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit f236464f1db7bea80075e6e31ac70dc6eb80547f

Commit f236464f1db7 ("wifi: wilc1000: convert list management to RCU")
replaced SRCU with RCU, aiming to simplify RCU usage in the driver. No
documentation or commit history hinted about why SRCU has been preferred
in original design, so it has been assumed to be safe to do this
conversion.
Unfortunately, some static analyzers raised warnings, confirmed by runtime
checker, not long after the merge. At least three different issues arose
when switching to RCU:
- wilc_wlan_txq_filter_dup_tcp_ack is executed in a RCU read critical
  section yet calls wait_for_completion_timeout
- wilc_wfi_init_mon_interface calls kmalloc and register_netdevice while
  manipulating a vif retrieved from vif list
- set_channel sends command to chip (and so, also waits for a completion)
  while holding a vif retrieved from vif list (so, in RCU read critical
  section)

Some of those issues are not trivial to fix and would need bigger driver
rework. Fix those issues by reverting the SRCU to RCU conversion commit

Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/linux-wireless/3b46ec7c-baee-49fd-b760-3bc12fb12eaf@moroto.mountain/
Fixes: f236464f1db7 ("wifi: wilc1000: convert list management to RCU")
Signed-off-by: Alexis Lothoré <alexis.lothore@bootlin.com>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://msgid.link/20240528-wilc_revert_srcu_to_rcu-v1-1-bce096e0798c@bootlin.com
---
 drivers/net/wireless/microchip/wilc1000/cfg80211.c | 41 ++++++++++++---------
 drivers/net/wireless/microchip/wilc1000/hif.c      | 15 +++++---
 drivers/net/wireless/microchip/wilc1000/netdev.c   | 43 +++++++++++++---------
 drivers/net/wireless/microchip/wilc1000/netdev.h   |  5 ++-
 drivers/net/wireless/microchip/wilc1000/wlan.c     |  5 ++-
 5 files changed, 64 insertions(+), 45 deletions(-)

diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c
index 7d9fb9f2d527..089102ed9ae5 100644
--- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c
+++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c
@@ -237,11 +237,12 @@ static int set_channel(struct wiphy *wiphy,
 	struct wilc_vif *vif;
 	u32 channelnum;
 	int result;
+	int srcu_idx;
 
-	rcu_read_lock();
+	srcu_idx = srcu_read_lock(&wl->srcu);
 	vif = wilc_get_wl_to_vif(wl);
 	if (IS_ERR(vif)) {
-		rcu_read_unlock();
+		srcu_read_unlock(&wl->srcu, srcu_idx);
 		return PTR_ERR(vif);
 	}
 
@@ -252,7 +253,7 @@ static int set_channel(struct wiphy *wiphy,
 	if (result)
 		netdev_err(vif->ndev, "Error in setting channel\n");
 
-	rcu_read_unlock();
+	srcu_read_unlock(&wl->srcu, srcu_idx);
 	return result;
 }
 
@@ -805,8 +806,9 @@ static int set_wiphy_params(struct wiphy *wiphy, u32 changed)
 	struct wilc *wl = wiphy_priv(wiphy);
 	struct wilc_vif *vif;
 	struct wilc_priv *priv;
+	int srcu_idx;
 
-	rcu_read_lock();
+	srcu_idx = srcu_read_lock(&wl->srcu);
 	vif = wilc_get_wl_to_vif(wl);
 	if (IS_ERR(vif))
 		goto out;
@@ -861,7 +863,7 @@ static int set_wiphy_params(struct wiphy *wiphy, u32 changed)
 		netdev_err(priv->dev, "Error in setting WIPHY PARAMS\n");
 
 out:
-	rcu_read_unlock();
+	srcu_read_unlock(&wl->srcu, srcu_idx);
 	return ret;
 }
 
@@ -1537,19 +1539,20 @@ static struct wireless_dev *add_virtual_intf(struct wiphy *wiphy,
 
 	if (type == NL80211_IFTYPE_MONITOR) {
 		struct net_device *ndev;
+		int srcu_idx;
 
-		rcu_read_lock();
+		srcu_idx = srcu_read_lock(&wl->srcu);
 		vif = wilc_get_vif_from_type(wl, WILC_AP_MODE);
 		if (!vif) {
 			vif = wilc_get_vif_from_type(wl, WILC_GO_MODE);
 			if (!vif) {
-				rcu_read_unlock();
+				srcu_read_unlock(&wl->srcu, srcu_idx);
 				goto validate_interface;
 			}
 		}
 
 		if (vif->monitor_flag) {
-			rcu_read_unlock();
+			srcu_read_unlock(&wl->srcu, srcu_idx);
 			goto validate_interface;
 		}
 
@@ -1557,12 +1560,12 @@ static struct wireless_dev *add_virtual_intf(struct wiphy *wiphy,
 		if (ndev) {
 			vif->monitor_flag = 1;
 		} else {
-			rcu_read_unlock();
+			srcu_read_unlock(&wl->srcu, srcu_idx);
 			return ERR_PTR(-EINVAL);
 		}
 
 		wdev = &vif->priv.wdev;
-		rcu_read_unlock();
+		srcu_read_unlock(&wl->srcu, srcu_idx);
 		return wdev;
 	}
 
@@ -1610,7 +1613,7 @@ static int del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev)
 	list_del_rcu(&vif->list);
 	wl->vif_num--;
 	mutex_unlock(&wl->vif_mutex);
-	synchronize_rcu();
+	synchronize_srcu(&wl->srcu);
 	return 0;
 }
 
@@ -1635,23 +1638,25 @@ static void wilc_set_wakeup(struct wiphy *wiphy, bool enabled)
 {
 	struct wilc *wl = wiphy_priv(wiphy);
 	struct wilc_vif *vif;
+	int srcu_idx;
 
-	rcu_read_lock();
+	srcu_idx = srcu_read_lock(&wl->srcu);
 	vif = wilc_get_wl_to_vif(wl);
 	if (IS_ERR(vif)) {
-		rcu_read_unlock();
+		srcu_read_unlock(&wl->srcu, srcu_idx);
 		return;
 	}
 
 	netdev_info(vif->ndev, "cfg set wake up = %d\n", enabled);
 	wilc_set_wowlan_trigger(vif, enabled);
-	rcu_read_unlock();
+	srcu_read_unlock(&wl->srcu, srcu_idx);
 }
 
 static int set_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev,
 			enum nl80211_tx_power_setting type, int mbm)
 {
 	int ret;
+	int srcu_idx;
 	s32 tx_power = MBM_TO_DBM(mbm);
 	struct wilc *wl = wiphy_priv(wiphy);
 	struct wilc_vif *vif;
@@ -1659,10 +1664,10 @@ static int set_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev,
 	if (!wl->initialized)
 		return -EIO;
 
-	rcu_read_lock();
+	srcu_idx = srcu_read_lock(&wl->srcu);
 	vif = wilc_get_wl_to_vif(wl);
 	if (IS_ERR(vif)) {
-		rcu_read_unlock();
+		srcu_read_unlock(&wl->srcu, srcu_idx);
 		return -EINVAL;
 	}
 
@@ -1674,7 +1679,7 @@ static int set_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev,
 	ret = wilc_set_tx_power(vif, tx_power);
 	if (ret)
 		netdev_err(vif->ndev, "Failed to set tx power\n");
-	rcu_read_unlock();
+	srcu_read_unlock(&wl->srcu, srcu_idx);
 
 	return ret;
 }
@@ -1757,6 +1762,7 @@ static void wlan_init_locks(struct wilc *wl)
 	init_completion(&wl->cfg_event);
 	init_completion(&wl->sync_event);
 	init_completion(&wl->txq_thread_started);
+	init_srcu_struct(&wl->srcu);
 }
 
 void wlan_deinit_locks(struct wilc *wilc)
@@ -1767,6 +1773,7 @@ void wlan_deinit_locks(struct wilc *wilc)
 	mutex_destroy(&wilc->txq_add_to_head_cs);
 	mutex_destroy(&wilc->vif_mutex);
 	mutex_destroy(&wilc->deinit_lock);
+	cleanup_srcu_struct(&wilc->srcu);
 }
 
 int wilc_cfg80211_init(struct wilc **wilc, struct device *dev, int io_type,
diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c
index 919de6ffb821..3925ca653e80 100644
--- a/drivers/net/wireless/microchip/wilc1000/hif.c
+++ b/drivers/net/wireless/microchip/wilc1000/hif.c
@@ -1570,11 +1570,12 @@ void wilc_network_info_received(struct wilc *wilc, u8 *buffer, u32 length)
 	struct host_if_drv *hif_drv;
 	struct host_if_msg *msg;
 	struct wilc_vif *vif;
+	int srcu_idx;
 	int result;
 	int id;
 
 	id = get_unaligned_le32(&buffer[length - 4]);
-	rcu_read_lock();
+	srcu_idx = srcu_read_lock(&wilc->srcu);
 	vif = wilc_get_vif_from_idx(wilc, id);
 	if (!vif)
 		goto out;
@@ -1606,7 +1607,7 @@ void wilc_network_info_received(struct wilc *wilc, u8 *buffer, u32 length)
 		kfree(msg);
 	}
 out:
-	rcu_read_unlock();
+	srcu_read_unlock(&wilc->srcu, srcu_idx);
 }
 
 void wilc_gnrl_async_info_received(struct wilc *wilc, u8 *buffer, u32 length)
@@ -1614,13 +1615,14 @@ void wilc_gnrl_async_info_received(struct wilc *wilc, u8 *buffer, u32 length)
 	struct host_if_drv *hif_drv;
 	struct host_if_msg *msg;
 	struct wilc_vif *vif;
+	int srcu_idx;
 	int result;
 	int id;
 
 	mutex_lock(&wilc->deinit_lock);
 
 	id = get_unaligned_le32(&buffer[length - 4]);
-	rcu_read_lock();
+	srcu_idx = srcu_read_lock(&wilc->srcu);
 	vif = wilc_get_vif_from_idx(wilc, id);
 	if (!vif)
 		goto out;
@@ -1647,7 +1649,7 @@ void wilc_gnrl_async_info_received(struct wilc *wilc, u8 *buffer, u32 length)
 		kfree(msg);
 	}
 out:
-	rcu_read_unlock();
+	srcu_read_unlock(&wilc->srcu, srcu_idx);
 	mutex_unlock(&wilc->deinit_lock);
 }
 
@@ -1655,11 +1657,12 @@ void wilc_scan_complete_received(struct wilc *wilc, u8 *buffer, u32 length)
 {
 	struct host_if_drv *hif_drv;
 	struct wilc_vif *vif;
+	int srcu_idx;
 	int result;
 	int id;
 
 	id = get_unaligned_le32(&buffer[length - 4]);
-	rcu_read_lock();
+	srcu_idx = srcu_read_lock(&wilc->srcu);
 	vif = wilc_get_vif_from_idx(wilc, id);
 	if (!vif)
 		goto out;
@@ -1684,7 +1687,7 @@ void wilc_scan_complete_received(struct wilc *wilc, u8 *buffer, u32 length)
 		}
 	}
 out:
-	rcu_read_unlock();
+	srcu_read_unlock(&wilc->srcu, srcu_idx);
 }
 
 int wilc_remain_on_channel(struct wilc_vif *vif, u64 cookie, u16 chan,
diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c
index 73f56f7b002b..710e29bea560 100644
--- a/drivers/net/wireless/microchip/wilc1000/netdev.c
+++ b/drivers/net/wireless/microchip/wilc1000/netdev.c
@@ -127,28 +127,30 @@ void wilc_wlan_set_bssid(struct net_device *wilc_netdev, const u8 *bssid,
 
 int wilc_wlan_get_num_conn_ifcs(struct wilc *wilc)
 {
+	int srcu_idx;
 	u8 ret_val = 0;
 	struct wilc_vif *vif;
 
-	rcu_read_lock();
+	srcu_idx = srcu_read_lock(&wilc->srcu);
 	wilc_for_each_vif(wilc, vif) {
 		if (!is_zero_ether_addr(vif->bssid))
 			ret_val++;
 	}
-	rcu_read_unlock();
+	srcu_read_unlock(&wilc->srcu, srcu_idx);
 	return ret_val;
 }
 
 static void wilc_wake_tx_queues(struct wilc *wl)
 {
+	int srcu_idx;
 	struct wilc_vif *ifc;
 
-	rcu_read_lock();
+	srcu_idx = srcu_read_lock(&wl->srcu);
 	wilc_for_each_vif(wl, ifc) {
 		if (ifc->mac_opened && netif_queue_stopped(ifc->ndev))
 			netif_wake_queue(ifc->ndev);
 	}
-	rcu_read_unlock();
+	srcu_read_unlock(&wl->srcu, srcu_idx);
 }
 
 static int wilc_txq_task(void *vp)
@@ -653,6 +655,7 @@ static int wilc_set_mac_addr(struct net_device *dev, void *p)
 	struct sockaddr *addr = (struct sockaddr *)p;
 	unsigned char mac_addr[ETH_ALEN];
 	struct wilc_vif *tmp_vif;
+	int srcu_idx;
 
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
@@ -664,19 +667,19 @@ static int wilc_set_mac_addr(struct net_device *dev, void *p)
 
 	/* Verify MAC Address is not already in use: */
 
-	rcu_read_lock();
+	srcu_idx = srcu_read_lock(&wilc->srcu);
 	wilc_for_each_vif(wilc, tmp_vif) {
 		wilc_get_mac_address(tmp_vif, mac_addr);
 		if (ether_addr_equal(addr->sa_data, mac_addr)) {
 			if (vif != tmp_vif) {
-				rcu_read_unlock();
+				srcu_read_unlock(&wilc->srcu, srcu_idx);
 				return -EADDRNOTAVAIL;
 			}
-			rcu_read_unlock();
+			srcu_read_unlock(&wilc->srcu, srcu_idx);
 			return 0;
 		}
 	}
-	rcu_read_unlock();
+	srcu_read_unlock(&wilc->srcu, srcu_idx);
 
 	result = wilc_set_mac_address(vif, (u8 *)addr->sa_data);
 	if (result)
@@ -764,14 +767,15 @@ netdev_tx_t wilc_mac_xmit(struct sk_buff *skb, struct net_device *ndev)
 						wilc_tx_complete);
 
 	if (queue_count > FLOW_CONTROL_UPPER_THRESHOLD) {
+		int srcu_idx;
 		struct wilc_vif *vif;
 
-		rcu_read_lock();
+		srcu_idx = srcu_read_lock(&wilc->srcu);
 		wilc_for_each_vif(wilc, vif) {
 			if (vif->mac_opened)
 				netif_stop_queue(vif->ndev);
 		}
-		rcu_read_unlock();
+		srcu_read_unlock(&wilc->srcu, srcu_idx);
 	}
 
 	return NETDEV_TX_OK;
@@ -815,12 +819,13 @@ void wilc_frmw_to_host(struct wilc *wilc, u8 *buff, u32 size,
 	unsigned int frame_len = 0;
 	struct wilc_vif *vif;
 	struct sk_buff *skb;
+	int srcu_idx;
 	int stats;
 
 	if (!wilc)
 		return;
 
-	rcu_read_lock();
+	srcu_idx = srcu_read_lock(&wilc->srcu);
 	wilc_netdev = get_if_handler(wilc, buff);
 	if (!wilc_netdev)
 		goto out;
@@ -848,14 +853,15 @@ void wilc_frmw_to_host(struct wilc *wilc, u8 *buff, u32 size,
 		netdev_dbg(wilc_netdev, "netif_rx ret value is: %d\n", stats);
 	}
 out:
-	rcu_read_unlock();
+	srcu_read_unlock(&wilc->srcu, srcu_idx);
 }
 
 void wilc_wfi_mgmt_rx(struct wilc *wilc, u8 *buff, u32 size, bool is_auth)
 {
+	int srcu_idx;
 	struct wilc_vif *vif;
 
-	rcu_read_lock();
+	srcu_idx = srcu_read_lock(&wilc->srcu);
 	wilc_for_each_vif(wilc, vif) {
 		struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buff;
 		u16 type = le16_to_cpup((__le16 *)buff);
@@ -876,7 +882,7 @@ void wilc_wfi_mgmt_rx(struct wilc *wilc, u8 *buff, u32 size, bool is_auth)
 		if (vif->monitor_flag)
 			wilc_wfi_monitor_rx(wilc->monitor_dev, buff, size);
 	}
-	rcu_read_unlock();
+	srcu_read_unlock(&wilc->srcu, srcu_idx);
 }
 
 static const struct net_device_ops wilc_netdev_ops = {
@@ -906,7 +912,7 @@ void wilc_netdev_cleanup(struct wilc *wilc)
 		list_del_rcu(&vif->list);
 		wilc->vif_num--;
 		mutex_unlock(&wilc->vif_mutex);
-		synchronize_rcu();
+		synchronize_srcu(&wilc->srcu);
 		if (vif->ndev)
 			unregister_netdev(vif->ndev);
 	}
@@ -925,15 +931,16 @@ static u8 wilc_get_available_idx(struct wilc *wl)
 {
 	int idx = 0;
 	struct wilc_vif *vif;
+	int srcu_idx;
 
-	rcu_read_lock();
+	srcu_idx = srcu_read_lock(&wl->srcu);
 	wilc_for_each_vif(wl, vif) {
 		if (vif->idx == 0)
 			idx = 1;
 		else
 			idx = 0;
 	}
-	rcu_read_unlock();
+	srcu_read_unlock(&wl->srcu, srcu_idx);
 	return idx;
 }
 
@@ -983,7 +990,7 @@ struct wilc_vif *wilc_netdev_ifc_init(struct wilc *wl, const char *name,
 	list_add_tail_rcu(&vif->list, &wl->vif_list);
 	wl->vif_num += 1;
 	mutex_unlock(&wl->vif_mutex);
-	synchronize_rcu();
+	synchronize_srcu(&wl->srcu);
 
 	return vif;
 
diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.h b/drivers/net/wireless/microchip/wilc1000/netdev.h
index eecee3973d6a..5937d6d45695 100644
--- a/drivers/net/wireless/microchip/wilc1000/netdev.h
+++ b/drivers/net/wireless/microchip/wilc1000/netdev.h
@@ -32,8 +32,8 @@
 
 #define wilc_for_each_vif(w, v) \
 	struct wilc *_w = w; \
-	list_for_each_entry_rcu(v, &_w->vif_list, list, \
-				 rcu_read_lock_held())
+	list_for_each_entry_srcu(v, &_w->vif_list, list, \
+				 srcu_read_lock_held(&_w->srcu))
 
 struct wilc_wfi_stats {
 	unsigned long rx_packets;
@@ -220,6 +220,7 @@ struct wilc {
 
 	/* protect vif list */
 	struct mutex vif_mutex;
+	struct srcu_struct srcu;
 	u8 open_ifcs;
 
 	/* protect head of transmit queue */
diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.c b/drivers/net/wireless/microchip/wilc1000/wlan.c
index 37c32d17856e..a9e872a7b2c3 100644
--- a/drivers/net/wireless/microchip/wilc1000/wlan.c
+++ b/drivers/net/wireless/microchip/wilc1000/wlan.c
@@ -712,6 +712,7 @@ int wilc_wlan_handle_txq(struct wilc *wilc, u32 *txq_count)
 	u32 *vmm_table = wilc->vmm_table;
 	u8 ac_pkt_num_to_chip[NQUEUES] = {0, 0, 0, 0};
 	const struct wilc_hif_func *func;
+	int srcu_idx;
 	u8 *txb = wilc->tx_buffer;
 	struct wilc_vif *vif;
 
@@ -723,10 +724,10 @@ int wilc_wlan_handle_txq(struct wilc *wilc, u32 *txq_count)
 
 	mutex_lock(&wilc->txq_add_to_head_cs);
 
-	rcu_read_lock();
+	srcu_idx = srcu_read_lock(&wilc->srcu);
 	wilc_for_each_vif(wilc, vif)
 		wilc_wlan_txq_filter_dup_tcp_ack(vif->ndev);
-	rcu_read_unlock();
+	srcu_read_unlock(&wilc->srcu, srcu_idx);
 
 	for (ac = 0; ac < NQUEUES; ac++)
 		tqe_q[ac] = wilc_wlan_txq_get_first(wilc, ac);
-- 
cgit v1.2.3


From 3596717a6fbd54c64e97e085a9f77ed511ff59f9 Mon Sep 17 00:00:00 2001
From: Alexis Lothoré <alexis.lothore@bootlin.com>
Date: Tue, 28 May 2024 16:20:29 +0200
Subject: Revert "wifi: wilc1000: set atomic flag on kmemdup in srcu critical
 section"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 35aee01ff43d7eb6c2caa0b94e7cc6c45baeeab7

Commit 35aee01ff43d ("wifi: wilc1000: set atomic flag on kmemdup in srcu
critical section") was preparatory to the SRCU to RCU conversion done by
commit f236464f1db7 ("wifi: wilc1000: convert list management to RCU").
This conversion brought issues and so has been reverted, so the atomic flag
is not needed anymore.

Signed-off-by: Alexis Lothoré <alexis.lothore@bootlin.com>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://msgid.link/20240528-wilc_revert_srcu_to_rcu-v1-2-bce096e0798c@bootlin.com
---
 drivers/net/wireless/microchip/wilc1000/hif.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c
index 3925ca653e80..f1085ccb7eed 100644
--- a/drivers/net/wireless/microchip/wilc1000/hif.c
+++ b/drivers/net/wireless/microchip/wilc1000/hif.c
@@ -1594,7 +1594,7 @@ void wilc_network_info_received(struct wilc *wilc, u8 *buffer, u32 length)
 	msg->body.net_info.rssi = buffer[8];
 	msg->body.net_info.mgmt = kmemdup(&buffer[9],
 					  msg->body.net_info.frame_len,
-					  GFP_ATOMIC);
+					  GFP_KERNEL);
 	if (!msg->body.net_info.mgmt) {
 		kfree(msg);
 		goto out;
-- 
cgit v1.2.3


From 596c195680dceb34e6b994ff5571331d5dba8299 Mon Sep 17 00:00:00 2001
From: Alexis Lothoré <alexis.lothore@bootlin.com>
Date: Tue, 28 May 2024 16:20:30 +0200
Subject: wifi: wilc1000: document SRCU usage instead of SRCU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit f236464f1db7 ("wifi: wilc1000: convert list management to RCU")
attempted to convert SRCU to RCU usage, assuming it was not really needed.
The runtime issues that arose after merging it showed that there are code
paths involving sleeping functions, and removing those would need some
heavier driver rework.

Add some documentation about SRCU need to make sure that any future
developer do not miss some use cases if tempted to convert back again to
RCU.

Signed-off-by: Alexis Lothoré <alexis.lothore@bootlin.com>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://msgid.link/20240528-wilc_revert_srcu_to_rcu-v1-3-bce096e0798c@bootlin.com
---
 drivers/net/wireless/microchip/wilc1000/netdev.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.h b/drivers/net/wireless/microchip/wilc1000/netdev.h
index 5937d6d45695..fde8610a9c84 100644
--- a/drivers/net/wireless/microchip/wilc1000/netdev.h
+++ b/drivers/net/wireless/microchip/wilc1000/netdev.h
@@ -220,6 +220,13 @@ struct wilc {
 
 	/* protect vif list */
 	struct mutex vif_mutex;
+	/* Sleepable RCU struct to manipulate vif list. Sleepable version is
+	 * needed over the classic RCU version because the driver's current
+	 * design involves some sleeping code while manipulating a vif
+	 * retrieved from vif list (so in a SRCU critical section), like:
+	 * - sending commands to the chip, using info from retrieved vif
+	 * - registering a new monitoring net device
+	 */
 	struct srcu_struct srcu;
 	u8 open_ifcs;
 
-- 
cgit v1.2.3


From 40cecacabc460f5074398753feb9ed7d43e8dfa6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 28 May 2024 14:23:08 +0200
Subject: wifi: mt76: mt7615: add missing chanctx ops

Here's another one I missed during the initial conversion,
fix that.

Cc: stable@vger.kernel.org
Reported-by: Rene Petersen <renepetersen@posteo.de>
Fixes: 0a44dfc07074 ("wifi: mac80211: simplify non-chanctx drivers")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218895
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://msgid.link/20240528142308.3f7db1821e68.I531135d7ad76331a50244d6d5288e14aa9668390@changeid
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 0971c164b57e..c27acaf0eb1c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -1326,6 +1326,10 @@ static void mt7615_set_rekey_data(struct ieee80211_hw *hw,
 #endif /* CONFIG_PM */
 
 const struct ieee80211_ops mt7615_ops = {
+	.add_chanctx = ieee80211_emulate_add_chanctx,
+	.remove_chanctx = ieee80211_emulate_remove_chanctx,
+	.change_chanctx = ieee80211_emulate_change_chanctx,
+	.switch_vif_chanctx = ieee80211_emulate_switch_vif_chanctx,
 	.tx = mt7615_tx,
 	.start = mt7615_start,
 	.stop = mt7615_stop,
-- 
cgit v1.2.3


From 819bda58e77bb67974f94dc1aa11b0556b6f6889 Mon Sep 17 00:00:00 2001
From: Bitterblue Smith <rtl8821cerfe2@gmail.com>
Date: Wed, 29 May 2024 20:19:47 +0300
Subject: wifi: rtlwifi: Ignore IEEE80211_CONF_CHANGE_RETRY_LIMITS

Since commit 0a44dfc07074 ("wifi: mac80211: simplify non-chanctx
drivers") ieee80211_hw_config() is no longer called with changed = ~0.
rtlwifi relied on ~0 in order to ignore the default retry limits of
4/7, preferring 48/48 in station mode and 7/7 in AP/IBSS.

RTL8192DU has a lot of packet loss with the default limits from
mac80211. Fix it by ignoring IEEE80211_CONF_CHANGE_RETRY_LIMITS
completely, because it's the simplest solution.

Link: https://lore.kernel.org/linux-wireless/cedd13d7691f4692b2a2fa5a24d44a22@realtek.com/
Cc: stable@vger.kernel.org # 6.9.x
Signed-off-by: Bitterblue Smith <rtl8821cerfe2@gmail.com>
Acked-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@kernel.org>
Link: https://msgid.link/1fabb8e4-adf3-47ae-8462-8aea963bc2a5@gmail.com
---
 drivers/net/wireless/realtek/rtlwifi/core.c | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
index 2e60a6991ca1..42b7db12b1bd 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.c
+++ b/drivers/net/wireless/realtek/rtlwifi/core.c
@@ -633,21 +633,6 @@ static int rtl_op_config(struct ieee80211_hw *hw, u32 changed)
 		}
 	}
 
-	if (changed & IEEE80211_CONF_CHANGE_RETRY_LIMITS) {
-		rtl_dbg(rtlpriv, COMP_MAC80211, DBG_LOUD,
-			"IEEE80211_CONF_CHANGE_RETRY_LIMITS %x\n",
-			hw->conf.long_frame_max_tx_count);
-		/* brought up everything changes (changed == ~0) indicates first
-		 * open, so use our default value instead of that of wiphy.
-		 */
-		if (changed != ~0) {
-			mac->retry_long = hw->conf.long_frame_max_tx_count;
-			mac->retry_short = hw->conf.long_frame_max_tx_count;
-			rtlpriv->cfg->ops->set_hw_reg(hw, HW_VAR_RETRY_LIMIT,
-				(u8 *)(&hw->conf.long_frame_max_tx_count));
-		}
-	}
-
 	if (changed & IEEE80211_CONF_CHANGE_CHANNEL &&
 	    !rtlpriv->proximity.proxim_on) {
 		struct ieee80211_channel *channel = hw->conf.chandef.chan;
-- 
cgit v1.2.3


From 5fc16fa5f13b3c06fdb959ef262050bd810416a2 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 1 Jun 2024 12:25:35 -0600
Subject: io_uring: check for non-NULL file pointer in io_file_can_poll()

In earlier kernels, it was possible to trigger a NULL pointer
dereference off the forced async preparation path, if no file had
been assigned. The trace leading to that looks as follows:

BUG: kernel NULL pointer dereference, address: 00000000000000b0
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP
CPU: 67 PID: 1633 Comm: buf-ring-invali Not tainted 6.8.0-rc3+ #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS unknown 2/2/2022
RIP: 0010:io_buffer_select+0xc3/0x210
Code: 00 00 48 39 d1 0f 82 ae 00 00 00 48 81 4b 48 00 00 01 00 48 89 73 70 0f b7 50 0c 66 89 53 42 85 ed 0f 85 d2 00 00 00 48 8b 13 <48> 8b 92 b0 00 00 00 48 83 7a 40 00 0f 84 21 01 00 00 4c 8b 20 5b
RSP: 0018:ffffb7bec38c7d88 EFLAGS: 00010246
RAX: ffff97af2be61000 RBX: ffff97af234f1700 RCX: 0000000000000040
RDX: 0000000000000000 RSI: ffff97aecfb04820 RDI: ffff97af234f1700
RBP: 0000000000000000 R08: 0000000000200030 R09: 0000000000000020
R10: ffffb7bec38c7dc8 R11: 000000000000c000 R12: ffffb7bec38c7db8
R13: ffff97aecfb05800 R14: ffff97aecfb05800 R15: ffff97af2be5e000
FS:  00007f852f74b740(0000) GS:ffff97b1eeec0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000000000b0 CR3: 000000016deab005 CR4: 0000000000370ef0
Call Trace:
 <TASK>
 ? __die+0x1f/0x60
 ? page_fault_oops+0x14d/0x420
 ? do_user_addr_fault+0x61/0x6a0
 ? exc_page_fault+0x6c/0x150
 ? asm_exc_page_fault+0x22/0x30
 ? io_buffer_select+0xc3/0x210
 __io_import_iovec+0xb5/0x120
 io_readv_prep_async+0x36/0x70
 io_queue_sqe_fallback+0x20/0x260
 io_submit_sqes+0x314/0x630
 __do_sys_io_uring_enter+0x339/0xbc0
 ? __do_sys_io_uring_register+0x11b/0xc50
 ? vm_mmap_pgoff+0xce/0x160
 do_syscall_64+0x5f/0x180
 entry_SYSCALL_64_after_hwframe+0x46/0x4e
RIP: 0033:0x55e0a110a67e
Code: ba cc 00 00 00 45 31 c0 44 0f b6 92 d0 00 00 00 31 d2 41 b9 08 00 00 00 41 83 e2 01 41 c1 e2 04 41 09 c2 b8 aa 01 00 00 0f 05 <c3> 90 89 30 eb a9 0f 1f 40 00 48 8b 42 20 8b 00 a8 06 75 af 85 f6

because the request is marked forced ASYNC and has a bad file fd, and
hence takes the forced async prep path.

Current kernels with the request async prep cleaned up can no longer hit
this issue, but for ease of backporting, let's add this safety check in
here too as it really doesn't hurt. For both cases, this will inevitably
end with a CQE posted with -EBADF.

Cc: stable@vger.kernel.org
Fixes: a76c0b31eef5 ("io_uring: commit non-pollable provided mapped buffers upfront")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io_uring.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 624ca9076a50..726e6367af4d 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -433,7 +433,7 @@ static inline bool io_file_can_poll(struct io_kiocb *req)
 {
 	if (req->flags & REQ_F_CAN_POLL)
 		return true;
-	if (file_can_poll(req->file)) {
+	if (req->file && file_can_poll(req->file)) {
 		req->flags |= REQ_F_CAN_POLL;
 		return true;
 	}
-- 
cgit v1.2.3


From 30636258a7c9174be44ddb2318bae4f66d4beab0 Mon Sep 17 00:00:00 2001
From: Heng Qi <hengqi@linux.alibaba.com>
Date: Thu, 30 May 2024 11:41:43 +0800
Subject: virtio_net: fix missing lock protection on control_buf access

Refactored the handling of control_buf to be within the cvq_lock
critical section, mitigating race conditions between reading device
responses and new command submissions.

Fixes: 6f45ab3e0409 ("virtio_net: Add a lock for the command VQ.")
Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Hariprasad Kelam <hkelam@marvell.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240530034143.19579-1-hengqi@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/virtio_net.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 4a802c0ea2cb..1ea8e6a24286 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2686,6 +2686,7 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
 {
 	struct scatterlist *sgs[5], hdr, stat;
 	u32 out_num = 0, tmp, in_num = 0;
+	bool ok;
 	int ret;
 
 	/* Caller should know better */
@@ -2731,8 +2732,9 @@ static bool virtnet_send_command_reply(struct virtnet_info *vi, u8 class, u8 cmd
 	}
 
 unlock:
+	ok = vi->ctrl->status == VIRTIO_NET_OK;
 	mutex_unlock(&vi->cvq_lock);
-	return vi->ctrl->status == VIRTIO_NET_OK;
+	return ok;
 }
 
 static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
-- 
cgit v1.2.3


From 7679935b8bdf8d2bc298b04ed579d1ed78e81907 Mon Sep 17 00:00:00 2001
From: Peter Geis <pgwipeout@gmail.com>
Date: Wed, 29 May 2024 14:56:35 -0400
Subject: MAINTAINERS: remove Peter Geis

The Motorcomm PHY driver is now maintained by the OEM. The driver has
expanded far beyond my original purpose, and I do not have the hardware
to test against the new portions of it. Therefore I am removing myself as
a maintainer of the driver.

Signed-off-by: Peter Geis <pgwipeout@gmail.com>
Link: https://lore.kernel.org/r/20240529185635.538072-1-pgwipeout@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index ba231d2f3a4e..f7b8025e3c27 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15239,7 +15239,6 @@ F:	drivers/staging/most/
 F:	include/linux/most.h
 
 MOTORCOMM PHY DRIVER
-M:	Peter Geis <pgwipeout@gmail.com>
 M:	Frank <Frank.Sae@motor-comm.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
-- 
cgit v1.2.3


From 89e281ebff72e6d37dce2df0e142b2909dafb267 Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vadfed@meta.com>
Date: Wed, 29 May 2024 21:08:14 -0700
Subject: ethtool: init tsinfo stats if requested

Statistic values should be set to ETHTOOL_STAT_NOT_SET even if the
device doesn't support statistics. Otherwise zeros will be returned as
if they are proper values:

host# ethtool -I -T lo
Time stamping parameters for lo:
Capabilities:
	software-transmit
	software-receive
	software-system-clock
PTP Hardware Clock: none
Hardware Transmit Timestamp Modes: none
Hardware Receive Filter Modes: none
Statistics:
  tx_pkts: 0
  tx_lost: 0
  tx_err: 0

Fixes: 0e9c127729be ("ethtool: add interface to read Tx hardware timestamping statistics")
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
Reviewed-by: Rahul Rameshbabu <rrameshbabu@nvidia.com>
Link: https://lore.kernel.org/r/20240530040814.1014446-1-vadfed@meta.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ethtool/tsinfo.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c
index be2755c8d8fd..57d496287e52 100644
--- a/net/ethtool/tsinfo.c
+++ b/net/ethtool/tsinfo.c
@@ -38,11 +38,11 @@ static int tsinfo_prepare_data(const struct ethnl_req_info *req_base,
 	ret = ethnl_ops_begin(dev);
 	if (ret < 0)
 		return ret;
-	if (req_base->flags & ETHTOOL_FLAG_STATS &&
-	    dev->ethtool_ops->get_ts_stats) {
+	if (req_base->flags & ETHTOOL_FLAG_STATS) {
 		ethtool_stats_init((u64 *)&data->stats,
 				   sizeof(data->stats) / sizeof(u64));
-		dev->ethtool_ops->get_ts_stats(dev, &data->stats);
+		if (dev->ethtool_ops->get_ts_stats)
+			dev->ethtool_ops->get_ts_stats(dev, &data->stats);
 	}
 	ret = __ethtool_get_ts_info(dev, &data->ts_info);
 	ethnl_ops_complete(dev);
-- 
cgit v1.2.3


From 9e0945b1901c9eed4fbee3b8a3870487b2bdc936 Mon Sep 17 00:00:00 2001
From: Heng Qi <hengqi@linux.alibaba.com>
Date: Tue, 28 May 2024 21:41:15 +0800
Subject: virtio_net: fix possible dim status unrecoverable

When the dim worker is scheduled, if it no longer needs to issue
commands, dim may not be able to return to the working state later.

For example, the following single queue scenario:
  1. The dim worker of rxq0 is scheduled, and the dim status is
     changed to DIM_APPLY_NEW_PROFILE;
  2. dim is disabled or parameters have not been modified;
  3. virtnet_rx_dim_work exits directly;

Then, even if net_dim is invoked again, it cannot work because the
state is not restored to DIM_START_MEASURE.

Fixes: 6208799553a8 ("virtio-net: support rx netdim")
Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Link: https://lore.kernel.org/r/20240528134116.117426-2-hengqi@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/virtio_net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1ea8e6a24286..774a292a6090 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -4419,9 +4419,9 @@ static void virtnet_rx_dim_work(struct work_struct *work)
 		if (err)
 			pr_debug("%s: Failed to send dim parameters on rxq%d\n",
 				 dev->name, qnum);
-		dim->state = DIM_START_MEASURE;
 	}
 out:
+	dim->state = DIM_START_MEASURE;
 	mutex_unlock(&rq->dim_lock);
 }
 
-- 
cgit v1.2.3


From d1f0bd01bc58f35b5353ad9dbe5f7249a8f3368e Mon Sep 17 00:00:00 2001
From: Heng Qi <hengqi@linux.alibaba.com>
Date: Tue, 28 May 2024 21:41:16 +0800
Subject: virtio_net: fix a spurious deadlock issue

When the following snippet is run, lockdep will report a deadlock[1].

  /* Acquire all queues dim_locks */
  for (i = 0; i < vi->max_queue_pairs; i++)
          mutex_lock(&vi->rq[i].dim_lock);

There's no deadlock here because the vq locks are always taken
in the same order, but lockdep can not figure it out. So refactoring
the code to alleviate the problem.

[1]
========================================================
WARNING: possible recursive locking detected
6.9.0-rc7+ #319 Not tainted
--------------------------------------------
ethtool/962 is trying to acquire lock:

but task is already holding lock:

other info that might help us debug this:
Possible unsafe locking scenario:

      CPU0
      ----
 lock(&vi->rq[i].dim_lock);
 lock(&vi->rq[i].dim_lock);

*** DEADLOCK ***

 May be due to missing lock nesting notation

3 locks held by ethtool/962:
 #0: ffffffff82dbaab0 (cb_lock){++++}-{3:3}, at: genl_rcv+0x19/0x40
 #1: ffffffff82dad0a8 (rtnl_mutex){+.+.}-{3:3}, at:
				ethnl_default_set_doit+0xbe/0x1e0

stack backtrace:
CPU: 6 PID: 962 Comm: ethtool Not tainted 6.9.0-rc7+ #319
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
	   rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
Call Trace:
 <TASK>
 dump_stack_lvl+0x79/0xb0
 check_deadlock+0x130/0x220
 __lock_acquire+0x861/0x990
 lock_acquire.part.0+0x72/0x1d0
 ? lock_acquire+0xf8/0x130
 __mutex_lock+0x71/0xd50
 virtnet_set_coalesce+0x151/0x190
 __ethnl_set_coalesce.isra.0+0x3f8/0x4d0
 ethnl_set_coalesce+0x34/0x90
 ethnl_default_set_doit+0xdd/0x1e0
 genl_family_rcv_msg_doit+0xdc/0x130
 genl_family_rcv_msg+0x154/0x230
 ? __pfx_ethnl_default_set_doit+0x10/0x10
 genl_rcv_msg+0x4b/0xa0
 ? __pfx_genl_rcv_msg+0x10/0x10
 netlink_rcv_skb+0x5a/0x110
 genl_rcv+0x28/0x40
 netlink_unicast+0x1af/0x280
 netlink_sendmsg+0x20e/0x460
 __sys_sendto+0x1fe/0x210
 ? find_held_lock+0x2b/0x80
 ? do_user_addr_fault+0x3a2/0x8a0
 ? __lock_release+0x5e/0x160
 ? do_user_addr_fault+0x3a2/0x8a0
 ? lock_release+0x72/0x140
 ? do_user_addr_fault+0x3a7/0x8a0
 __x64_sys_sendto+0x29/0x30
 do_syscall_64+0x78/0x180
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

Fixes: 4d4ac2ececd3 ("virtio_net: Add a lock for per queue RX coalesce")
Signed-off-by: Heng Qi <hengqi@linux.alibaba.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/20240528134116.117426-3-hengqi@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/virtio_net.c | 36 ++++++++++++++++--------------------
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 774a292a6090..61a57d134544 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -4259,7 +4259,6 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
 	struct virtio_net_ctrl_coal_rx *coal_rx __free(kfree) = NULL;
 	bool rx_ctrl_dim_on = !!ec->use_adaptive_rx_coalesce;
 	struct scatterlist sgs_rx;
-	int ret = 0;
 	int i;
 
 	if (rx_ctrl_dim_on && !virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL))
@@ -4269,27 +4268,27 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
 			       ec->rx_max_coalesced_frames != vi->intr_coal_rx.max_packets))
 		return -EINVAL;
 
-	/* Acquire all queues dim_locks */
-	for (i = 0; i < vi->max_queue_pairs; i++)
-		mutex_lock(&vi->rq[i].dim_lock);
-
 	if (rx_ctrl_dim_on && !vi->rx_dim_enabled) {
 		vi->rx_dim_enabled = true;
-		for (i = 0; i < vi->max_queue_pairs; i++)
+		for (i = 0; i < vi->max_queue_pairs; i++) {
+			mutex_lock(&vi->rq[i].dim_lock);
 			vi->rq[i].dim_enabled = true;
-		goto unlock;
+			mutex_unlock(&vi->rq[i].dim_lock);
+		}
+		return 0;
 	}
 
 	coal_rx = kzalloc(sizeof(*coal_rx), GFP_KERNEL);
-	if (!coal_rx) {
-		ret = -ENOMEM;
-		goto unlock;
-	}
+	if (!coal_rx)
+		return -ENOMEM;
 
 	if (!rx_ctrl_dim_on && vi->rx_dim_enabled) {
 		vi->rx_dim_enabled = false;
-		for (i = 0; i < vi->max_queue_pairs; i++)
+		for (i = 0; i < vi->max_queue_pairs; i++) {
+			mutex_lock(&vi->rq[i].dim_lock);
 			vi->rq[i].dim_enabled = false;
+			mutex_unlock(&vi->rq[i].dim_lock);
+		}
 	}
 
 	/* Since the per-queue coalescing params can be set,
@@ -4302,22 +4301,19 @@ static int virtnet_send_rx_notf_coal_cmds(struct virtnet_info *vi,
 
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
 				  VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
-				  &sgs_rx)) {
-		ret = -EINVAL;
-		goto unlock;
-	}
+				  &sgs_rx))
+		return -EINVAL;
 
 	vi->intr_coal_rx.max_usecs = ec->rx_coalesce_usecs;
 	vi->intr_coal_rx.max_packets = ec->rx_max_coalesced_frames;
 	for (i = 0; i < vi->max_queue_pairs; i++) {
+		mutex_lock(&vi->rq[i].dim_lock);
 		vi->rq[i].intr_coal.max_usecs = ec->rx_coalesce_usecs;
 		vi->rq[i].intr_coal.max_packets = ec->rx_max_coalesced_frames;
-	}
-unlock:
-	for (i = vi->max_queue_pairs - 1; i >= 0; i--)
 		mutex_unlock(&vi->rq[i].dim_lock);
+	}
 
-	return ret;
+	return 0;
 }
 
 static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
-- 
cgit v1.2.3


From 3c34fb0bd4a4237592c5ecb5b2e2531900c55774 Mon Sep 17 00:00:00 2001
From: Lars Kellogg-Stedman <lars@oddbit.com>
Date: Wed, 29 May 2024 17:02:43 -0400
Subject: ax25: Fix refcount imbalance on inbound connections

When releasing a socket in ax25_release(), we call netdev_put() to
decrease the refcount on the associated ax.25 device. However, the
execution path for accepting an incoming connection never calls
netdev_hold(). This imbalance leads to refcount errors, and ultimately
to kernel crashes.

A typical call trace for the above situation will start with one of the
following errors:

    refcount_t: decrement hit 0; leaking memory.
    refcount_t: underflow; use-after-free.

And will then have a trace like:

    Call Trace:
    <TASK>
    ? show_regs+0x64/0x70
    ? __warn+0x83/0x120
    ? refcount_warn_saturate+0xb2/0x100
    ? report_bug+0x158/0x190
    ? prb_read_valid+0x20/0x30
    ? handle_bug+0x3e/0x70
    ? exc_invalid_op+0x1c/0x70
    ? asm_exc_invalid_op+0x1f/0x30
    ? refcount_warn_saturate+0xb2/0x100
    ? refcount_warn_saturate+0xb2/0x100
    ax25_release+0x2ad/0x360
    __sock_release+0x35/0xa0
    sock_close+0x19/0x20
    [...]

On reboot (or any attempt to remove the interface), the kernel gets
stuck in an infinite loop:

    unregister_netdevice: waiting for ax0 to become free. Usage count = 0

This patch corrects these issues by ensuring that we call netdev_hold()
and ax25_dev_hold() for new connections in ax25_accept(). This makes the
logic leading to ax25_accept() match the logic for ax25_bind(): in both
cases we increment the refcount, which is ultimately decremented in
ax25_release().

Fixes: 9fd75b66b8f6 ("ax25: Fix refcount leaks caused by ax25_cb_del()")
Signed-off-by: Lars Kellogg-Stedman <lars@oddbit.com>
Tested-by: Duoming Zhou <duoming@zju.edu.cn>
Tested-by: Dan Cross <crossd@gmail.com>
Tested-by: Chris Maness <christopher.maness@gmail.com>
Reviewed-by: Dan Carpenter <dan.carpenter@linaro.org>
Link: https://lore.kernel.org/r/20240529210242.3346844-2-lars@oddbit.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ax25/af_ax25.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 8077cf2ee448..d6f9fae06a9d 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1378,8 +1378,10 @@ static int ax25_accept(struct socket *sock, struct socket *newsock,
 {
 	struct sk_buff *skb;
 	struct sock *newsk;
+	ax25_dev *ax25_dev;
 	DEFINE_WAIT(wait);
 	struct sock *sk;
+	ax25_cb *ax25;
 	int err = 0;
 
 	if (sock->state != SS_UNCONNECTED)
@@ -1434,6 +1436,10 @@ static int ax25_accept(struct socket *sock, struct socket *newsock,
 	kfree_skb(skb);
 	sk_acceptq_removed(sk);
 	newsock->state = SS_CONNECTED;
+	ax25 = sk_to_ax25(newsk);
+	ax25_dev = ax25->ax25_dev;
+	netdev_hold(ax25_dev->dev, &ax25->dev_tracker, GFP_ATOMIC);
+	ax25_dev_hold(ax25_dev);
 
 out:
 	release_sock(sk);
-- 
cgit v1.2.3


From 166fcf86cd34e15c7f383eda4642d7a212393008 Mon Sep 17 00:00:00 2001
From: Duoming Zhou <duoming@zju.edu.cn>
Date: Thu, 30 May 2024 13:17:33 +0800
Subject: ax25: Replace kfree() in ax25_dev_free() with ax25_dev_put()

The object "ax25_dev" is managed by reference counting. Thus it should
not be directly released by kfree(), replace with ax25_dev_put().

Fixes: d01ffb9eee4a ("ax25: add refcount in ax25_dev to avoid UAF bugs")
Suggested-by: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
Reviewed-by: Dan Carpenter <dan.carpenter@linaro.org>
Link: https://lore.kernel.org/r/20240530051733.11416-1-duoming@zju.edu.cn
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ax25/ax25_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 742d7c68e7e7..9efd6690b344 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -196,7 +196,7 @@ void __exit ax25_dev_free(void)
 	list_for_each_entry_safe(s, n, &ax25_dev_list, list) {
 		netdev_put(s->dev, &s->dev_tracker);
 		list_del(&s->list);
-		kfree(s);
+		ax25_dev_put(s);
 	}
 	spin_unlock_bh(&ax25_dev_lock);
 }
-- 
cgit v1.2.3


From 8105378c0c02309221b63d15cfe8fe92e7fe1434 Mon Sep 17 00:00:00 2001
From: Jason Xing <kernelxing@tencent.com>
Date: Thu, 30 May 2024 11:27:17 +0800
Subject: net: rps: fix error when CONFIG_RFS_ACCEL is off

John Sperbeck reported that if we turn off CONFIG_RFS_ACCEL, the 'head'
is not defined, which will trigger compile error. So I move the 'head'
out of the CONFIG_RFS_ACCEL scope.

Fixes: 84b6823cd96b ("net: rps: protect last_qtail with rps_input_queue_tail_save() helper")
Reported-by: John Sperbeck <jsperbeck@google.com>
Closes: https://lore.kernel.org/all/20240529203421.2432481-1-jsperbeck@google.com/
Signed-off-by: Jason Xing <kernelxing@tencent.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20240530032717.57787-1-kerneljasonxing@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index e1bb6d7856d9..4d4de9008f6f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4516,12 +4516,13 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	    struct rps_dev_flow *rflow, u16 next_cpu)
 {
 	if (next_cpu < nr_cpu_ids) {
+		u32 head;
 #ifdef CONFIG_RFS_ACCEL
 		struct netdev_rx_queue *rxqueue;
 		struct rps_dev_flow_table *flow_table;
 		struct rps_dev_flow *old_rflow;
-		u32 flow_id, head;
 		u16 rxq_index;
+		u32 flow_id;
 		int rc;
 
 		/* Should we steer this flow to a different hardware queue? */
-- 
cgit v1.2.3


From e85e271dec0270982afed84f70dc37703fcc1d52 Mon Sep 17 00:00:00 2001
From: DelphineCCChiu <delphine_cc_chiu@wiwynn.com>
Date: Wed, 29 May 2024 14:58:55 +0800
Subject: net/ncsi: Fix the multi thread manner of NCSI driver

Currently NCSI driver will send several NCSI commands back to back without
waiting the response of previous NCSI command or timeout in some state
when NIC have multi channel. This operation against the single thread
manner defined by NCSI SPEC(section 6.3.2.3 in DSP0222_1.1.1)

According to NCSI SPEC(section 6.2.13.1 in DSP0222_1.1.1), we should probe
one channel at a time by sending NCSI commands (Clear initial state, Get
version ID, Get capabilities...), than repeat this steps until the max
number of channels which we got from NCSI command (Get capabilities) has
been probed.

Fixes: e6f44ed6d04d ("net/ncsi: Package and channel management")
Signed-off-by: DelphineCCChiu <delphine_cc_chiu@wiwynn.com>
Link: https://lore.kernel.org/r/20240529065856.825241-1-delphine_cc_chiu@wiwynn.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ncsi/internal.h    |  2 ++
 net/ncsi/ncsi-manage.c | 73 +++++++++++++++++++++++++-------------------------
 net/ncsi/ncsi-rsp.c    |  4 ++-
 3 files changed, 41 insertions(+), 38 deletions(-)

diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 374412ed780b..ef0f8f73826f 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -325,6 +325,7 @@ struct ncsi_dev_priv {
 	spinlock_t          lock;            /* Protect the NCSI device    */
 	unsigned int        package_probe_id;/* Current ID during probe    */
 	unsigned int        package_num;     /* Number of packages         */
+	unsigned int        channel_probe_id;/* Current cahnnel ID during probe */
 	struct list_head    packages;        /* List of packages           */
 	struct ncsi_channel *hot_channel;    /* Channel was ever active    */
 	struct ncsi_request requests[256];   /* Request table              */
@@ -343,6 +344,7 @@ struct ncsi_dev_priv {
 	bool                multi_package;   /* Enable multiple packages   */
 	bool                mlx_multi_host;  /* Enable multi host Mellanox */
 	u32                 package_whitelist; /* Packages to configure    */
+	unsigned char       channel_count;     /* Num of channels to probe   */
 };
 
 struct ncsi_cmd_arg {
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 745c788f1d1d..5ecf611c8820 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -510,17 +510,19 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
 
 		break;
 	case ncsi_dev_state_suspend_gls:
-		ndp->pending_req_num = np->channel_num;
+		ndp->pending_req_num = 1;
 
 		nca.type = NCSI_PKT_CMD_GLS;
 		nca.package = np->id;
+		nca.channel = ndp->channel_probe_id;
+		ret = ncsi_xmit_cmd(&nca);
+		if (ret)
+			goto error;
+		ndp->channel_probe_id++;
 
-		nd->state = ncsi_dev_state_suspend_dcnt;
-		NCSI_FOR_EACH_CHANNEL(np, nc) {
-			nca.channel = nc->id;
-			ret = ncsi_xmit_cmd(&nca);
-			if (ret)
-				goto error;
+		if (ndp->channel_probe_id == ndp->channel_count) {
+			ndp->channel_probe_id = 0;
+			nd->state = ncsi_dev_state_suspend_dcnt;
 		}
 
 		break;
@@ -1345,7 +1347,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
 {
 	struct ncsi_dev *nd = &ndp->ndev;
 	struct ncsi_package *np;
-	struct ncsi_channel *nc;
 	struct ncsi_cmd_arg nca;
 	unsigned char index;
 	int ret;
@@ -1423,23 +1424,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
 
 		nd->state = ncsi_dev_state_probe_cis;
 		break;
-	case ncsi_dev_state_probe_cis:
-		ndp->pending_req_num = NCSI_RESERVED_CHANNEL;
-
-		/* Clear initial state */
-		nca.type = NCSI_PKT_CMD_CIS;
-		nca.package = ndp->active_package->id;
-		for (index = 0; index < NCSI_RESERVED_CHANNEL; index++) {
-			nca.channel = index;
-			ret = ncsi_xmit_cmd(&nca);
-			if (ret)
-				goto error;
-		}
-
-		nd->state = ncsi_dev_state_probe_gvi;
-		if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY))
-			nd->state = ncsi_dev_state_probe_keep_phy;
-		break;
 	case ncsi_dev_state_probe_keep_phy:
 		ndp->pending_req_num = 1;
 
@@ -1452,14 +1436,17 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
 
 		nd->state = ncsi_dev_state_probe_gvi;
 		break;
+	case ncsi_dev_state_probe_cis:
 	case ncsi_dev_state_probe_gvi:
 	case ncsi_dev_state_probe_gc:
 	case ncsi_dev_state_probe_gls:
 		np = ndp->active_package;
-		ndp->pending_req_num = np->channel_num;
+		ndp->pending_req_num = 1;
 
-		/* Retrieve version, capability or link status */
-		if (nd->state == ncsi_dev_state_probe_gvi)
+		/* Clear initial state Retrieve version, capability or link status */
+		if (nd->state == ncsi_dev_state_probe_cis)
+			nca.type = NCSI_PKT_CMD_CIS;
+		else if (nd->state == ncsi_dev_state_probe_gvi)
 			nca.type = NCSI_PKT_CMD_GVI;
 		else if (nd->state == ncsi_dev_state_probe_gc)
 			nca.type = NCSI_PKT_CMD_GC;
@@ -1467,19 +1454,29 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
 			nca.type = NCSI_PKT_CMD_GLS;
 
 		nca.package = np->id;
-		NCSI_FOR_EACH_CHANNEL(np, nc) {
-			nca.channel = nc->id;
-			ret = ncsi_xmit_cmd(&nca);
-			if (ret)
-				goto error;
-		}
+		nca.channel = ndp->channel_probe_id;
 
-		if (nd->state == ncsi_dev_state_probe_gvi)
+		ret = ncsi_xmit_cmd(&nca);
+		if (ret)
+			goto error;
+
+		if (nd->state == ncsi_dev_state_probe_cis) {
+			nd->state = ncsi_dev_state_probe_gvi;
+			if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) && ndp->channel_probe_id == 0)
+				nd->state = ncsi_dev_state_probe_keep_phy;
+		} else if (nd->state == ncsi_dev_state_probe_gvi) {
 			nd->state = ncsi_dev_state_probe_gc;
-		else if (nd->state == ncsi_dev_state_probe_gc)
+		} else if (nd->state == ncsi_dev_state_probe_gc) {
 			nd->state = ncsi_dev_state_probe_gls;
-		else
+		} else {
+			nd->state = ncsi_dev_state_probe_cis;
+			ndp->channel_probe_id++;
+		}
+
+		if (ndp->channel_probe_id == ndp->channel_count) {
+			ndp->channel_probe_id = 0;
 			nd->state = ncsi_dev_state_probe_dp;
+		}
 		break;
 	case ncsi_dev_state_probe_dp:
 		ndp->pending_req_num = 1;
@@ -1780,6 +1777,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
 		ndp->requests[i].ndp = ndp;
 		timer_setup(&ndp->requests[i].timer, ncsi_request_timeout, 0);
 	}
+	ndp->channel_count = NCSI_RESERVED_CHANNEL;
 
 	spin_lock_irqsave(&ncsi_dev_lock, flags);
 	list_add_tail_rcu(&ndp->node, &ncsi_dev_list);
@@ -1813,6 +1811,7 @@ int ncsi_start_dev(struct ncsi_dev *nd)
 
 	if (!(ndp->flags & NCSI_DEV_PROBED)) {
 		ndp->package_probe_id = 0;
+		ndp->channel_probe_id = 0;
 		nd->state = ncsi_dev_state_probe;
 		schedule_work(&ndp->work);
 		return 0;
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index bee290d0f48b..e28be33bdf2c 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -795,12 +795,13 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
 	struct ncsi_rsp_gc_pkt *rsp;
 	struct ncsi_dev_priv *ndp = nr->ndp;
 	struct ncsi_channel *nc;
+	struct ncsi_package *np;
 	size_t size;
 
 	/* Find the channel */
 	rsp = (struct ncsi_rsp_gc_pkt *)skb_network_header(nr->rsp);
 	ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel,
-				      NULL, &nc);
+				      &np, &nc);
 	if (!nc)
 		return -ENODEV;
 
@@ -835,6 +836,7 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
 	 */
 	nc->vlan_filter.bitmap = U64_MAX;
 	nc->vlan_filter.n_vids = rsp->vlan_cnt;
+	np->ndp->channel_count = rsp->channel_cnt;
 
 	return 0;
 }
-- 
cgit v1.2.3


From 33700a0c9b562700c28d31360a5f04508f459a45 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <0x7f454c46@gmail.com>
Date: Wed, 29 May 2024 18:29:32 +0100
Subject: net/tcp: Don't consider TCP_CLOSE in TCP_AO_ESTABLISHED

TCP_CLOSE may or may not have current/rnext keys and should not be
considered "established". The fast-path for TCP_CLOSE is
SKB_DROP_REASON_TCP_CLOSE. This is what tcp_rcv_state_process() does
anyways. Add an early drop path to not spend any time verifying
segment signatures for sockets in TCP_CLOSE state.

Cc: stable@vger.kernel.org # v6.7
Fixes: 0a3a809089eb ("net/tcp: Verify inbound TCP-AO signed segments")
Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com>
Link: https://lore.kernel.org/r/20240529-tcp_ao-sk_state-v1-1-d69b5d323c52@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/tcp_ao.h |  7 ++++---
 net/ipv4/tcp_ao.c    | 13 +++++++++----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
index 471e177362b4..5d8e9ed2c005 100644
--- a/include/net/tcp_ao.h
+++ b/include/net/tcp_ao.h
@@ -86,7 +86,8 @@ static inline int tcp_ao_sizeof_key(const struct tcp_ao_key *key)
 struct tcp_ao_info {
 	/* List of tcp_ao_key's */
 	struct hlist_head	head;
-	/* current_key and rnext_key aren't maintained on listen sockets.
+	/* current_key and rnext_key are maintained on sockets
+	 * in TCP_AO_ESTABLISHED states.
 	 * Their purpose is to cache keys on established connections,
 	 * saving needless lookups. Never dereference any of them from
 	 * listen sockets.
@@ -201,9 +202,9 @@ struct tcp6_ao_context {
 };
 
 struct tcp_sigpool;
+/* Established states are fast-path and there always is current_key/rnext_key */
 #define TCP_AO_ESTABLISHED (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | \
-			    TCPF_CLOSE | TCPF_CLOSE_WAIT | \
-			    TCPF_LAST_ACK | TCPF_CLOSING)
+			    TCPF_CLOSE_WAIT | TCPF_LAST_ACK | TCPF_CLOSING)
 
 int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb,
 			struct tcp_ao_key *key, struct tcphdr *th,
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index 781b67a52571..37c42b63ff99 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -933,6 +933,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
 	struct tcp_ao_key *key;
 	__be32 sisn, disn;
 	u8 *traffic_key;
+	int state;
 	u32 sne = 0;
 
 	info = rcu_dereference(tcp_sk(sk)->ao_info);
@@ -948,8 +949,9 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
 		disn = 0;
 	}
 
+	state = READ_ONCE(sk->sk_state);
 	/* Fast-path */
-	if (likely((1 << sk->sk_state) & TCP_AO_ESTABLISHED)) {
+	if (likely((1 << state) & TCP_AO_ESTABLISHED)) {
 		enum skb_drop_reason err;
 		struct tcp_ao_key *current_key;
 
@@ -988,6 +990,9 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
 		return SKB_NOT_DROPPED_YET;
 	}
 
+	if (unlikely(state == TCP_CLOSE))
+		return SKB_DROP_REASON_TCP_CLOSE;
+
 	/* Lookup key based on peer address and keyid.
 	 * current_key and rnext_key must not be used on tcp listen
 	 * sockets as otherwise:
@@ -1001,7 +1006,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
 	if (th->syn && !th->ack)
 		goto verify_hash;
 
-	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) {
+	if ((1 << state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) {
 		/* Make the initial syn the likely case here */
 		if (unlikely(req)) {
 			sne = tcp_ao_compute_sne(0, tcp_rsk(req)->rcv_isn,
@@ -1018,14 +1023,14 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb,
 			/* no way to figure out initial sisn/disn - drop */
 			return SKB_DROP_REASON_TCP_FLAGS;
 		}
-	} else if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+	} else if ((1 << state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
 		disn = info->lisn;
 		if (th->syn || th->rst)
 			sisn = th->seq;
 		else
 			sisn = info->risn;
 	} else {
-		WARN_ONCE(1, "TCP-AO: Unexpected sk_state %d", sk->sk_state);
+		WARN_ONCE(1, "TCP-AO: Unexpected sk_state %d", state);
 		return SKB_DROP_REASON_TCP_AOFAILURE;
 	}
 verify_hash:
-- 
cgit v1.2.3


From c3f38fa61af77b49866b006939479069cd451173 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 2 Jun 2024 15:44:56 -0700
Subject: Linux 6.10-rc2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index f975b6396328..7f921ae547f1 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 10
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Baby Opossum Posse
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3


From 6c3ca6654a74dd396bc477839ba8d9792eced441 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Mon, 3 Jun 2024 15:45:53 +0800
Subject: LoongArch: Remove CONFIG_ACPI_TABLE_UPGRADE in platform_init()

Both acpi_table_upgrade() and acpi_boot_table_init() are defined as
empty functions under !CONFIG_ACPI_TABLE_UPGRADE and !CONFIG_ACPI in
include/linux/acpi.h, there are no implicit declaration errors with
various configs.

  #ifdef CONFIG_ACPI_TABLE_UPGRADE
  void acpi_table_upgrade(void);
  #else
  static inline void acpi_table_upgrade(void) { }
  #endif

  #ifdef	CONFIG_ACPI
  ...
  void acpi_boot_table_init (void);
  ...
  #else	/* !CONFIG_ACPI */
  ...
  static inline void acpi_boot_table_init(void)
  {
  }
  ...
  #endif	/* !CONFIG_ACPI */

As Huacai suggested, CONFIG_ACPI_TABLE_UPGRADE is ugly and not necessary
here, just remove it. At the same time, just keep CONFIG_ACPI to prevent
potential build errors in future, and give a signal to indicate the code
is ACPI-specific. For the same reason, we also put acpi_table_upgrade()
under CONFIG_ACPI.

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/kernel/setup.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 60e0fe97f61a..89ad756aeeed 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -351,10 +351,8 @@ void __init platform_init(void)
 	arch_reserve_vmcore();
 	arch_reserve_crashkernel();
 
-#ifdef CONFIG_ACPI_TABLE_UPGRADE
-	acpi_table_upgrade();
-#endif
 #ifdef CONFIG_ACPI
+	acpi_table_upgrade();
 	acpi_gbl_use_default_register_widths = false;
 	acpi_boot_table_init();
 #endif
-- 
cgit v1.2.3


From b56f67a6c748bb009f313f91651c8020d2338d63 Mon Sep 17 00:00:00 2001
From: Jiaxun Yang <jiaxun.yang@flygoat.com>
Date: Mon, 3 Jun 2024 15:45:53 +0800
Subject: LoongArch: Fix built-in DTB detection

fdt_check_header(__dtb_start) will always success because kernel
provides a dummy dtb, and by coincidence __dtb_start clashed with
entry of this dummy dtb. The consequence is fdt passed from firmware
will never be taken.

Fix by trying to utilise __dtb_start only when CONFIG_BUILTIN_DTB is
enabled.

Cc: stable@vger.kernel.org
Fixes: 7b937cc243e5 ("of: Create of_root if no dtb provided by firmware")
Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 89ad756aeeed..3d048f1be143 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -282,7 +282,7 @@ static void __init fdt_setup(void)
 		return;
 
 	/* Prefer to use built-in dtb, checking its legality first. */
-	if (!fdt_check_header(__dtb_start))
+	if (IS_ENABLED(CONFIG_BUILTIN_DTB) && !fdt_check_header(__dtb_start))
 		fdt_pointer = __dtb_start;
 	else
 		fdt_pointer = efi_fdt_pointer(); /* Fallback to firmware dtb */
-- 
cgit v1.2.3


From 3de9c42d02a79a5e09bbee7a4421ddc00cfd5c6d Mon Sep 17 00:00:00 2001
From: Jiaxun Yang <jiaxun.yang@flygoat.com>
Date: Mon, 3 Jun 2024 15:45:53 +0800
Subject: LoongArch: Add all CPUs enabled by fdt to NUMA node 0

NUMA enabled kernel on FDT based machine fails to boot because CPUs
are all in NUMA_NO_NODE and mm subsystem won't accept that.

Fix by adding them to default NUMA node at FDT parsing phase and move
numa_add_cpu(0) to a later point.

Cc: stable@vger.kernel.org
Fixes: 88d4d957edc7 ("LoongArch: Add FDT booting support from efi system table")
Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/numa.h | 1 +
 arch/loongarch/kernel/smp.c       | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/numa.h b/arch/loongarch/include/asm/numa.h
index 27f319b49862..b5f9de9f102e 100644
--- a/arch/loongarch/include/asm/numa.h
+++ b/arch/loongarch/include/asm/numa.h
@@ -56,6 +56,7 @@ extern int early_cpu_to_node(int cpu);
 static inline void early_numa_add_cpu(int cpuid, s16 node)	{ }
 static inline void numa_add_cpu(unsigned int cpu)		{ }
 static inline void numa_remove_cpu(unsigned int cpu)		{ }
+static inline void set_cpuid_to_node(int cpuid, s16 node)	{ }
 
 static inline int early_cpu_to_node(int cpu)
 {
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 0dfe2388ef41..1436d2465939 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -273,7 +273,6 @@ static void __init fdt_smp_setup(void)
 
 		if (cpuid == loongson_sysconf.boot_cpu_id) {
 			cpu = 0;
-			numa_add_cpu(cpu);
 		} else {
 			cpu = cpumask_next_zero(-1, cpu_present_mask);
 		}
@@ -283,6 +282,9 @@ static void __init fdt_smp_setup(void)
 		set_cpu_present(cpu, true);
 		__cpu_number_map[cpuid] = cpu;
 		__cpu_logical_map[cpu] = cpuid;
+
+		early_numa_add_cpu(cpu, 0);
+		set_cpuid_to_node(cpuid, 0);
 	}
 
 	loongson_sysconf.nr_cpus = num_processors;
@@ -468,6 +470,7 @@ void smp_prepare_boot_cpu(void)
 	set_cpu_possible(0, true);
 	set_cpu_online(0, true);
 	set_my_cpu_offset(per_cpu_offset(0));
+	numa_add_cpu(0);
 
 	rr_node = first_node(node_online_map);
 	for_each_possible_cpu(cpu) {
-- 
cgit v1.2.3


From beb2800074c15362cf9f6c7301120910046d6556 Mon Sep 17 00:00:00 2001
From: Jiaxun Yang <jiaxun.yang@flygoat.com>
Date: Mon, 3 Jun 2024 15:45:53 +0800
Subject: LoongArch: Fix entry point in kernel image header

Currently kernel entry in head.S is in DMW address range, firmware is
instructed to jump to this address after loading the kernel image.

However kernel should not make any assumption on firmware's DMW
setting, thus the entry point should be a physical address falls into
direct translation region.

Fix by converting entry address to physical and amend entry calculation
logic in libstub accordingly.

BTW, use ABSOLUTE() to calculate variables to make Clang/LLVM happy.

Cc: stable@vger.kernel.org
Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/kernel/head.S             |  2 +-
 arch/loongarch/kernel/vmlinux.lds.S      | 10 ++++++----
 drivers/firmware/efi/libstub/loongarch.c |  2 +-
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
index c4f7de2e2805..4677ea8fa8e9 100644
--- a/arch/loongarch/kernel/head.S
+++ b/arch/loongarch/kernel/head.S
@@ -22,7 +22,7 @@
 _head:
 	.word	MZ_MAGIC		/* "MZ", MS-DOS header */
 	.org	0x8
-	.dword	kernel_entry		/* Kernel entry point */
+	.dword	_kernel_entry		/* Kernel entry point (physical address) */
 	.dword	_kernel_asize		/* Kernel image effective size */
 	.quad	PHYS_LINK_KADDR		/* Kernel image load offset from start of RAM */
 	.org	0x38			/* 0x20 ~ 0x37 reserved */
diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S
index e8e97dbf9ca4..3c7595342730 100644
--- a/arch/loongarch/kernel/vmlinux.lds.S
+++ b/arch/loongarch/kernel/vmlinux.lds.S
@@ -6,6 +6,7 @@
 
 #define PAGE_SIZE _PAGE_SIZE
 #define RO_EXCEPTION_TABLE_ALIGN	4
+#define PHYSADDR_MASK			0xffffffffffff /* 48-bit */
 
 /*
  * Put .bss..swapper_pg_dir as the first thing in .bss. This will
@@ -142,10 +143,11 @@ SECTIONS
 
 #ifdef CONFIG_EFI_STUB
 	/* header symbols */
-	_kernel_asize = _end - _text;
-	_kernel_fsize = _edata - _text;
-	_kernel_vsize = _end - __initdata_begin;
-	_kernel_rsize = _edata - __initdata_begin;
+	_kernel_entry = ABSOLUTE(kernel_entry & PHYSADDR_MASK);
+	_kernel_asize = ABSOLUTE(_end - _text);
+	_kernel_fsize = ABSOLUTE(_edata - _text);
+	_kernel_vsize = ABSOLUTE(_end - __initdata_begin);
+	_kernel_rsize = ABSOLUTE(_edata - __initdata_begin);
 #endif
 
 	.gptab.sdata : {
diff --git a/drivers/firmware/efi/libstub/loongarch.c b/drivers/firmware/efi/libstub/loongarch.c
index 684c9354637c..d0ef93551c44 100644
--- a/drivers/firmware/efi/libstub/loongarch.c
+++ b/drivers/firmware/efi/libstub/loongarch.c
@@ -41,7 +41,7 @@ static efi_status_t exit_boot_func(struct efi_boot_memmap *map, void *priv)
 unsigned long __weak kernel_entry_address(unsigned long kernel_addr,
 		efi_loaded_image_t *image)
 {
-	return *(unsigned long *)(kernel_addr + 8) - VMLINUX_LOAD_ADDRESS + kernel_addr;
+	return *(unsigned long *)(kernel_addr + 8) - PHYSADDR(VMLINUX_LOAD_ADDRESS) + kernel_addr;
 }
 
 efi_status_t efi_boot_kernel(void *handle, efi_loaded_image_t *image,
-- 
cgit v1.2.3


From 1098efd299ffe9c8af818425338c7f6c4f930a98 Mon Sep 17 00:00:00 2001
From: Jiaxun Yang <jiaxun.yang@flygoat.com>
Date: Mon, 3 Jun 2024 15:45:53 +0800
Subject: LoongArch: Override higher address bits in JUMP_VIRT_ADDR

In JUMP_VIRT_ADDR we are performing an or calculation on address value
directly from pcaddi.

This will only work if we are currently running from direct 1:1 mapping
addresses or firmware's DMW is configured exactly same as kernel. Still,
we should not rely on such assumption.

Fix by overriding higher bits in address comes from pcaddi, so we can
get rid of or operator.

Cc: stable@vger.kernel.org
Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/stackframe.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
index 45b507a7b06f..d9eafd3ee3d1 100644
--- a/arch/loongarch/include/asm/stackframe.h
+++ b/arch/loongarch/include/asm/stackframe.h
@@ -42,7 +42,7 @@
 	.macro JUMP_VIRT_ADDR temp1 temp2
 	li.d	\temp1, CACHE_BASE
 	pcaddi	\temp2, 0
-	or	\temp1, \temp1, \temp2
+	bstrins.d  \temp1, \temp2, (DMW_PABITS - 1), 0
 	jirl	zero, \temp1, 0xc
 	.endm
 
-- 
cgit v1.2.3


From eb36e520f4f1b690fd776f15cbac452f82ff7bfa Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Mon, 3 Jun 2024 15:45:53 +0800
Subject: LoongArch: Fix GMAC's phy-mode definitions in dts

The GMAC of Loongson chips cannot insert the correct 1.5-2ns delay. So
we need the PHY to insert internal delays for both transmit and receive
data lines from/to the PHY device. Fix this by changing the "phy-mode"
from "rgmii" to "rgmii-id" in dts.

Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/boot/dts/loongson-2k0500-ref.dts | 4 ++--
 arch/loongarch/boot/dts/loongson-2k1000-ref.dts | 4 ++--
 arch/loongarch/boot/dts/loongson-2k2000-ref.dts | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
index 8aefb0c12672..a34734a6c3ce 100644
--- a/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k0500-ref.dts
@@ -44,14 +44,14 @@
 &gmac0 {
 	status = "okay";
 
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	bus_id = <0x0>;
 };
 
 &gmac1 {
 	status = "okay";
 
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	bus_id = <0x1>;
 };
 
diff --git a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
index 8463fe035386..23cf26cc3e5f 100644
--- a/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k1000-ref.dts
@@ -43,7 +43,7 @@
 &gmac0 {
 	status = "okay";
 
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-handle = <&phy0>;
 	mdio {
 		compatible = "snps,dwmac-mdio";
@@ -58,7 +58,7 @@
 &gmac1 {
 	status = "okay";
 
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-handle = <&phy1>;
 	mdio {
 		compatible = "snps,dwmac-mdio";
diff --git a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
index 74b99bd234cc..ea9e6985d0e9 100644
--- a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
+++ b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts
@@ -92,7 +92,7 @@
 &gmac2 {
 	status = "okay";
 
-	phy-mode = "rgmii";
+	phy-mode = "rgmii-id";
 	phy-handle = <&phy2>;
 	mdio {
 		compatible = "snps,dwmac-mdio";
-- 
cgit v1.2.3


From 38a38f5a36da9820680d413972cb733349400532 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 25 May 2024 21:38:53 +0200
Subject: Input: silead - Always support 10 fingers

When support for Silead touchscreens was orginal added some touchscreens
with older firmware versions only supported 5 fingers and this was made
the default requiring the setting of a "silead,max-fingers=10" uint32
device-property for all touchscreen models which do support 10 fingers.

There are very few models with the old 5 finger fw, so in practice the
setting of the "silead,max-fingers=10" is boilerplate which needs to
be copy and pasted to every touchscreen config.

Reporting that 10 fingers are supported on devices which only support
5 fingers doesn't cause any problems for userspace in practice, since
at max 4 finger gestures are supported anyways. Drop the max_fingers
configuration and simply always assume 10 fingers.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Link: https://lore.kernel.org/r/20240525193854.39130-2-hdegoede@redhat.com
---
 drivers/input/touchscreen/silead.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c
index bbd366dcb69a..6a42b27c4599 100644
--- a/drivers/input/touchscreen/silead.c
+++ b/drivers/input/touchscreen/silead.c
@@ -71,7 +71,6 @@ struct silead_ts_data {
 	struct regulator_bulk_data regulators[2];
 	char fw_name[64];
 	struct touchscreen_properties prop;
-	u32 max_fingers;
 	u32 chip_id;
 	struct input_mt_pos pos[SILEAD_MAX_FINGERS];
 	int slots[SILEAD_MAX_FINGERS];
@@ -136,7 +135,7 @@ static int silead_ts_request_input_dev(struct silead_ts_data *data)
 	touchscreen_parse_properties(data->input, true, &data->prop);
 	silead_apply_efi_fw_min_max(data);
 
-	input_mt_init_slots(data->input, data->max_fingers,
+	input_mt_init_slots(data->input, SILEAD_MAX_FINGERS,
 			    INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED |
 			    INPUT_MT_TRACK);
 
@@ -256,10 +255,10 @@ static void silead_ts_read_data(struct i2c_client *client)
 		return;
 	}
 
-	if (buf[0] > data->max_fingers) {
+	if (buf[0] > SILEAD_MAX_FINGERS) {
 		dev_warn(dev, "More touches reported then supported %d > %d\n",
-			 buf[0], data->max_fingers);
-		buf[0] = data->max_fingers;
+			 buf[0], SILEAD_MAX_FINGERS);
+		buf[0] = SILEAD_MAX_FINGERS;
 	}
 
 	if (silead_ts_handle_pen_data(data, buf))
@@ -315,7 +314,6 @@ sync:
 
 static int silead_ts_init(struct i2c_client *client)
 {
-	struct silead_ts_data *data = i2c_get_clientdata(client);
 	int error;
 
 	error = i2c_smbus_write_byte_data(client, SILEAD_REG_RESET,
@@ -325,7 +323,7 @@ static int silead_ts_init(struct i2c_client *client)
 	usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX);
 
 	error = i2c_smbus_write_byte_data(client, SILEAD_REG_TOUCH_NR,
-					data->max_fingers);
+					  SILEAD_MAX_FINGERS);
 	if (error)
 		goto i2c_write_err;
 	usleep_range(SILEAD_CMD_SLEEP_MIN, SILEAD_CMD_SLEEP_MAX);
@@ -591,13 +589,6 @@ static void silead_ts_read_props(struct i2c_client *client)
 	const char *str;
 	int error;
 
-	error = device_property_read_u32(dev, "silead,max-fingers",
-					 &data->max_fingers);
-	if (error) {
-		dev_dbg(dev, "Max fingers read error %d\n", error);
-		data->max_fingers = 5; /* Most devices handle up-to 5 fingers */
-	}
-
 	error = device_property_read_string(dev, "firmware-name", &str);
 	if (!error)
 		snprintf(data->fw_name, sizeof(data->fw_name),
-- 
cgit v1.2.3


From 84b26f509c1b9c8b3c3c63d75912701f151fd148 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 25 May 2024 21:38:54 +0200
Subject: platform/x86: touchscreen_dmi: Drop "silead,max-fingers" property

The silead touchscreen driver now defaults to 10 fingers, so it is no
longer necessary to have a "silead,max-fingers=10" property for each
silead touchscreen model. Drop this property from all the configs.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20240525193854.39130-3-hdegoede@redhat.com
---
 drivers/platform/x86/touchscreen_dmi.c | 57 ----------------------------------
 1 file changed, 57 deletions(-)

diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index 2d9ca2292ea1..3f626584f2c3 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -34,7 +34,6 @@ static const struct property_entry archos_101_cesium_educ_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1280),
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-archos-101-cesium-educ.fw"),
 	{ }
@@ -49,7 +48,6 @@ static const struct property_entry bush_bush_windows_tablet_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 1850),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1280),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-bush-bush-windows-tablet.fw"),
 	{ }
@@ -79,7 +77,6 @@ static const struct property_entry chuwi_hi8_air_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1148),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3676-chuwi-hi8-air.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	{ }
 };
 
@@ -95,7 +92,6 @@ static const struct property_entry chuwi_hi8_pro_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1148),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3680-chuwi-hi8-pro.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -123,7 +119,6 @@ static const struct property_entry chuwi_hi10_air_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-fuzz-x", 5),
 	PROPERTY_ENTRY_U32("touchscreen-fuzz-y", 4),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-chuwi-hi10-air.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -139,7 +134,6 @@ static const struct property_entry chuwi_hi10_plus_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 1908),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1270),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-chuwi-hi10plus.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	PROPERTY_ENTRY_BOOL("silead,pen-supported"),
 	PROPERTY_ENTRY_U32("silead,pen-resolution-x", 8),
@@ -171,7 +165,6 @@ static const struct property_entry chuwi_hi10_pro_props[] = {
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-chuwi-hi10-pro.fw"),
 	PROPERTY_ENTRY_U32_ARRAY("silead,efi-fw-min-max", chuwi_hi10_pro_efi_min_max),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	PROPERTY_ENTRY_BOOL("silead,pen-supported"),
 	PROPERTY_ENTRY_U32("silead,pen-resolution-x", 8),
@@ -201,7 +194,6 @@ static const struct property_entry chuwi_hibook_props[] = {
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-chuwi-hibook.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -227,7 +219,6 @@ static const struct property_entry chuwi_vi8_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3676-chuwi-vi8.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -255,7 +246,6 @@ static const struct property_entry chuwi_vi10_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 1858),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1280),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3680-chuwi-vi10.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -271,7 +261,6 @@ static const struct property_entry chuwi_surbook_mini_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 2040),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1524),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-chuwi-surbook-mini.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	{ }
 };
@@ -289,7 +278,6 @@ static const struct property_entry connect_tablet9_props[] = {
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-connect-tablet9.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	{ }
 };
 
@@ -306,7 +294,6 @@ static const struct property_entry csl_panther_tab_hd_props[] = {
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-csl-panther-tab-hd.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	{ }
 };
 
@@ -322,7 +309,6 @@ static const struct property_entry cube_iwork8_air_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 896),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3670-cube-iwork8-air.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	{ }
 };
 
@@ -346,7 +332,6 @@ static const struct property_entry cube_knote_i1101_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 1961),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1513),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-cube-knote-i1101.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -360,7 +345,6 @@ static const struct property_entry dexp_ursus_7w_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 890),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 630),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1686-dexp-ursus-7w.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -376,7 +360,6 @@ static const struct property_entry dexp_ursus_kx210i_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 1720),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1137),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-dexp-ursus-kx210i.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -391,7 +374,6 @@ static const struct property_entry digma_citi_e200_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1500),
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1686-digma_citi_e200.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -450,7 +432,6 @@ static const struct property_entry irbis_tw90_props[] = {
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3680-irbis_tw90.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -466,7 +447,6 @@ static const struct property_entry irbis_tw118_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 1960),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1510),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-irbis-tw118.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	{ }
 };
 
@@ -483,7 +463,6 @@ static const struct property_entry itworks_tw891_props[] = {
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3670-itworks-tw891.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	{ }
 };
 
@@ -496,7 +475,6 @@ static const struct property_entry jumper_ezpad_6_pro_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 1980),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1500),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-jumper-ezpad-6-pro.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -511,7 +489,6 @@ static const struct property_entry jumper_ezpad_6_pro_b_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1500),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-jumper-ezpad-6-pro-b.fw"),
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -527,7 +504,6 @@ static const struct property_entry jumper_ezpad_6_m4_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 1950),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1525),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-jumper-ezpad-6-m4.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -544,7 +520,6 @@ static const struct property_entry jumper_ezpad_7_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1526),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3680-jumper-ezpad-7.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,stuck-controller-bug"),
 	{ }
 };
@@ -561,7 +536,6 @@ static const struct property_entry jumper_ezpad_mini3_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1138),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3676-jumper-ezpad-mini3.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	{ }
 };
 
@@ -578,7 +552,6 @@ static const struct property_entry mpman_converter9_props[] = {
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-mpman-converter9.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	{ }
 };
 
@@ -594,7 +567,6 @@ static const struct property_entry mpman_mpwin895cl_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1150),
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3680-mpman-mpwin895cl.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -611,7 +583,6 @@ static const struct property_entry myria_my8307_props[] = {
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-myria-my8307.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -628,7 +599,6 @@ static const struct property_entry onda_obook_20_plus_props[] = {
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3676-onda-obook-20-plus.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -645,7 +615,6 @@ static const struct property_entry onda_v80_plus_v3_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3676-onda-v80-plus-v3.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -669,7 +638,6 @@ static const struct property_entry onda_v820w_32g_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-onda-v820w-32g.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -687,7 +655,6 @@ static const struct property_entry onda_v891_v5_props[] = {
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name",
 			      "gsl3676-onda-v891-v5.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -703,7 +670,6 @@ static const struct property_entry onda_v891w_v1_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 1676),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1130),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3680-onda-v891w-v1.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -720,7 +686,6 @@ static const struct property_entry onda_v891w_v3_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1135),
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3676-onda-v891w-v3.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -759,7 +724,6 @@ static const struct property_entry pipo_w11_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 1984),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1532),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-pipo-w11.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -775,7 +739,6 @@ static const struct property_entry positivo_c4128b_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 1915),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1269),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-positivo-c4128b.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	{ }
 };
 
@@ -791,7 +754,6 @@ static const struct property_entry pov_mobii_wintab_p800w_v20_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1146),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3680-pov-mobii-wintab-p800w-v20.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -808,7 +770,6 @@ static const struct property_entry pov_mobii_wintab_p800w_v21_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1148),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-pov-mobii-wintab-p800w.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -825,7 +786,6 @@ static const struct property_entry pov_mobii_wintab_p1006w_v10_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1520),
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-pov-mobii-wintab-p1006w-v10.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -842,7 +802,6 @@ static const struct property_entry predia_basic_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1144),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3680-predia-basic.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -859,7 +818,6 @@ static const struct property_entry rca_cambio_w101_v2_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 874),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-rca-cambio-w101-v2.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	{ }
 };
 
@@ -874,7 +832,6 @@ static const struct property_entry rwc_nanote_p8_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-rwc-nanote-p8.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	{ }
 };
 
@@ -890,7 +847,6 @@ static const struct property_entry schneider_sct101ctm_props[] = {
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-schneider-sct101ctm.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -906,7 +862,6 @@ static const struct property_entry globalspace_solt_ivw116_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 1723),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1077),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-globalspace-solt-ivw116.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -923,7 +878,6 @@ static const struct property_entry techbite_arc_11_6_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1270),
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-techbite-arc-11-6.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	{ }
 };
 
@@ -939,7 +893,6 @@ static const struct property_entry teclast_tbook11_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1264),
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-teclast-tbook11.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -965,7 +918,6 @@ static const struct property_entry teclast_x16_plus_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1264),
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3692-teclast-x16-plus.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -988,7 +940,6 @@ static const struct property_entry teclast_x3_plus_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 1980),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1500),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-teclast-x3-plus.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -1004,7 +955,6 @@ static const struct property_entry teclast_x98plus2_props[] = {
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-x"),
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1686-teclast_x98plus2.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	{ }
 };
 
@@ -1018,7 +968,6 @@ static const struct property_entry trekstor_primebook_c11_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1530),
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-trekstor-primebook-c11.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -1032,7 +981,6 @@ static const struct property_entry trekstor_primebook_c13_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 2624),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1920),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-trekstor-primebook-c13.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -1046,7 +994,6 @@ static const struct property_entry trekstor_primetab_t13b_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 2500),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1900),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-trekstor-primetab-t13b.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
 	{ }
@@ -1074,7 +1021,6 @@ static const struct property_entry trekstor_surftab_twin_10_1_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1280),
 	PROPERTY_ENTRY_U32("touchscreen-inverted-y", 1),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3670-surftab-twin-10-1-st10432-8.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -1090,7 +1036,6 @@ static const struct property_entry trekstor_surftab_wintron70_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 884),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 632),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1686-surftab-wintron70-st70416-6.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -1107,7 +1052,6 @@ static const struct property_entry viglen_connect_10_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-fuzz-y", 6),
 	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl3680-viglen-connect-10.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
@@ -1121,7 +1065,6 @@ static const struct property_entry vinga_twizzle_j116_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-size-x", 1920),
 	PROPERTY_ENTRY_U32("touchscreen-size-y", 1280),
 	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-vinga-twizzle_j116.fw"),
-	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
 	PROPERTY_ENTRY_BOOL("silead,home-button"),
 	{ }
 };
-- 
cgit v1.2.3


From 55624db051c8bdd39acbb1f35774c3f7b97c07b8 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Sun, 2 Jun 2024 11:58:00 +0300
Subject: platform/x86: touchscreen_dmi: Use 2-argument strscpy()

Use 2-argument strscpy(), which is not only shorter but also provides
an additional check that destination buffer is an array.

Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20240602090244.1666360-8-andy.shevchenko@gmail.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/touchscreen_dmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index 3f626584f2c3..f74af0a689f2 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -1850,7 +1850,7 @@ static int __init ts_parse_props(char *str)
 	u32 u32val;
 	int i, ret;
 
-	strscpy(orig_str, str, sizeof(orig_str));
+	strscpy(orig_str, str);
 
 	/*
 	 * str is part of the static_command_line from init/main.c and poking
-- 
cgit v1.2.3


From 078fc56f5c1787a9272373742f817b320c046d1a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 28 May 2024 13:59:33 +0200
Subject: platform/x86: yt2-1380: add CONFIG_EXTCON dependency

This driver uses the extcon subsystem and fails to build
when it cannot call into that subsystem:

x86_64-linux-ld: vmlinux.o: in function `yt2_1380_fc_worker':
lenovo-yoga-tab2-pro-1380-fastcharger.c:(.text+0xa9d819): undefined reference to `extcon_get_state'
x86_64-linux-ld: lenovo-yoga-tab2-pro-1380-fastcharger.c:(.text+0xa9d853): undefined reference to `extcon_get_state'
x86_64-linux-ld: vmlinux.o: in function `yt2_1380_fc_serdev_probe':
lenovo-yoga-tab2-pro-1380-fastcharger.c:(.text+0xa9da22): undefined reference to `extcon_get_extcon_dev'
x86_64-linux-ld: lenovo-yoga-tab2-pro-1380-fastcharger.c:(.text+0xa9dc0c): undefined reference to `devm_extcon_register_notifier_all'

Add a Kconfig dependency to make it it always builds correctly.

Fixes: b2ed33e8d486 ("platform/x86: Add lenovo-yoga-tab2-pro-1380-fastcharger driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20240528115940.3169455-1-arnd@kernel.org
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 1953317541ea..665fa9524986 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -136,6 +136,7 @@ config YOGABOOK
 config YT2_1380
 	tristate "Lenovo Yoga Tablet 2 1380 fast charge driver"
 	depends on SERIAL_DEV_BUS
+	depends on EXTCON
 	depends on ACPI
 	help
 	  Say Y here to enable support for the custom fast charging protocol
-- 
cgit v1.2.3


From 1981b296f858010eae409548fd297659b2cc570e Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Tue, 28 May 2024 22:49:02 +0200
Subject: platform/x86: dell-smbios: Fix wrong token data in sysfs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When reading token data from sysfs on my Inspiron 3505, the token
locations and values are wrong. This happens because match_attribute()
blindly assumes that all entries in da_tokens have an associated
entry in token_attrs.

This however is not true as soon as da_tokens[] contains zeroed
token entries. Those entries are being skipped when initialising
token_attrs, breaking the core assumption of match_attribute().

Fix this by defining an extra struct for each pair of token attributes
and use container_of() to retrieve token information.

Tested on a Dell Inspiron 3050.

Fixes: 33b9ca1e53b4 ("platform/x86: dell-smbios: Add a sysfs interface for SMBIOS tokens")
Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20240528204903.445546-1-W_Armin@gmx.de
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/dell/dell-smbios-base.c | 92 +++++++++++-----------------
 1 file changed, 36 insertions(+), 56 deletions(-)

diff --git a/drivers/platform/x86/dell/dell-smbios-base.c b/drivers/platform/x86/dell/dell-smbios-base.c
index e61bfaf8b5c4..86b95206cb1b 100644
--- a/drivers/platform/x86/dell/dell-smbios-base.c
+++ b/drivers/platform/x86/dell/dell-smbios-base.c
@@ -11,6 +11,7 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/container_of.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/capability.h>
@@ -25,11 +26,16 @@ static u32 da_supported_commands;
 static int da_num_tokens;
 static struct platform_device *platform_device;
 static struct calling_interface_token *da_tokens;
-static struct device_attribute *token_location_attrs;
-static struct device_attribute *token_value_attrs;
+static struct token_sysfs_data *token_entries;
 static struct attribute **token_attrs;
 static DEFINE_MUTEX(smbios_mutex);
 
+struct token_sysfs_data {
+	struct device_attribute location_attr;
+	struct device_attribute value_attr;
+	struct calling_interface_token *token;
+};
+
 struct smbios_device {
 	struct list_head list;
 	struct device *device;
@@ -416,47 +422,26 @@ static void __init find_tokens(const struct dmi_header *dm, void *dummy)
 	}
 }
 
-static int match_attribute(struct device *dev,
-			   struct device_attribute *attr)
-{
-	int i;
-
-	for (i = 0; i < da_num_tokens * 2; i++) {
-		if (!token_attrs[i])
-			continue;
-		if (strcmp(token_attrs[i]->name, attr->attr.name) == 0)
-			return i/2;
-	}
-	dev_dbg(dev, "couldn't match: %s\n", attr->attr.name);
-	return -EINVAL;
-}
-
 static ssize_t location_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
-	int i;
+	struct token_sysfs_data *data = container_of(attr, struct token_sysfs_data, location_attr);
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	i = match_attribute(dev, attr);
-	if (i > 0)
-		return sysfs_emit(buf, "%08x", da_tokens[i].location);
-	return 0;
+	return sysfs_emit(buf, "%08x", data->token->location);
 }
 
 static ssize_t value_show(struct device *dev,
 			  struct device_attribute *attr, char *buf)
 {
-	int i;
+	struct token_sysfs_data *data = container_of(attr, struct token_sysfs_data, value_attr);
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	i = match_attribute(dev, attr);
-	if (i > 0)
-		return sysfs_emit(buf, "%08x", da_tokens[i].value);
-	return 0;
+	return sysfs_emit(buf, "%08x", data->token->value);
 }
 
 static struct attribute_group smbios_attribute_group = {
@@ -473,22 +458,15 @@ static int build_tokens_sysfs(struct platform_device *dev)
 {
 	char *location_name;
 	char *value_name;
-	size_t size;
 	int ret;
 	int i, j;
 
-	/* (number of tokens  + 1 for null terminated */
-	size = sizeof(struct device_attribute) * (da_num_tokens + 1);
-	token_location_attrs = kzalloc(size, GFP_KERNEL);
-	if (!token_location_attrs)
+	token_entries = kcalloc(da_num_tokens, sizeof(*token_entries), GFP_KERNEL);
+	if (!token_entries)
 		return -ENOMEM;
-	token_value_attrs = kzalloc(size, GFP_KERNEL);
-	if (!token_value_attrs)
-		goto out_allocate_value;
 
 	/* need to store both location and value + terminator*/
-	size = sizeof(struct attribute *) * ((2 * da_num_tokens) + 1);
-	token_attrs = kzalloc(size, GFP_KERNEL);
+	token_attrs = kcalloc((2 * da_num_tokens) + 1, sizeof(*token_attrs), GFP_KERNEL);
 	if (!token_attrs)
 		goto out_allocate_attrs;
 
@@ -496,27 +474,32 @@ static int build_tokens_sysfs(struct platform_device *dev)
 		/* skip empty */
 		if (da_tokens[i].tokenID == 0)
 			continue;
+
+		token_entries[i].token = &da_tokens[i];
+
 		/* add location */
 		location_name = kasprintf(GFP_KERNEL, "%04x_location",
 					  da_tokens[i].tokenID);
 		if (location_name == NULL)
 			goto out_unwind_strings;
-		sysfs_attr_init(&token_location_attrs[i].attr);
-		token_location_attrs[i].attr.name = location_name;
-		token_location_attrs[i].attr.mode = 0444;
-		token_location_attrs[i].show = location_show;
-		token_attrs[j++] = &token_location_attrs[i].attr;
+
+		sysfs_attr_init(&token_entries[i].location_attr.attr);
+		token_entries[i].location_attr.attr.name = location_name;
+		token_entries[i].location_attr.attr.mode = 0444;
+		token_entries[i].location_attr.show = location_show;
+		token_attrs[j++] = &token_entries[i].location_attr.attr;
 
 		/* add value */
 		value_name = kasprintf(GFP_KERNEL, "%04x_value",
 				       da_tokens[i].tokenID);
 		if (value_name == NULL)
 			goto loop_fail_create_value;
-		sysfs_attr_init(&token_value_attrs[i].attr);
-		token_value_attrs[i].attr.name = value_name;
-		token_value_attrs[i].attr.mode = 0444;
-		token_value_attrs[i].show = value_show;
-		token_attrs[j++] = &token_value_attrs[i].attr;
+
+		sysfs_attr_init(&token_entries[i].value_attr.attr);
+		token_entries[i].value_attr.attr.name = value_name;
+		token_entries[i].value_attr.attr.mode = 0444;
+		token_entries[i].value_attr.show = value_show;
+		token_attrs[j++] = &token_entries[i].value_attr.attr;
 		continue;
 
 loop_fail_create_value:
@@ -532,14 +515,12 @@ loop_fail_create_value:
 
 out_unwind_strings:
 	while (i--) {
-		kfree(token_location_attrs[i].attr.name);
-		kfree(token_value_attrs[i].attr.name);
+		kfree(token_entries[i].location_attr.attr.name);
+		kfree(token_entries[i].value_attr.attr.name);
 	}
 	kfree(token_attrs);
 out_allocate_attrs:
-	kfree(token_value_attrs);
-out_allocate_value:
-	kfree(token_location_attrs);
+	kfree(token_entries);
 
 	return -ENOMEM;
 }
@@ -551,12 +532,11 @@ static void free_group(struct platform_device *pdev)
 	sysfs_remove_group(&pdev->dev.kobj,
 				&smbios_attribute_group);
 	for (i = 0; i < da_num_tokens; i++) {
-		kfree(token_location_attrs[i].attr.name);
-		kfree(token_value_attrs[i].attr.name);
+		kfree(token_entries[i].location_attr.attr.name);
+		kfree(token_entries[i].value_attr.attr.name);
 	}
 	kfree(token_attrs);
-	kfree(token_value_attrs);
-	kfree(token_location_attrs);
+	kfree(token_entries);
 }
 
 static int __init dell_smbios_init(void)
-- 
cgit v1.2.3


From 306aec7eea8c83e8212f4dd6e5b358c508e3e466 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Tue, 28 May 2024 22:49:03 +0200
Subject: platform/x86: dell-smbios: Simplify error handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the allocation of value_name fails, the error handling code
uses two gotos for error handling, which is not necessary.

Simplify the error handling in this case by only using a single goto.

Tested on a Dell Inspiron 3505.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20240528204903.445546-2-W_Armin@gmx.de
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/dell/dell-smbios-base.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/platform/x86/dell/dell-smbios-base.c b/drivers/platform/x86/dell/dell-smbios-base.c
index 86b95206cb1b..b562ed99ec4e 100644
--- a/drivers/platform/x86/dell/dell-smbios-base.c
+++ b/drivers/platform/x86/dell/dell-smbios-base.c
@@ -492,19 +492,16 @@ static int build_tokens_sysfs(struct platform_device *dev)
 		/* add value */
 		value_name = kasprintf(GFP_KERNEL, "%04x_value",
 				       da_tokens[i].tokenID);
-		if (value_name == NULL)
-			goto loop_fail_create_value;
+		if (!value_name) {
+			kfree(location_name);
+			goto out_unwind_strings;
+		}
 
 		sysfs_attr_init(&token_entries[i].value_attr.attr);
 		token_entries[i].value_attr.attr.name = value_name;
 		token_entries[i].value_attr.attr.mode = 0444;
 		token_entries[i].value_attr.show = value_show;
 		token_attrs[j++] = &token_entries[i].value_attr.attr;
-		continue;
-
-loop_fail_create_value:
-		kfree(location_name);
-		goto out_unwind_strings;
 	}
 	smbios_attribute_group.attrs = token_attrs;
 
-- 
cgit v1.2.3


From 77f1972bdcf7513293e8bbe376b9fe837310ee9c Mon Sep 17 00:00:00 2001
From: Suma Hegde <suma.hegde@amd.com>
Date: Mon, 3 Jun 2024 08:15:12 +0000
Subject: platform/x86/amd/hsmp: Check HSMP support on AMD family of processors

HSMP interface is supported only on few x86 processors from AMD.
Accessing HSMP registers on rest of the platforms might cause
unexpected behaviour. So add a check.

Also unavailability of this interface on rest of the processors
is not an error. Hence, use pr_info() instead of the pr_err() to
log the message.

Signed-off-by: Suma Hegde <suma.hegde@amd.com>
Reviewed-by: Naveen Krishna Chatradhi <naveenkrishna.chatradhi@amd.com>
Link: https://lore.kernel.org/r/20240603081512.142909-1-suma.hegde@amd.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/amd/hsmp.c | 50 +++++++++++++++++++++++++++++++++++------
 1 file changed, 43 insertions(+), 7 deletions(-)

diff --git a/drivers/platform/x86/amd/hsmp.c b/drivers/platform/x86/amd/hsmp.c
index d84ea66eecc6..8fcf38eed7f0 100644
--- a/drivers/platform/x86/amd/hsmp.c
+++ b/drivers/platform/x86/amd/hsmp.c
@@ -907,16 +907,44 @@ static int hsmp_plat_dev_register(void)
 	return ret;
 }
 
+/*
+ * This check is only needed for backward compatibility of previous platforms.
+ * All new platforms are expected to support ACPI based probing.
+ */
+static bool legacy_hsmp_support(void)
+{
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+		return false;
+
+	switch (boot_cpu_data.x86) {
+	case 0x19:
+		switch (boot_cpu_data.x86_model) {
+		case 0x00 ... 0x1F:
+		case 0x30 ... 0x3F:
+		case 0x90 ... 0x9F:
+		case 0xA0 ... 0xAF:
+			return true;
+		default:
+			return false;
+		}
+	case 0x1A:
+		switch (boot_cpu_data.x86_model) {
+		case 0x00 ... 0x1F:
+			return true;
+		default:
+			return false;
+		}
+	default:
+		return false;
+	}
+
+	return false;
+}
+
 static int __init hsmp_plt_init(void)
 {
 	int ret = -ENODEV;
 
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD || boot_cpu_data.x86 < 0x19) {
-		pr_err("HSMP is not supported on Family:%x model:%x\n",
-		       boot_cpu_data.x86, boot_cpu_data.x86_model);
-		return ret;
-	}
-
 	/*
 	 * amd_nb_num() returns number of SMN/DF interfaces present in the system
 	 * if we have N SMN/DF interfaces that ideally means N sockets
@@ -930,7 +958,15 @@ static int __init hsmp_plt_init(void)
 		return ret;
 
 	if (!plat_dev.is_acpi_device) {
-		ret = hsmp_plat_dev_register();
+		if (legacy_hsmp_support()) {
+			/* Not ACPI device, but supports HSMP, register a plat_dev */
+			ret = hsmp_plat_dev_register();
+		} else {
+			/* Not ACPI, Does not support HSMP */
+			pr_info("HSMP is not supported on Family:%x model:%x\n",
+				boot_cpu_data.x86, boot_cpu_data.x86_model);
+			ret = -ENODEV;
+		}
 		if (ret)
 			platform_driver_unregister(&amd_hsmp_driver);
 	}
-- 
cgit v1.2.3


From 0110c4b110477bb1f19b0d02361846be7ab08300 Mon Sep 17 00:00:00 2001
From: Sunil V L <sunilvl@ventanamicro.com>
Date: Mon, 27 May 2024 13:41:13 +0530
Subject: irqchip/riscv-intc: Prevent memory leak when riscv_intc_init_common()
 fails

When riscv_intc_init_common() fails, the firmware node allocated is not
freed. Add the missing free().

Fixes: 7023b9d83f03 ("irqchip/riscv-intc: Add ACPI support")
Signed-off-by: Sunil V L <sunilvl@ventanamicro.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Anup Patel <anup@brainfault.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20240527081113.616189-1-sunilvl@ventanamicro.com
---
 drivers/irqchip/irq-riscv-intc.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c
index 9e71c4428814..4f3a12383a1e 100644
--- a/drivers/irqchip/irq-riscv-intc.c
+++ b/drivers/irqchip/irq-riscv-intc.c
@@ -253,8 +253,9 @@ IRQCHIP_DECLARE(andes, "andestech,cpu-intc", riscv_intc_init);
 static int __init riscv_intc_acpi_init(union acpi_subtable_headers *header,
 				       const unsigned long end)
 {
-	struct fwnode_handle *fn;
 	struct acpi_madt_rintc *rintc;
+	struct fwnode_handle *fn;
+	int rc;
 
 	rintc = (struct acpi_madt_rintc *)header;
 
@@ -273,7 +274,11 @@ static int __init riscv_intc_acpi_init(union acpi_subtable_headers *header,
 		return -ENOMEM;
 	}
 
-	return riscv_intc_init_common(fn, &riscv_intc_chip);
+	rc = riscv_intc_init_common(fn, &riscv_intc_chip);
+	if (rc)
+		irq_domain_free_fwnode(fn);
+
+	return rc;
 }
 
 IRQCHIP_ACPI_DECLARE(riscv_intc, ACPI_MADT_TYPE_RINTC, NULL,
-- 
cgit v1.2.3


From 6149db4997f582e958da675092f21c666e3b67b7 Mon Sep 17 00:00:00 2001
From: Tristram Ha <tristram.ha@microchip.com>
Date: Tue, 28 May 2024 19:20:23 -0700
Subject: net: phy: micrel: fix KSZ9477 PHY issues after suspend/resume

When the PHY is powered up after powered down most of the registers are
reset, so the PHY setup code needs to be done again.  In addition the
interrupt register will need to be setup again so that link status
indication works again.

Fixes: 26dd2974c5b5 ("net: phy: micrel: Move KSZ9477 errata fixes to PHY driver")
Signed-off-by: Tristram Ha <tristram.ha@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c | 62 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 56 insertions(+), 6 deletions(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 2b8f8b7f1517..8c20cf937530 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -1939,7 +1939,7 @@ static const struct ksz9477_errata_write ksz9477_errata_writes[] = {
 	{0x1c, 0x20, 0xeeee},
 };
 
-static int ksz9477_config_init(struct phy_device *phydev)
+static int ksz9477_phy_errata(struct phy_device *phydev)
 {
 	int err;
 	int i;
@@ -1967,16 +1967,30 @@ static int ksz9477_config_init(struct phy_device *phydev)
 			return err;
 	}
 
+	err = genphy_restart_aneg(phydev);
+	if (err)
+		return err;
+
+	return err;
+}
+
+static int ksz9477_config_init(struct phy_device *phydev)
+{
+	int err;
+
+	/* Only KSZ9897 family of switches needs this fix. */
+	if ((phydev->phy_id & 0xf) == 1) {
+		err = ksz9477_phy_errata(phydev);
+		if (err)
+			return err;
+	}
+
 	/* According to KSZ9477 Errata DS80000754C (Module 4) all EEE modes
 	 * in this switch shall be regarded as broken.
 	 */
 	if (phydev->dev_flags & MICREL_NO_EEE)
 		phydev->eee_broken_modes = -1;
 
-	err = genphy_restart_aneg(phydev);
-	if (err)
-		return err;
-
 	return kszphy_config_init(phydev);
 }
 
@@ -2085,6 +2099,42 @@ static int kszphy_resume(struct phy_device *phydev)
 	return 0;
 }
 
+static int ksz9477_resume(struct phy_device *phydev)
+{
+	int ret;
+
+	/* No need to initialize registers if not powered down. */
+	ret = phy_read(phydev, MII_BMCR);
+	if (ret < 0)
+		return ret;
+	if (!(ret & BMCR_PDOWN))
+		return 0;
+
+	genphy_resume(phydev);
+
+	/* After switching from power-down to normal mode, an internal global
+	 * reset is automatically generated. Wait a minimum of 1 ms before
+	 * read/write access to the PHY registers.
+	 */
+	usleep_range(1000, 2000);
+
+	/* Only KSZ9897 family of switches needs this fix. */
+	if ((phydev->phy_id & 0xf) == 1) {
+		ret = ksz9477_phy_errata(phydev);
+		if (ret)
+			return ret;
+	}
+
+	/* Enable PHY Interrupts */
+	if (phy_interrupt_is_valid(phydev)) {
+		phydev->interrupts = PHY_INTERRUPT_ENABLED;
+		if (phydev->drv->config_intr)
+			phydev->drv->config_intr(phydev);
+	}
+
+	return 0;
+}
+
 static int kszphy_probe(struct phy_device *phydev)
 {
 	const struct kszphy_type *type = phydev->drv->driver_data;
@@ -5493,7 +5543,7 @@ static struct phy_driver ksphy_driver[] = {
 	.config_intr	= kszphy_config_intr,
 	.handle_interrupt = kszphy_handle_interrupt,
 	.suspend	= genphy_suspend,
-	.resume		= genphy_resume,
+	.resume		= ksz9477_resume,
 	.get_features	= ksz9477_get_features,
 } };
 
-- 
cgit v1.2.3


From e306a894bd511804ba9db7c00ca9cc05b55df1f2 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel.holland@sifive.com>
Date: Wed, 29 May 2024 14:54:56 -0700
Subject: irqchip/sifive-plic: Chain to parent IRQ after handlers are ready

Now that the PLIC uses a platform driver, the driver is probed later in the
boot process, where interrupts from peripherals might already be pending.

As a result, plic_handle_irq() may be called as early as the call to
irq_set_chained_handler() completes. But this call happens before the
per-context handler is completely set up, so there is a window where
plic_handle_irq() can see incomplete per-context state and crash.

Avoid this by delaying the call to irq_set_chained_handler() until all
handlers from all PLICs are initialized.

Fixes: 8ec99b033147 ("irqchip/sifive-plic: Convert PLIC driver into a platform driver")
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Samuel Holland <samuel.holland@sifive.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Anup Patel <anup@brainfault.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20240529215458.937817-1-samuel.holland@sifive.com
Closes: https://lore.kernel.org/r/CAMuHMdVYFFR7K5SbHBLY-JHhb7YpgGMS_hnRWm8H0KD-wBo+4A@mail.gmail.com/
---
 drivers/irqchip/irq-sifive-plic.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 8fb183ced1e7..9e22f7e378f5 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -85,7 +85,7 @@ struct plic_handler {
 	struct plic_priv	*priv;
 };
 static int plic_parent_irq __ro_after_init;
-static bool plic_cpuhp_setup_done __ro_after_init;
+static bool plic_global_setup_done __ro_after_init;
 static DEFINE_PER_CPU(struct plic_handler, plic_handlers);
 
 static int plic_irq_set_type(struct irq_data *d, unsigned int type);
@@ -487,10 +487,8 @@ static int plic_probe(struct platform_device *pdev)
 	unsigned long plic_quirks = 0;
 	struct plic_handler *handler;
 	u32 nr_irqs, parent_hwirq;
-	struct irq_domain *domain;
 	struct plic_priv *priv;
 	irq_hw_number_t hwirq;
-	bool cpuhp_setup;
 
 	if (is_of_node(dev->fwnode)) {
 		const struct of_device_id *id;
@@ -549,14 +547,6 @@ static int plic_probe(struct platform_device *pdev)
 			continue;
 		}
 
-		/* Find parent domain and register chained handler */
-		domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(), DOMAIN_BUS_ANY);
-		if (!plic_parent_irq && domain) {
-			plic_parent_irq = irq_create_mapping(domain, RV_IRQ_EXT);
-			if (plic_parent_irq)
-				irq_set_chained_handler(plic_parent_irq, plic_handle_irq);
-		}
-
 		/*
 		 * When running in M-mode we need to ignore the S-mode handler.
 		 * Here we assume it always comes later, but that might be a
@@ -597,25 +587,35 @@ done:
 		goto fail_cleanup_contexts;
 
 	/*
-	 * We can have multiple PLIC instances so setup cpuhp state
+	 * We can have multiple PLIC instances so setup global state
 	 * and register syscore operations only once after context
 	 * handlers of all online CPUs are initialized.
 	 */
-	if (!plic_cpuhp_setup_done) {
-		cpuhp_setup = true;
+	if (!plic_global_setup_done) {
+		struct irq_domain *domain;
+		bool global_setup = true;
+
 		for_each_online_cpu(cpu) {
 			handler = per_cpu_ptr(&plic_handlers, cpu);
 			if (!handler->present) {
-				cpuhp_setup = false;
+				global_setup = false;
 				break;
 			}
 		}
-		if (cpuhp_setup) {
+
+		if (global_setup) {
+			/* Find parent domain and register chained handler */
+			domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(), DOMAIN_BUS_ANY);
+			if (domain)
+				plic_parent_irq = irq_create_mapping(domain, RV_IRQ_EXT);
+			if (plic_parent_irq)
+				irq_set_chained_handler(plic_parent_irq, plic_handle_irq);
+
 			cpuhp_setup_state(CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
 					  "irqchip/sifive/plic:starting",
 					  plic_starting_cpu, plic_dying_cpu);
 			register_syscore_ops(&plic_irq_syscore_ops);
-			plic_cpuhp_setup_done = true;
+			plic_global_setup_done = true;
 		}
 	}
 
-- 
cgit v1.2.3


From 8c219e52ca4d9a67cd6a7074e91bf29b55edc075 Mon Sep 17 00:00:00 2001
From: Gregor Herburger <gregor.herburger@tq-group.com>
Date: Thu, 30 May 2024 12:19:59 +0200
Subject: gpio: tqmx86: fix typo in Kconfig label

Fix description for GPIO_TQMX86 from QTMX86 to TQMx86.

Fixes: b868db94a6a7 ("gpio: tqmx86: Add GPIO from for this IO controller")
Signed-off-by: Gregor Herburger <gregor.herburger@tq-group.com>
Signed-off-by: Matthias Schiffer <matthias.schiffer@ew.tq-group.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/e0e38c9944ad6d281d9a662a45d289b88edc808e.1717063994.git.matthias.schiffer@ew.tq-group.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 3dbddec07028..1c28a48915bb 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -1576,7 +1576,7 @@ config GPIO_TPS68470
 	  are "output only" GPIOs.
 
 config GPIO_TQMX86
-	tristate "TQ-Systems QTMX86 GPIO"
+	tristate "TQ-Systems TQMx86 GPIO"
 	depends on MFD_TQMX86 || COMPILE_TEST
 	depends on HAS_IOPORT_MAP
 	select GPIOLIB_IRQCHIP
-- 
cgit v1.2.3


From 9d6a811b522ba558bcb4ec01d12e72a0af8e9f6e Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <matthias.schiffer@ew.tq-group.com>
Date: Thu, 30 May 2024 12:20:00 +0200
Subject: gpio: tqmx86: introduce shadow register for GPIO output value

The TQMx86 GPIO controller uses the same register address for input and
output data. Reading the register will always return current inputs
rather than the previously set outputs (regardless of the current
direction setting). Therefore, using a RMW pattern does not make sense
when setting output values. Instead, the previously set output register
value needs to be stored as a shadow register.

As there is no reliable way to get the current output values from the
hardware, also initialize all channels to 0, to ensure that stored and
actual output values match. This should usually not have any effect in
practise, as the TQMx86 UEFI sets all outputs to 0 during boot.

Also prepare for extension of the driver to more than 8 GPIOs by using
DECLARE_BITMAP.

Fixes: b868db94a6a7 ("gpio: tqmx86: Add GPIO from for this IO controller")
Signed-off-by: Matthias Schiffer <matthias.schiffer@ew.tq-group.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/d0555933becd45fa92a85675d26e4d59343ddc01.1717063994.git.matthias.schiffer@ew.tq-group.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-tqmx86.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c
index 3a28c1f273c3..b7e2dbbdc4eb 100644
--- a/drivers/gpio/gpio-tqmx86.c
+++ b/drivers/gpio/gpio-tqmx86.c
@@ -6,6 +6,7 @@
  *   Vadim V.Vlasov <vvlasov@dev.rtsoft.ru>
  */
 
+#include <linux/bitmap.h>
 #include <linux/bitops.h>
 #include <linux/errno.h>
 #include <linux/gpio/driver.h>
@@ -38,6 +39,7 @@ struct tqmx86_gpio_data {
 	void __iomem		*io_base;
 	int			irq;
 	raw_spinlock_t		spinlock;
+	DECLARE_BITMAP(output, TQMX86_NGPIO);
 	u8			irq_type[TQMX86_NGPI];
 };
 
@@ -64,15 +66,10 @@ static void tqmx86_gpio_set(struct gpio_chip *chip, unsigned int offset,
 {
 	struct tqmx86_gpio_data *gpio = gpiochip_get_data(chip);
 	unsigned long flags;
-	u8 val;
 
 	raw_spin_lock_irqsave(&gpio->spinlock, flags);
-	val = tqmx86_gpio_read(gpio, TQMX86_GPIOD);
-	if (value)
-		val |= BIT(offset);
-	else
-		val &= ~BIT(offset);
-	tqmx86_gpio_write(gpio, val, TQMX86_GPIOD);
+	__assign_bit(offset, gpio->output, value);
+	tqmx86_gpio_write(gpio, bitmap_get_value8(gpio->output, 0), TQMX86_GPIOD);
 	raw_spin_unlock_irqrestore(&gpio->spinlock, flags);
 }
 
@@ -277,6 +274,13 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
 
 	tqmx86_gpio_write(gpio, (u8)~TQMX86_DIR_INPUT_MASK, TQMX86_GPIODD);
 
+	/*
+	 * Reading the previous output state is not possible with TQMx86 hardware.
+	 * Initialize all outputs to 0 to have a defined state that matches the
+	 * shadow register.
+	 */
+	tqmx86_gpio_write(gpio, 0, TQMX86_GPIOD);
+
 	chip = &gpio->chip;
 	chip->label = "gpio-tqmx86";
 	chip->owner = THIS_MODULE;
-- 
cgit v1.2.3


From 08af509efdf8dad08e972b48de0e2c2a7919ea8b Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <matthias.schiffer@ew.tq-group.com>
Date: Thu, 30 May 2024 12:20:01 +0200
Subject: gpio: tqmx86: store IRQ trigger type and unmask status separately

irq_set_type() should not implicitly unmask the IRQ.

All accesses to the interrupt configuration register are moved to a new
helper tqmx86_gpio_irq_config(). We also introduce the new rule that
accessing irq_type must happen while locked, which will become
significant for fixing EDGE_BOTH handling.

Fixes: b868db94a6a7 ("gpio: tqmx86: Add GPIO from for this IO controller")
Signed-off-by: Matthias Schiffer <matthias.schiffer@ew.tq-group.com>
Link: https://lore.kernel.org/r/6aa4f207f77cb58ef64ffb947e91949b0f753ccd.1717063994.git.matthias.schiffer@ew.tq-group.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-tqmx86.c | 48 +++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c
index b7e2dbbdc4eb..7e428c872a25 100644
--- a/drivers/gpio/gpio-tqmx86.c
+++ b/drivers/gpio/gpio-tqmx86.c
@@ -29,15 +29,19 @@
 #define TQMX86_GPIIC	3	/* GPI Interrupt Configuration Register */
 #define TQMX86_GPIIS	4	/* GPI Interrupt Status Register */
 
+#define TQMX86_GPII_NONE	0
 #define TQMX86_GPII_FALLING	BIT(0)
 #define TQMX86_GPII_RISING	BIT(1)
 #define TQMX86_GPII_MASK	(BIT(0) | BIT(1))
 #define TQMX86_GPII_BITS	2
+/* Stored in irq_type with GPII bits */
+#define TQMX86_INT_UNMASKED	BIT(2)
 
 struct tqmx86_gpio_data {
 	struct gpio_chip	chip;
 	void __iomem		*io_base;
 	int			irq;
+	/* Lock must be held for accessing output and irq_type fields */
 	raw_spinlock_t		spinlock;
 	DECLARE_BITMAP(output, TQMX86_NGPIO);
 	u8			irq_type[TQMX86_NGPI];
@@ -104,21 +108,32 @@ static int tqmx86_gpio_get_direction(struct gpio_chip *chip,
 	return GPIO_LINE_DIRECTION_OUT;
 }
 
+static void tqmx86_gpio_irq_config(struct tqmx86_gpio_data *gpio, int offset)
+	__must_hold(&gpio->spinlock)
+{
+	u8 type = TQMX86_GPII_NONE, gpiic;
+
+	if (gpio->irq_type[offset] & TQMX86_INT_UNMASKED)
+		type = gpio->irq_type[offset] & TQMX86_GPII_MASK;
+
+	gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC);
+	gpiic &= ~(TQMX86_GPII_MASK << (offset * TQMX86_GPII_BITS));
+	gpiic |= type << (offset * TQMX86_GPII_BITS);
+	tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC);
+}
+
 static void tqmx86_gpio_irq_mask(struct irq_data *data)
 {
 	unsigned int offset = (data->hwirq - TQMX86_NGPO);
 	struct tqmx86_gpio_data *gpio = gpiochip_get_data(
 		irq_data_get_irq_chip_data(data));
 	unsigned long flags;
-	u8 gpiic, mask;
-
-	mask = TQMX86_GPII_MASK << (offset * TQMX86_GPII_BITS);
 
 	raw_spin_lock_irqsave(&gpio->spinlock, flags);
-	gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC);
-	gpiic &= ~mask;
-	tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC);
+	gpio->irq_type[offset] &= ~TQMX86_INT_UNMASKED;
+	tqmx86_gpio_irq_config(gpio, offset);
 	raw_spin_unlock_irqrestore(&gpio->spinlock, flags);
+
 	gpiochip_disable_irq(&gpio->chip, irqd_to_hwirq(data));
 }
 
@@ -128,16 +143,12 @@ static void tqmx86_gpio_irq_unmask(struct irq_data *data)
 	struct tqmx86_gpio_data *gpio = gpiochip_get_data(
 		irq_data_get_irq_chip_data(data));
 	unsigned long flags;
-	u8 gpiic, mask;
-
-	mask = TQMX86_GPII_MASK << (offset * TQMX86_GPII_BITS);
 
 	gpiochip_enable_irq(&gpio->chip, irqd_to_hwirq(data));
+
 	raw_spin_lock_irqsave(&gpio->spinlock, flags);
-	gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC);
-	gpiic &= ~mask;
-	gpiic |= gpio->irq_type[offset] << (offset * TQMX86_GPII_BITS);
-	tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC);
+	gpio->irq_type[offset] |= TQMX86_INT_UNMASKED;
+	tqmx86_gpio_irq_config(gpio, offset);
 	raw_spin_unlock_irqrestore(&gpio->spinlock, flags);
 }
 
@@ -148,7 +159,7 @@ static int tqmx86_gpio_irq_set_type(struct irq_data *data, unsigned int type)
 	unsigned int offset = (data->hwirq - TQMX86_NGPO);
 	unsigned int edge_type = type & IRQF_TRIGGER_MASK;
 	unsigned long flags;
-	u8 new_type, gpiic;
+	u8 new_type;
 
 	switch (edge_type) {
 	case IRQ_TYPE_EDGE_RISING:
@@ -164,13 +175,10 @@ static int tqmx86_gpio_irq_set_type(struct irq_data *data, unsigned int type)
 		return -EINVAL; /* not supported */
 	}
 
-	gpio->irq_type[offset] = new_type;
-
 	raw_spin_lock_irqsave(&gpio->spinlock, flags);
-	gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC);
-	gpiic &= ~((TQMX86_GPII_MASK) << (offset * TQMX86_GPII_BITS));
-	gpiic |= new_type << (offset * TQMX86_GPII_BITS);
-	tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC);
+	gpio->irq_type[offset] &= ~TQMX86_GPII_MASK;
+	gpio->irq_type[offset] |= new_type;
+	tqmx86_gpio_irq_config(gpio, offset);
 	raw_spin_unlock_irqrestore(&gpio->spinlock, flags);
 
 	return 0;
-- 
cgit v1.2.3


From 90dd7de4ef7ba584823dfbeba834c2919a4bb55b Mon Sep 17 00:00:00 2001
From: Matthias Schiffer <matthias.schiffer@ew.tq-group.com>
Date: Thu, 30 May 2024 12:20:02 +0200
Subject: gpio: tqmx86: fix broken IRQ_TYPE_EDGE_BOTH interrupt type

The TQMx86 GPIO controller only supports falling and rising edge
triggers, but not both. Fix this by implementing a software both-edge
mode that toggles the edge type after every interrupt.

Fixes: b868db94a6a7 ("gpio: tqmx86: Add GPIO from for this IO controller")
Co-developed-by: Gregor Herburger <gregor.herburger@tq-group.com>
Signed-off-by: Gregor Herburger <gregor.herburger@tq-group.com>
Signed-off-by: Matthias Schiffer <matthias.schiffer@ew.tq-group.com>
Link: https://lore.kernel.org/r/515324f0491c4d44f4ef49f170354aca002d81ef.1717063994.git.matthias.schiffer@ew.tq-group.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-tqmx86.c | 46 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c
index 7e428c872a25..f2e7e8754d95 100644
--- a/drivers/gpio/gpio-tqmx86.c
+++ b/drivers/gpio/gpio-tqmx86.c
@@ -32,6 +32,10 @@
 #define TQMX86_GPII_NONE	0
 #define TQMX86_GPII_FALLING	BIT(0)
 #define TQMX86_GPII_RISING	BIT(1)
+/* Stored in irq_type as a trigger type, but not actually valid as a register
+ * value, so the name doesn't use "GPII"
+ */
+#define TQMX86_INT_BOTH		(BIT(0) | BIT(1))
 #define TQMX86_GPII_MASK	(BIT(0) | BIT(1))
 #define TQMX86_GPII_BITS	2
 /* Stored in irq_type with GPII bits */
@@ -113,9 +117,15 @@ static void tqmx86_gpio_irq_config(struct tqmx86_gpio_data *gpio, int offset)
 {
 	u8 type = TQMX86_GPII_NONE, gpiic;
 
-	if (gpio->irq_type[offset] & TQMX86_INT_UNMASKED)
+	if (gpio->irq_type[offset] & TQMX86_INT_UNMASKED) {
 		type = gpio->irq_type[offset] & TQMX86_GPII_MASK;
 
+		if (type == TQMX86_INT_BOTH)
+			type = tqmx86_gpio_get(&gpio->chip, offset + TQMX86_NGPO)
+				? TQMX86_GPII_FALLING
+				: TQMX86_GPII_RISING;
+	}
+
 	gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC);
 	gpiic &= ~(TQMX86_GPII_MASK << (offset * TQMX86_GPII_BITS));
 	gpiic |= type << (offset * TQMX86_GPII_BITS);
@@ -169,7 +179,7 @@ static int tqmx86_gpio_irq_set_type(struct irq_data *data, unsigned int type)
 		new_type = TQMX86_GPII_FALLING;
 		break;
 	case IRQ_TYPE_EDGE_BOTH:
-		new_type = TQMX86_GPII_FALLING | TQMX86_GPII_RISING;
+		new_type = TQMX86_INT_BOTH;
 		break;
 	default:
 		return -EINVAL; /* not supported */
@@ -189,8 +199,8 @@ static void tqmx86_gpio_irq_handler(struct irq_desc *desc)
 	struct gpio_chip *chip = irq_desc_get_handler_data(desc);
 	struct tqmx86_gpio_data *gpio = gpiochip_get_data(chip);
 	struct irq_chip *irq_chip = irq_desc_get_chip(desc);
-	unsigned long irq_bits;
-	int i = 0;
+	unsigned long irq_bits, flags;
+	int i;
 	u8 irq_status;
 
 	chained_irq_enter(irq_chip, desc);
@@ -199,6 +209,34 @@ static void tqmx86_gpio_irq_handler(struct irq_desc *desc)
 	tqmx86_gpio_write(gpio, irq_status, TQMX86_GPIIS);
 
 	irq_bits = irq_status;
+
+	raw_spin_lock_irqsave(&gpio->spinlock, flags);
+	for_each_set_bit(i, &irq_bits, TQMX86_NGPI) {
+		/*
+		 * Edge-both triggers are implemented by flipping the edge
+		 * trigger after each interrupt, as the controller only supports
+		 * either rising or falling edge triggers, but not both.
+		 *
+		 * Internally, the TQMx86 GPIO controller has separate status
+		 * registers for rising and falling edge interrupts. GPIIC
+		 * configures which bits from which register are visible in the
+		 * interrupt status register GPIIS and defines what triggers the
+		 * parent IRQ line. Writing to GPIIS always clears both rising
+		 * and falling interrupt flags internally, regardless of the
+		 * currently configured trigger.
+		 *
+		 * In consequence, we can cleanly implement the edge-both
+		 * trigger in software by first clearing the interrupt and then
+		 * setting the new trigger based on the current GPIO input in
+		 * tqmx86_gpio_irq_config() - even if an edge arrives between
+		 * reading the input and setting the trigger, we will have a new
+		 * interrupt pending.
+		 */
+		if ((gpio->irq_type[i] & TQMX86_GPII_MASK) == TQMX86_INT_BOTH)
+			tqmx86_gpio_irq_config(gpio, i);
+	}
+	raw_spin_unlock_irqrestore(&gpio->spinlock, flags);
+
 	for_each_set_bit(i, &irq_bits, TQMX86_NGPI)
 		generic_handle_domain_irq(gpio->chip.irq.domain,
 					  i + TQMX86_NGPO);
-- 
cgit v1.2.3


From 4a77c3dead97339478c7422eb07bf4bf63577008 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 2 Jun 2024 18:15:25 -0400
Subject: SUNRPC: Fix loop termination condition in gss_free_in_token_pages()

The in_token->pages[] array is not NULL terminated. This results in
the following KASAN splat:

  KASAN: maybe wild-memory-access in range [0x04a2013400000008-0x04a201340000000f]

Fixes: bafa6b4d95d9 ("SUNRPC: Fix gss_free_in_token_pages()")
Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/auth_gss/svcauth_gss.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 96ab50eda9c2..73a90ad873fb 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1069,7 +1069,7 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
 		goto out_denied_free;
 
 	pages = DIV_ROUND_UP(inlen, PAGE_SIZE);
-	in_token->pages = kcalloc(pages, sizeof(struct page *), GFP_KERNEL);
+	in_token->pages = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
 	if (!in_token->pages)
 		goto out_denied_free;
 	in_token->page_base = 0;
-- 
cgit v1.2.3


From c6144a21169fe7d0d70f1a0dae6f6301e5918d30 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Mon, 3 Jun 2024 22:43:11 +0900
Subject: tomoyo: update project links

TOMOYO project has moved to SourceForge.net .

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
 Documentation/admin-guide/LSM/tomoyo.rst | 35 ++++++++++----------------------
 MAINTAINERS                              |  2 +-
 security/tomoyo/Kconfig                  |  2 +-
 security/tomoyo/common.c                 |  2 +-
 4 files changed, 14 insertions(+), 27 deletions(-)

diff --git a/Documentation/admin-guide/LSM/tomoyo.rst b/Documentation/admin-guide/LSM/tomoyo.rst
index 4bc9c2b4da6f..bdb2c2e2a1b2 100644
--- a/Documentation/admin-guide/LSM/tomoyo.rst
+++ b/Documentation/admin-guide/LSM/tomoyo.rst
@@ -9,8 +9,8 @@ TOMOYO is a name-based MAC extension (LSM module) for the Linux kernel.
 
 LiveCD-based tutorials are available at
 
-http://tomoyo.sourceforge.jp/1.8/ubuntu12.04-live.html
-http://tomoyo.sourceforge.jp/1.8/centos6-live.html
+https://tomoyo.sourceforge.net/1.8/ubuntu12.04-live.html
+https://tomoyo.sourceforge.net/1.8/centos6-live.html
 
 Though these tutorials use non-LSM version of TOMOYO, they are useful for you
 to know what TOMOYO is.
@@ -21,45 +21,32 @@ How to enable TOMOYO?
 Build the kernel with ``CONFIG_SECURITY_TOMOYO=y`` and pass ``security=tomoyo`` on
 kernel's command line.
 
-Please see http://tomoyo.osdn.jp/2.5/ for details.
+Please see https://tomoyo.sourceforge.net/2.6/ for details.
 
 Where is documentation?
 =======================
 
 User <-> Kernel interface documentation is available at
-https://tomoyo.osdn.jp/2.5/policy-specification/index.html .
+https://tomoyo.sourceforge.net/2.6/policy-specification/index.html .
 
 Materials we prepared for seminars and symposiums are available at
-https://osdn.jp/projects/tomoyo/docs/?category_id=532&language_id=1 .
+https://sourceforge.net/projects/tomoyo/files/docs/ .
 Below lists are chosen from three aspects.
 
 What is TOMOYO?
   TOMOYO Linux Overview
-    https://osdn.jp/projects/tomoyo/docs/lca2009-takeda.pdf
+    https://sourceforge.net/projects/tomoyo/files/docs/lca2009-takeda.pdf
   TOMOYO Linux: pragmatic and manageable security for Linux
-    https://osdn.jp/projects/tomoyo/docs/freedomhectaipei-tomoyo.pdf
+    https://sourceforge.net/projects/tomoyo/files/docs/freedomhectaipei-tomoyo.pdf
   TOMOYO Linux: A Practical Method to Understand and Protect Your Own Linux Box
-    https://osdn.jp/projects/tomoyo/docs/PacSec2007-en-no-demo.pdf
+    https://sourceforge.net/projects/tomoyo/files/docs/PacSec2007-en-no-demo.pdf
 
 What can TOMOYO do?
   Deep inside TOMOYO Linux
-    https://osdn.jp/projects/tomoyo/docs/lca2009-kumaneko.pdf
+    https://sourceforge.net/projects/tomoyo/files/docs/lca2009-kumaneko.pdf
   The role of "pathname based access control" in security.
-    https://osdn.jp/projects/tomoyo/docs/lfj2008-bof.pdf
+    https://sourceforge.net/projects/tomoyo/files/docs/lfj2008-bof.pdf
 
 History of TOMOYO?
   Realities of Mainlining
-    https://osdn.jp/projects/tomoyo/docs/lfj2008.pdf
-
-What is future plan?
-====================
-
-We believe that inode based security and name based security are complementary
-and both should be used together. But unfortunately, so far, we cannot enable
-multiple LSM modules at the same time. We feel sorry that you have to give up
-SELinux/SMACK/AppArmor etc. when you want to use TOMOYO.
-
-We hope that LSM becomes stackable in future. Meanwhile, you can use non-LSM
-version of TOMOYO, available at http://tomoyo.osdn.jp/1.8/ .
-LSM version of TOMOYO is a subset of non-LSM version of TOMOYO. We are planning
-to port non-LSM version's functionalities to LSM versions.
+    https://sourceforge.net/projects/tomoyo/files/docs/lfj2008.pdf
diff --git a/MAINTAINERS b/MAINTAINERS
index 8754ac2c259d..4154db83a172 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22679,7 +22679,7 @@ L:	tomoyo-users-en@lists.osdn.me (subscribers-only, for users in English)
 L:	tomoyo-dev@lists.osdn.me (subscribers-only, for developers in Japanese)
 L:	tomoyo-users@lists.osdn.me (subscribers-only, for users in Japanese)
 S:	Maintained
-W:	https://tomoyo.osdn.jp/
+W:	https://tomoyo.sourceforge.net/
 F:	security/tomoyo/
 
 TOPSTAR LAPTOP EXTRAS DRIVER
diff --git a/security/tomoyo/Kconfig b/security/tomoyo/Kconfig
index fad75be5f381..1e0dd1a6d0b0 100644
--- a/security/tomoyo/Kconfig
+++ b/security/tomoyo/Kconfig
@@ -10,7 +10,7 @@ config SECURITY_TOMOYO
 	help
 	  This selects TOMOYO Linux, pathname-based access control.
 	  Required userspace tools and further information may be
-	  found at <https://tomoyo.osdn.jp/>.
+	  found at <https://tomoyo.sourceforge.net/>.
 	  If you are unsure how to answer this question, answer N.
 
 config SECURITY_TOMOYO_MAX_ACCEPT_ENTRY
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index ea3140d510ec..5c7b059a332a 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -2787,7 +2787,7 @@ void tomoyo_check_profile(void)
 		else
 			continue;
 		pr_err("Userland tools for TOMOYO 2.6 must be installed and policy must be initialized.\n");
-		pr_err("Please see https://tomoyo.osdn.jp/2.6/ for more information.\n");
+		pr_err("Please see https://tomoyo.sourceforge.net/2.6/ for more information.\n");
 		panic("STOP!");
 	}
 	tomoyo_read_unlock(idx);
-- 
cgit v1.2.3


From 994af1825a2aa286f4903ff64a1c7378b52defe6 Mon Sep 17 00:00:00 2001
From: Nam Cao <namcao@linutronix.de>
Date: Thu, 25 Apr 2024 13:52:01 +0200
Subject: riscv: fix overlap of allocated page and PTR_ERR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On riscv32, it is possible for the last page in virtual address space
(0xfffff000) to be allocated. This page overlaps with PTR_ERR, so that
shouldn't happen.

There is already some code to ensure memblock won't allocate the last page.
However, buddy allocator is left unchecked.

Fix this by reserving physical memory that would be mapped at virtual
addresses greater than 0xfffff000.

Reported-by: Björn Töpel <bjorn@kernel.org>
Closes: https://lore.kernel.org/linux-riscv/878r1ibpdn.fsf@all.your.base.are.belong.to.us
Fixes: 76d2a0493a17 ("RISC-V: Init and Halt Code")
Signed-off-by: Nam Cao <namcao@linutronix.de>
Cc: <stable@vger.kernel.org>
Tested-by: Björn Töpel <bjorn@rivosinc.com>
Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>
Link: https://lore.kernel.org/r/20240425115201.3044202-1-namcao@linutronix.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/mm/init.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index e3218d65f21d..e3405e4b99af 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -250,18 +250,19 @@ static void __init setup_bootmem(void)
 		kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;
 
 	/*
-	 * memblock allocator is not aware of the fact that last 4K bytes of
-	 * the addressable memory can not be mapped because of IS_ERR_VALUE
-	 * macro. Make sure that last 4k bytes are not usable by memblock
-	 * if end of dram is equal to maximum addressable memory.  For 64-bit
-	 * kernel, this problem can't happen here as the end of the virtual
-	 * address space is occupied by the kernel mapping then this check must
-	 * be done as soon as the kernel mapping base address is determined.
+	 * Reserve physical address space that would be mapped to virtual
+	 * addresses greater than (void *)(-PAGE_SIZE) because:
+	 *  - This memory would overlap with ERR_PTR
+	 *  - This memory belongs to high memory, which is not supported
+	 *
+	 * This is not applicable to 64-bit kernel, because virtual addresses
+	 * after (void *)(-PAGE_SIZE) are not linearly mapped: they are
+	 * occupied by kernel mapping. Also it is unrealistic for high memory
+	 * to exist on 64-bit platforms.
 	 */
 	if (!IS_ENABLED(CONFIG_64BIT)) {
-		max_mapped_addr = __pa(~(ulong)0);
-		if (max_mapped_addr == (phys_ram_end - 1))
-			memblock_set_current_limit(max_mapped_addr - 4096);
+		max_mapped_addr = __va_to_pa_nodebug(-PAGE_SIZE);
+		memblock_reserve(max_mapped_addr, (phys_addr_t)-max_mapped_addr);
 	}
 
 	min_low_pfn = PFN_UP(phys_ram_base);
-- 
cgit v1.2.3


From e2c79b4c5c4d83520abb570ca633ded09621c0a6 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@rivosinc.com>
Date: Thu, 30 May 2024 09:44:51 -0700
Subject: Revert "riscv: mm: accelerate pagefault when badaccess"

I accidentally picked up an earlier version of this patch, which had
already landed via mm.  The patch  I picked up contains a bug, which I
kept as I thought it was a fix.  So let's just revert it.

This reverts commit 4c6c0020427a4547845a83f7e4d6085e16c3e24f.

Fixes: 4c6c0020427a ("riscv: mm: accelerate pagefault when badaccess")
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/r/20240530164451.21336-1-palmer@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/mm/fault.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index b3fcf7d67efb..5224f3733802 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -293,8 +293,8 @@ void handle_page_fault(struct pt_regs *regs)
 	if (unlikely(access_error(cause, vma))) {
 		vma_end_read(vma);
 		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
-		tsk->thread.bad_cause = SEGV_ACCERR;
-		bad_area_nosemaphore(regs, code, addr);
+		tsk->thread.bad_cause = cause;
+		bad_area_nosemaphore(regs, SEGV_ACCERR, addr);
 		return;
 	}
 
-- 
cgit v1.2.3


From 2317dc2c22cc353b699c7d1db47b2fe91f54055c Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@toblux.com>
Date: Wed, 29 May 2024 12:19:01 +0200
Subject: bpf, devmap: Remove unnecessary if check in for loop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The iterator variable dst cannot be NULL and the if check can be removed.
Remove it and fix the following Coccinelle/coccicheck warning reported
by itnull.cocci:

	ERROR: iterator variable bound on line 762 cannot be NULL

Signed-off-by: Thorsten Blum <thorsten.blum@toblux.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/bpf/20240529101900.103913-2-thorsten.blum@toblux.com
---
 kernel/bpf/devmap.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 4e2cdbb5629f..7f3b34452243 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -760,9 +760,6 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
 		for (i = 0; i < dtab->n_buckets; i++) {
 			head = dev_map_index_hash(dtab, i);
 			hlist_for_each_entry_safe(dst, next, head, index_hlist) {
-				if (!dst)
-					continue;
-
 				if (is_ifindex_excluded(excluded_devices, num_excluded,
 							dst->dev->ifindex))
 					continue;
-- 
cgit v1.2.3


From 93c1800b3799f17375989b0daf76497dd3e80922 Mon Sep 17 00:00:00 2001
From: David Kaplan <david.kaplan@amd.com>
Date: Sun, 2 Jun 2024 13:19:09 -0500
Subject: x86/kexec: Fix bug with call depth tracking

The call to cc_platform_has() triggers a fault and system crash if call depth
tracking is active because the GS segment has been reset by load_segments() and
GS_BASE is now 0 but call depth tracking uses per-CPU variables to operate.

Call cc_platform_has() earlier in the function when GS is still valid.

  [ bp: Massage. ]

Fixes: 5d8213864ade ("x86/retbleed: Add SKL return thunk")
Signed-off-by: David Kaplan <david.kaplan@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Cc: <stable@kernel.org>
Link: https://lore.kernel.org/r/20240603083036.637-1-bp@kernel.org
---
 arch/x86/kernel/machine_kexec_64.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index b180d8e497c3..cc0f7f70b17b 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -295,8 +295,15 @@ void machine_kexec_cleanup(struct kimage *image)
 void machine_kexec(struct kimage *image)
 {
 	unsigned long page_list[PAGES_NR];
-	void *control_page;
+	unsigned int host_mem_enc_active;
 	int save_ftrace_enabled;
+	void *control_page;
+
+	/*
+	 * This must be done before load_segments() since if call depth tracking
+	 * is used then GS must be valid to make any function calls.
+	 */
+	host_mem_enc_active = cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT);
 
 #ifdef CONFIG_KEXEC_JUMP
 	if (image->preserve_context)
@@ -358,7 +365,7 @@ void machine_kexec(struct kimage *image)
 				       (unsigned long)page_list,
 				       image->start,
 				       image->preserve_context,
-				       cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT));
+				       host_mem_enc_active);
 
 #ifdef CONFIG_KEXEC_JUMP
 	if (image->preserve_context)
-- 
cgit v1.2.3


From 1e24c31351787e24b7eebe84866bd55fd62a0aef Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 31 May 2024 16:00:04 -0700
Subject: cpufreq: intel_pstate: Fix unchecked HWP MSR access

Fix unchecked MSR access error for processors with no HWP support. On
such processors, maximum frequency can be changed by the system firmware
using ACPI event ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED. This results
in accessing HWP MSR 0x771.

Call Trace:
	<TASK>
	generic_exec_single+0x58/0x120
	smp_call_function_single+0xbf/0x110
	rdmsrl_on_cpu+0x46/0x60
	intel_pstate_get_hwp_cap+0x1b/0x70
	intel_pstate_update_limits+0x2a/0x60
	acpi_processor_notify+0xb7/0x140
	acpi_ev_notify_dispatch+0x3b/0x60

HWP MSR 0x771 can be only read on a CPU which supports HWP and enabled.
Hence intel_pstate_get_hwp_cap() can only be called when hwp_active is
true.

Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Closes: https://lore.kernel.org/linux-pm/20240529155740.Hq2Hw7be@linutronix.de/
Fixes: e8217b4bece3 ("cpufreq: intel_pstate: Update the maximum CPU frequency consistently")
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 4b986c044741..65d3f79104bd 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1153,7 +1153,8 @@ static void intel_pstate_update_policies(void)
 static void __intel_pstate_update_max_freq(struct cpudata *cpudata,
 					   struct cpufreq_policy *policy)
 {
-	intel_pstate_get_hwp_cap(cpudata);
+	if (hwp_active)
+		intel_pstate_get_hwp_cap(cpudata);
 
 	policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ?
 			cpudata->pstate.max_freq : cpudata->pstate.turbo_freq;
-- 
cgit v1.2.3


From 2884dc7d08d98a89d8d65121524bb7533183a63a Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Sun, 2 Jun 2024 11:27:03 -0700
Subject: bpf: Fix a potential use-after-free in bpf_link_free()

After commit 1a80dbcb2dba, bpf_link can be freed by
link->ops->dealloc_deferred, but the code still tests and uses
link->ops->dealloc afterward, which leads to a use-after-free as
reported by syzbot. Actually, one of them should be sufficient, so
just call one of them instead of both. Also add a WARN_ON() in case
of any problematic implementation.

Fixes: 1a80dbcb2dba ("bpf: support deferring bpf_link dealloc to after RCU grace period")
Reported-by: syzbot+1989ee16d94720836244@syzkaller.appspotmail.com
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/bpf/20240602182703.207276-1-xiyou.wangcong@gmail.com
---
 kernel/bpf/syscall.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2222c3ff88e7..f45ed6adc092 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2998,6 +2998,7 @@ static int bpf_obj_get(const union bpf_attr *attr)
 void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
 		   const struct bpf_link_ops *ops, struct bpf_prog *prog)
 {
+	WARN_ON(ops->dealloc && ops->dealloc_deferred);
 	atomic64_set(&link->refcnt, 1);
 	link->type = type;
 	link->id = 0;
@@ -3056,16 +3057,17 @@ static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
 /* bpf_link_free is guaranteed to be called from process context */
 static void bpf_link_free(struct bpf_link *link)
 {
+	const struct bpf_link_ops *ops = link->ops;
 	bool sleepable = false;
 
 	bpf_link_free_id(link->id);
 	if (link->prog) {
 		sleepable = link->prog->sleepable;
 		/* detach BPF program, clean up used resources */
-		link->ops->release(link);
+		ops->release(link);
 		bpf_prog_put(link->prog);
 	}
-	if (link->ops->dealloc_deferred) {
+	if (ops->dealloc_deferred) {
 		/* schedule BPF link deallocation; if underlying BPF program
 		 * is sleepable, we need to first wait for RCU tasks trace
 		 * sync, then go through "classic" RCU grace period
@@ -3074,9 +3076,8 @@ static void bpf_link_free(struct bpf_link *link)
 			call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
 		else
 			call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
-	}
-	if (link->ops->dealloc)
-		link->ops->dealloc(link);
+	} else if (ops->dealloc)
+		ops->dealloc(link);
 }
 
 static void bpf_link_put_deferred(struct work_struct *work)
-- 
cgit v1.2.3


From b97e8a2f7130a4b30d1502003095833d16c028b3 Mon Sep 17 00:00:00 2001
From: Hagar Hemdan <hagarhem@amazon.com>
Date: Fri, 31 May 2024 16:21:44 +0000
Subject: irqchip/gic-v3-its: Fix potential race condition in
 its_vlpi_prop_update()

its_vlpi_prop_update() calls lpi_write_config() which obtains the
mapping information for a VLPI without lock held. So it could race
with its_vlpi_unmap().

Since all calls from its_irq_set_vcpu_affinity() require the same
lock to be held, hoist the locking there instead of sprinkling the
locking all over the place.

This bug was discovered using Coverity Static Analysis Security Testing
(SAST) by Synopsys, Inc.

[ tglx: Use guard() instead of goto ]

Fixes: 015ec0386ab6 ("irqchip/gic-v3-its: Add VLPI configuration handling")
Suggested-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Hagar Hemdan <hagarhem@amazon.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Reviewed-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20240531162144.28650-1-hagarhem@amazon.com
---
 drivers/irqchip/irq-gic-v3-its.c | 44 +++++++++++-----------------------------
 1 file changed, 12 insertions(+), 32 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 40ebf1726393..3c755d5dad6e 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1846,28 +1846,22 @@ static int its_vlpi_map(struct irq_data *d, struct its_cmd_info *info)
 {
 	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
 	u32 event = its_get_event_id(d);
-	int ret = 0;
 
 	if (!info->map)
 		return -EINVAL;
 
-	raw_spin_lock(&its_dev->event_map.vlpi_lock);
-
 	if (!its_dev->event_map.vm) {
 		struct its_vlpi_map *maps;
 
 		maps = kcalloc(its_dev->event_map.nr_lpis, sizeof(*maps),
 			       GFP_ATOMIC);
-		if (!maps) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!maps)
+			return -ENOMEM;
 
 		its_dev->event_map.vm = info->map->vm;
 		its_dev->event_map.vlpi_maps = maps;
 	} else if (its_dev->event_map.vm != info->map->vm) {
-		ret = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
 	/* Get our private copy of the mapping information */
@@ -1899,46 +1893,32 @@ static int its_vlpi_map(struct irq_data *d, struct its_cmd_info *info)
 		its_dev->event_map.nr_vlpis++;
 	}
 
-out:
-	raw_spin_unlock(&its_dev->event_map.vlpi_lock);
-	return ret;
+	return 0;
 }
 
 static int its_vlpi_get(struct irq_data *d, struct its_cmd_info *info)
 {
 	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
 	struct its_vlpi_map *map;
-	int ret = 0;
-
-	raw_spin_lock(&its_dev->event_map.vlpi_lock);
 
 	map = get_vlpi_map(d);
 
-	if (!its_dev->event_map.vm || !map) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (!its_dev->event_map.vm || !map)
+		return -EINVAL;
 
 	/* Copy our mapping information to the incoming request */
 	*info->map = *map;
 
-out:
-	raw_spin_unlock(&its_dev->event_map.vlpi_lock);
-	return ret;
+	return 0;
 }
 
 static int its_vlpi_unmap(struct irq_data *d)
 {
 	struct its_device *its_dev = irq_data_get_irq_chip_data(d);
 	u32 event = its_get_event_id(d);
-	int ret = 0;
-
-	raw_spin_lock(&its_dev->event_map.vlpi_lock);
 
-	if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d))
+		return -EINVAL;
 
 	/* Drop the virtual mapping */
 	its_send_discard(its_dev, event);
@@ -1962,9 +1942,7 @@ static int its_vlpi_unmap(struct irq_data *d)
 		kfree(its_dev->event_map.vlpi_maps);
 	}
 
-out:
-	raw_spin_unlock(&its_dev->event_map.vlpi_lock);
-	return ret;
+	return 0;
 }
 
 static int its_vlpi_prop_update(struct irq_data *d, struct its_cmd_info *info)
@@ -1992,6 +1970,8 @@ static int its_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info)
 	if (!is_v4(its_dev->its))
 		return -EINVAL;
 
+	guard(raw_spinlock_irq)(&its_dev->event_map.vlpi_lock);
+
 	/* Unmap request? */
 	if (!info)
 		return its_vlpi_unmap(d);
-- 
cgit v1.2.3


From 27bd5fdc24c0d5d1306f968ef24105c4577242b0 Mon Sep 17 00:00:00 2001
From: Nikunj A Dadhania <nikunj@amd.com>
Date: Fri, 31 May 2024 04:46:42 +0000
Subject: KVM: SEV-ES: Prevent MSR access post VMSA encryption

KVM currently allows userspace to read/write MSRs even after the VMSA is
encrypted. This can cause unintentional issues if MSR access has side-
effects. For ex, while migrating a guest, userspace could attempt to
migrate MSR_IA32_DEBUGCTLMSR and end up unintentionally disabling LBRV on
the target. Fix this by preventing access to those MSRs which are context
switched via the VMSA, once the VMSA is encrypted.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Nikunj A Dadhania <nikunj@amd.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Message-ID: <20240531044644.768-2-ravi.bangoria@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1d3c8be39479..f265361f4518 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2822,10 +2822,24 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr)
 	return 0;
 }
 
+static bool
+sev_es_prevent_msr_access(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+	return sev_es_guest(vcpu->kvm) &&
+	       vcpu->arch.guest_state_protected &&
+	       svm_msrpm_offset(msr_info->index) != MSR_INVALID &&
+	       !msr_write_intercepted(vcpu, msr_info->index);
+}
+
 static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	if (sev_es_prevent_msr_access(vcpu, msr_info)) {
+		msr_info->data = 0;
+		return -EINVAL;
+	}
+
 	switch (msr_info->index) {
 	case MSR_AMD64_TSC_RATIO:
 		if (!msr_info->host_initiated &&
@@ -2976,6 +2990,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 
 	u32 ecx = msr->index;
 	u64 data = msr->data;
+
+	if (sev_es_prevent_msr_access(vcpu, msr))
+		return -EINVAL;
+
 	switch (ecx) {
 	case MSR_AMD64_TSC_RATIO:
 
-- 
cgit v1.2.3


From d922056215617eedfbdbc29fe49953423686fe5e Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@amd.com>
Date: Fri, 31 May 2024 04:46:43 +0000
Subject: KVM: SEV-ES: Disallow SEV-ES guests when X86_FEATURE_LBRV is absent

As documented in APM[1], LBR Virtualization must be enabled for SEV-ES
guests. So, prevent SEV-ES guests when LBRV support is missing.

[1]: AMD64 Architecture Programmer's Manual Pub. 40332, Rev. 4.07 - June
     2023, Vol 2, 15.35.2 Enabling SEV-ES.
     https://bugzilla.kernel.org/attachment.cgi?id=304653

Fixes: 376c6d285017 ("KVM: SVM: Provide support for SEV-ES vCPU creation/loading")
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Message-ID: <20240531044644.768-3-ravi.bangoria@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/sev.c |  6 ++++++
 arch/x86/kvm/svm/svm.c | 16 +++++++---------
 arch/x86/kvm/svm/svm.h |  1 +
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 0623cfaa7bb0..8b52bbba02c0 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2406,6 +2406,12 @@ void __init sev_hardware_setup(void)
 	if (!boot_cpu_has(X86_FEATURE_SEV_ES))
 		goto out;
 
+	if (!lbrv) {
+		WARN_ONCE(!boot_cpu_has(X86_FEATURE_LBRV),
+			  "LBRV must be present for SEV-ES support");
+		goto out;
+	}
+
 	/* Has the system been allocated ASIDs for SEV-ES? */
 	if (min_sev_asid == 1)
 		goto out;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f265361f4518..223a551bf44e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -215,7 +215,7 @@ int vgif = true;
 module_param(vgif, int, 0444);
 
 /* enable/disable LBR virtualization */
-static int lbrv = true;
+int lbrv = true;
 module_param(lbrv, int, 0444);
 
 static int tsc_scaling = true;
@@ -5294,6 +5294,12 @@ static __init int svm_hardware_setup(void)
 
 	nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
 
+	if (lbrv) {
+		if (!boot_cpu_has(X86_FEATURE_LBRV))
+			lbrv = false;
+		else
+			pr_info("LBR virtualization supported\n");
+	}
 	/*
 	 * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
 	 * may be modified by svm_adjust_mmio_mask()), as well as nrips.
@@ -5347,14 +5353,6 @@ static __init int svm_hardware_setup(void)
 		svm_x86_ops.set_vnmi_pending = NULL;
 	}
 
-
-	if (lbrv) {
-		if (!boot_cpu_has(X86_FEATURE_LBRV))
-			lbrv = false;
-		else
-			pr_info("LBR virtualization supported\n");
-	}
-
 	if (!enable_pmu)
 		pr_info("PMU virtualization is disabled\n");
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index be57213cd295..59bbb8122b75 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -39,6 +39,7 @@ extern int vgif;
 extern bool intercept_smi;
 extern bool x2avic_enabled;
 extern bool vnmi;
+extern int lbrv;
 
 /*
  * Clean bits in VMCB.
-- 
cgit v1.2.3


From b7e4be0a224fe5c6be30c1c8bdda8d2317ad6ba4 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@amd.com>
Date: Fri, 31 May 2024 04:46:44 +0000
Subject: KVM: SEV-ES: Delegate LBR virtualization to the processor

As documented in APM[1], LBR Virtualization must be enabled for SEV-ES
guests. Although KVM currently enforces LBRV for SEV-ES guests, there
are multiple issues with it:

o MSR_IA32_DEBUGCTLMSR is still intercepted. Since MSR_IA32_DEBUGCTLMSR
  interception is used to dynamically toggle LBRV for performance reasons,
  this can be fatal for SEV-ES guests. For ex SEV-ES guest on Zen3:

  [guest ~]# wrmsr 0x1d9 0x4
  KVM: entry failed, hardware error 0xffffffff
  EAX=00000004 EBX=00000000 ECX=000001d9 EDX=00000000

  Fix this by never intercepting MSR_IA32_DEBUGCTLMSR for SEV-ES guests.
  No additional save/restore logic is required since MSR_IA32_DEBUGCTLMSR
  is of swap type A.

o KVM will disable LBRV if userspace sets MSR_IA32_DEBUGCTLMSR before the
  VMSA is encrypted. Fix this by moving LBRV enablement code post VMSA
  encryption.

[1]: AMD64 Architecture Programmer's Manual Pub. 40332, Rev. 4.07 - June
     2023, Vol 2, 15.35.2 Enabling SEV-ES.
     https://bugzilla.kernel.org/attachment.cgi?id=304653

Fixes: 376c6d285017 ("KVM: SVM: Provide support for SEV-ES vCPU creation/loading")
Co-developed-by: Nikunj A Dadhania <nikunj@amd.com>
Signed-off-by: Nikunj A Dadhania <nikunj@amd.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Message-ID: <20240531044644.768-4-ravi.bangoria@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/sev.c | 13 ++++++++-----
 arch/x86/kvm/svm/svm.c |  8 +++++++-
 arch/x86/kvm/svm/svm.h |  3 ++-
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 8b52bbba02c0..95095a233a45 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -779,6 +779,14 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
 	 */
 	fpstate_set_confidential(&vcpu->arch.guest_fpu);
 	vcpu->arch.guest_state_protected = true;
+
+	/*
+	 * SEV-ES guest mandates LBR Virtualization to be _always_ ON. Enable it
+	 * only after setting guest_state_protected because KVM_SET_MSRS allows
+	 * dynamic toggling of LBRV (for performance reason) on write access to
+	 * MSR_IA32_DEBUGCTLMSR when guest_state_protected is not set.
+	 */
+	svm_enable_lbrv(vcpu);
 	return 0;
 }
 
@@ -3222,7 +3230,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 
 	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
-	svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
 
 	/*
 	 * An SEV-ES guest requires a VMSA area that is a separate from the
@@ -3274,10 +3281,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
 	/* Clear intercepts on selected MSRs */
 	set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
-	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
-	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
-	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
-	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
 }
 
 void sev_init_vmcb(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 223a551bf44e..296c524988f9 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -99,6 +99,7 @@ static const struct svm_direct_access_msrs {
 	{ .index = MSR_IA32_SPEC_CTRL,			.always = false },
 	{ .index = MSR_IA32_PRED_CMD,			.always = false },
 	{ .index = MSR_IA32_FLUSH_CMD,			.always = false },
+	{ .index = MSR_IA32_DEBUGCTLMSR,		.always = false },
 	{ .index = MSR_IA32_LASTBRANCHFROMIP,		.always = false },
 	{ .index = MSR_IA32_LASTBRANCHTOIP,		.always = false },
 	{ .index = MSR_IA32_LASTINTFROMIP,		.always = false },
@@ -990,7 +991,7 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
 	vmcb_mark_dirty(to_vmcb, VMCB_LBR);
 }
 
-static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
+void svm_enable_lbrv(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -1000,6 +1001,9 @@ static void svm_enable_lbrv(struct kvm_vcpu *vcpu)
 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
 
+	if (sev_es_guest(vcpu->kvm))
+		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_DEBUGCTLMSR, 1, 1);
+
 	/* Move the LBR msrs to the vmcb02 so that the guest can see them. */
 	if (is_guest_mode(vcpu))
 		svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
@@ -1009,6 +1013,8 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
+
 	svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
 	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 59bbb8122b75..0f1472690b59 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -30,7 +30,7 @@
 #define	IOPM_SIZE PAGE_SIZE * 3
 #define	MSRPM_SIZE PAGE_SIZE * 2
 
-#define MAX_DIRECT_ACCESS_MSRS	47
+#define MAX_DIRECT_ACCESS_MSRS	48
 #define MSRPM_OFFSETS	32
 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
 extern bool npt_enabled;
@@ -553,6 +553,7 @@ u32 *svm_vcpu_alloc_msrpm(void);
 void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm);
 void svm_vcpu_free_msrpm(u32 *msrpm);
 void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
+void svm_enable_lbrv(struct kvm_vcpu *vcpu);
 void svm_update_lbrv(struct kvm_vcpu *vcpu);
 
 int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
-- 
cgit v1.2.3


From 89a58812c47f1823191e9d0b08b53df2dd304ae2 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 21 May 2024 18:03:04 -0700
Subject: KVM: x86: Drop support for hand tuning APIC timer advancement from
 userspace

Remove support for specifying a static local APIC timer advancement value,
and instead present a read-only boolean parameter to let userspace enable
or disable KVM's dynamic APIC timer advancement.  Realistically, it's all
but impossible for userspace to specify an advancement that is more
precise than what KVM's adaptive tuning can provide.  E.g. a static value
needs to be tuned for the exact hardware and kernel, and if KVM is using
hrtimers, likely requires additional tuning for the exact configuration of
the entire system.

Dropping support for a userspace provided value also fixes several flaws
in the interface.  E.g. KVM interprets a negative value other than -1 as a
large advancement, toggling between a negative and positive value yields
unpredictable behavior as vCPUs will switch from dynamic to static
advancement, changing the advancement in the middle of VM creation can
result in different values for vCPUs within a VM, etc.  Those flaws are
mostly fixable, but there's almost no justification for taking on yet more
complexity (it's minimal complexity, but still non-zero).

The only arguments against using KVM's adaptive tuning is if a setup needs
a higher maximum, or if the adjustments are too reactive, but those are
arguments for letting userspace control the absolute max advancement and
the granularity of each adjustment, e.g. similar to how KVM provides knobs
for halt polling.

Link: https://lore.kernel.org/all/20240520115334.852510-1-zhoushuling@huawei.com
Cc: Shuling Zhou <zhoushuling@huawei.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20240522010304.1650603-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 39 +++++++++++++++++++++------------------
 arch/x86/kvm/lapic.h |  2 +-
 arch/x86/kvm/x86.c   | 11 +----------
 3 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ebf41023be38..acd7d48100a1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -59,7 +59,17 @@
 #define MAX_APIC_VECTOR			256
 #define APIC_VECTORS_PER_REG		32
 
-static bool lapic_timer_advance_dynamic __read_mostly;
+/*
+ * Enable local APIC timer advancement (tscdeadline mode only) with adaptive
+ * tuning.  When enabled, KVM programs the host timer event to fire early, i.e.
+ * before the deadline expires, to account for the delay between taking the
+ * VM-Exit (to inject the guest event) and the subsequent VM-Enter to resume
+ * the guest, i.e. so that the interrupt arrives in the guest with minimal
+ * latency relative to the deadline programmed by the guest.
+ */
+static bool lapic_timer_advance __read_mostly = true;
+module_param(lapic_timer_advance, bool, 0444);
+
 #define LAPIC_TIMER_ADVANCE_ADJUST_MIN	100	/* clock cycles */
 #define LAPIC_TIMER_ADVANCE_ADJUST_MAX	10000	/* clock cycles */
 #define LAPIC_TIMER_ADVANCE_NS_INIT	1000
@@ -1854,16 +1864,14 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
 	trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
 
-	if (lapic_timer_advance_dynamic) {
-		adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
-		/*
-		 * If the timer fired early, reread the TSC to account for the
-		 * overhead of the above adjustment to avoid waiting longer
-		 * than is necessary.
-		 */
-		if (guest_tsc < tsc_deadline)
-			guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
-	}
+	adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
+
+	/*
+	 * If the timer fired early, reread the TSC to account for the overhead
+	 * of the above adjustment to avoid waiting longer than is necessary.
+	 */
+	if (guest_tsc < tsc_deadline)
+		guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
 
 	if (guest_tsc < tsc_deadline)
 		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
@@ -2812,7 +2820,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 		return HRTIMER_NORESTART;
 }
 
-int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
+int kvm_create_lapic(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic;
 
@@ -2845,13 +2853,8 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
 		     HRTIMER_MODE_ABS_HARD);
 	apic->lapic_timer.timer.function = apic_timer_fn;
-	if (timer_advance_ns == -1) {
+	if (lapic_timer_advance)
 		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
-		lapic_timer_advance_dynamic = true;
-	} else {
-		apic->lapic_timer.timer_advance_ns = timer_advance_ns;
-		lapic_timer_advance_dynamic = false;
-	}
 
 	/*
 	 * Stuff the APIC ENABLE bit in lieu of temporarily incrementing
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 0a0ea4b5dd8c..a69e706b9080 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -85,7 +85,7 @@ struct kvm_lapic {
 
 struct dest_map;
 
-int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns);
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
 void kvm_free_lapic(struct kvm_vcpu *vcpu);
 
 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 082ac6d95a3a..8c9e4281d978 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -164,15 +164,6 @@ module_param(kvmclock_periodic_sync, bool, 0444);
 static u32 __read_mostly tsc_tolerance_ppm = 250;
 module_param(tsc_tolerance_ppm, uint, 0644);
 
-/*
- * lapic timer advance (tscdeadline mode only) in nanoseconds.  '-1' enables
- * adaptive tuning starting from default advancement of 1000ns.  '0' disables
- * advancement entirely.  Any other value is used as-is and disables adaptive
- * tuning, i.e. allows privileged userspace to set an exact advancement time.
- */
-static int __read_mostly lapic_timer_advance_ns = -1;
-module_param(lapic_timer_advance_ns, int, 0644);
-
 static bool __read_mostly vector_hashing = true;
 module_param(vector_hashing, bool, 0444);
 
@@ -12169,7 +12160,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	if (r < 0)
 		return r;
 
-	r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
+	r = kvm_create_lapic(vcpu);
 	if (r < 0)
 		goto fail_mmu_destroy;
 
-- 
cgit v1.2.3


From d6283b160a12010b2113cc64726a3c9eda13dc5f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 29 May 2024 10:38:04 -0300
Subject: tools headers uapi: Sync linux/stat.h with the kernel sources to pick
 STATX_SUBVOL

To pick the changes from:

  2a82bb02941fb53d ("statx: stx_subvol")

This silences this perf build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/include/uapi/linux/stat.h include/uapi/linux/stat.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/lkml/ZlnK2Fmx_gahzwZI@x1
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/stat.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 2f2ee82d5517..67626d535316 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -126,8 +126,9 @@ struct statx {
 	__u64	stx_mnt_id;
 	__u32	stx_dio_mem_align;	/* Memory buffer alignment for direct I/O */
 	__u32	stx_dio_offset_align;	/* File offset alignment for direct I/O */
+	__u64	stx_subvol;	/* Subvolume identifier */
 	/* 0xa0 */
-	__u64	__spare3[12];	/* Spare space for future expansion */
+	__u64	__spare3[11];	/* Spare space for future expansion */
 	/* 0x100 */
 };
 
@@ -155,6 +156,7 @@ struct statx {
 #define STATX_MNT_ID		0x00001000U	/* Got stx_mnt_id */
 #define STATX_DIOALIGN		0x00002000U	/* Want/got direct I/O alignment info */
 #define STATX_MNT_ID_UNIQUE	0x00004000U	/* Want/got extended stx_mount_id */
+#define STATX_SUBVOL		0x00008000U	/* Want/got stx_subvol */
 
 #define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */
 
-- 
cgit v1.2.3


From ffbe335b8d471f79b259e950cb20999700670456 Mon Sep 17 00:00:00 2001
From: Matthias Stocker <mstocker@barracuda.com>
Date: Fri, 31 May 2024 12:37:11 +0200
Subject: vmxnet3: disable rx data ring on dma allocation failure

When vmxnet3_rq_create() fails to allocate memory for rq->data_ring.base,
the subsequent call to vmxnet3_rq_destroy_all_rxdataring does not reset
rq->data_ring.desc_size for the data ring that failed, which presumably
causes the hypervisor to reference it on packet reception.

To fix this bug, rq->data_ring.desc_size needs to be set to 0 to tell
the hypervisor to disable this feature.

[   95.436876] kernel BUG at net/core/skbuff.c:207!
[   95.439074] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
[   95.440411] CPU: 7 PID: 0 Comm: swapper/7 Not tainted 6.9.3-dirty #1
[   95.441558] Hardware name: VMware, Inc. VMware Virtual
Platform/440BX Desktop Reference Platform, BIOS 6.00 12/12/2018
[   95.443481] RIP: 0010:skb_panic+0x4d/0x4f
[   95.444404] Code: 4f 70 50 8b 87 c0 00 00 00 50 8b 87 bc 00 00 00 50
ff b7 d0 00 00 00 4c 8b 8f c8 00 00 00 48 c7 c7 68 e8 be 9f e8 63 58 f9
ff <0f> 0b 48 8b 14 24 48 c7 c1 d0 73 65 9f e8 a1 ff ff ff 48 8b 14 24
[   95.447684] RSP: 0018:ffffa13340274dd0 EFLAGS: 00010246
[   95.448762] RAX: 0000000000000089 RBX: ffff8fbbc72b02d0 RCX: 000000000000083f
[   95.450148] RDX: 0000000000000000 RSI: 00000000000000f6 RDI: 000000000000083f
[   95.451520] RBP: 000000000000002d R08: 0000000000000000 R09: ffffa13340274c60
[   95.452886] R10: ffffffffa04ed468 R11: 0000000000000002 R12: 0000000000000000
[   95.454293] R13: ffff8fbbdab3c2d0 R14: ffff8fbbdbd829e0 R15: ffff8fbbdbd809e0
[   95.455682] FS:  0000000000000000(0000) GS:ffff8fbeefd80000(0000) knlGS:0000000000000000
[   95.457178] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   95.458340] CR2: 00007fd0d1f650c8 CR3: 0000000115f28000 CR4: 00000000000406f0
[   95.459791] Call Trace:
[   95.460515]  <IRQ>
[   95.461180]  ? __die_body.cold+0x19/0x27
[   95.462150]  ? die+0x2e/0x50
[   95.462976]  ? do_trap+0xca/0x110
[   95.463973]  ? do_error_trap+0x6a/0x90
[   95.464966]  ? skb_panic+0x4d/0x4f
[   95.465901]  ? exc_invalid_op+0x50/0x70
[   95.466849]  ? skb_panic+0x4d/0x4f
[   95.467718]  ? asm_exc_invalid_op+0x1a/0x20
[   95.468758]  ? skb_panic+0x4d/0x4f
[   95.469655]  skb_put.cold+0x10/0x10
[   95.470573]  vmxnet3_rq_rx_complete+0x862/0x11e0 [vmxnet3]
[   95.471853]  vmxnet3_poll_rx_only+0x36/0xb0 [vmxnet3]
[   95.473185]  __napi_poll+0x2b/0x160
[   95.474145]  net_rx_action+0x2c6/0x3b0
[   95.475115]  handle_softirqs+0xe7/0x2a0
[   95.476122]  __irq_exit_rcu+0x97/0xb0
[   95.477109]  common_interrupt+0x85/0xa0
[   95.478102]  </IRQ>
[   95.478846]  <TASK>
[   95.479603]  asm_common_interrupt+0x26/0x40
[   95.480657] RIP: 0010:pv_native_safe_halt+0xf/0x20
[   95.481801] Code: 22 d7 e9 54 87 01 00 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa eb 07 0f 00 2d 93 ba 3b 00 fb f4 <e9> 2c 87 01 00 66 66 2e 0f 1f 84 00 00 00 00 00 90 90 90 90 90 90
[   95.485563] RSP: 0018:ffffa133400ffe58 EFLAGS: 00000246
[   95.486882] RAX: 0000000000004000 RBX: ffff8fbbc1d14064 RCX: 0000000000000000
[   95.488477] RDX: ffff8fbeefd80000 RSI: ffff8fbbc1d14000 RDI: 0000000000000001
[   95.490067] RBP: ffff8fbbc1d14064 R08: ffffffffa0652260 R09: 00000000000010d3
[   95.491683] R10: 0000000000000018 R11: ffff8fbeefdb4764 R12: ffffffffa0652260
[   95.493389] R13: ffffffffa06522e0 R14: 0000000000000001 R15: 0000000000000000
[   95.495035]  acpi_safe_halt+0x14/0x20
[   95.496127]  acpi_idle_do_entry+0x2f/0x50
[   95.497221]  acpi_idle_enter+0x7f/0xd0
[   95.498272]  cpuidle_enter_state+0x81/0x420
[   95.499375]  cpuidle_enter+0x2d/0x40
[   95.500400]  do_idle+0x1e5/0x240
[   95.501385]  cpu_startup_entry+0x29/0x30
[   95.502422]  start_secondary+0x11c/0x140
[   95.503454]  common_startup_64+0x13e/0x141
[   95.504466]  </TASK>
[   95.505197] Modules linked in: nft_fib_inet nft_fib_ipv4
nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6
nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6
nf_defrag_ipv4 rfkill ip_set nf_tables vsock_loopback
vmw_vsock_virtio_transport_common qrtr vmw_vsock_vmci_transport vsock
sunrpc binfmt_misc pktcdvd vmw_balloon pcspkr vmw_vmci i2c_piix4 joydev
loop dm_multipath nfnetlink zram crct10dif_pclmul crc32_pclmul vmwgfx
crc32c_intel polyval_clmulni polyval_generic ghash_clmulni_intel
sha512_ssse3 sha256_ssse3 vmxnet3 sha1_ssse3 drm_ttm_helper vmw_pvscsi
ttm ata_generic pata_acpi serio_raw scsi_dh_rdac scsi_dh_emc
scsi_dh_alua ip6_tables ip_tables fuse
[   95.516536] ---[ end trace 0000000000000000 ]---

Fixes: 6f4833383e85 ("net: vmxnet3: Fix NULL pointer dereference in vmxnet3_rq_rx_complete()")
Signed-off-by: Matthias Stocker <mstocker@barracuda.com>
Reviewed-by: Subbaraya Sundeep <sbhatta@marvell.com>
Reviewed-by: Ronak Doshi <ronak.doshi@broadcom.com>
Link: https://lore.kernel.org/r/20240531103711.101961-1-mstocker@barracuda.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/vmxnet3/vmxnet3_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 89ca6e75fcc6..63822d454c00 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -2034,8 +2034,8 @@ vmxnet3_rq_destroy_all_rxdataring(struct vmxnet3_adapter *adapter)
 					  rq->data_ring.base,
 					  rq->data_ring.basePA);
 			rq->data_ring.base = NULL;
-			rq->data_ring.desc_size = 0;
 		}
+		rq->data_ring.desc_size = 0;
 	}
 }
 
-- 
cgit v1.2.3


From 2fe40483ec257de2a0d819ef88e3e76c7e261319 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 31 May 2024 13:26:32 +0000
Subject: ipv6: ioam: block BH from ioam6_output()

As explained in commit 1378817486d6 ("tipc: block BH
before using dst_cache"), net/core/dst_cache.c
helpers need to be called with BH disabled.

Disabling preemption in ioam6_output() is not good enough,
because ioam6_output() is called from process context,
lwtunnel_output() only uses rcu_read_lock().

We might be interrupted by a softirq, re-enter ioam6_output()
and corrupt dst_cache data structures.

Fix the race by using local_bh_disable() instead of
preempt_disable().

Fixes: 8cb3bf8bff3c ("ipv6: ioam: Add support for the ip6ip6 encapsulation")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Justin Iurman <justin.iurman@uliege.be>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Link: https://lore.kernel.org/r/20240531132636.2637995-2-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv6/ioam6_iptunnel.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index 7563f8c6aa87..bf7120ecea1e 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -351,9 +351,9 @@ do_encap:
 		goto drop;
 
 	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
-		preempt_disable();
+		local_bh_disable();
 		dst = dst_cache_get(&ilwt->cache);
-		preempt_enable();
+		local_bh_enable();
 
 		if (unlikely(!dst)) {
 			struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -373,9 +373,9 @@ do_encap:
 				goto drop;
 			}
 
-			preempt_disable();
+			local_bh_disable();
 			dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
-			preempt_enable();
+			local_bh_enable();
 		}
 
 		skb_dst_drop(skb);
-- 
cgit v1.2.3


From db0090c6eb12c31246438b7fe2a8f1b833e7a653 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 31 May 2024 13:26:33 +0000
Subject: net: ipv6: rpl_iptunnel: block BH in rpl_output() and rpl_input()

As explained in commit 1378817486d6 ("tipc: block BH
before using dst_cache"), net/core/dst_cache.c
helpers need to be called with BH disabled.

Disabling preemption in rpl_output() is not good enough,
because rpl_output() is called from process context,
lwtunnel_output() only uses rcu_read_lock().

We might be interrupted by a softirq, re-enter rpl_output()
and corrupt dst_cache data structures.

Fix the race by using local_bh_disable() instead of
preempt_disable().

Apply a similar change in rpl_input().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Aring <aahringo@redhat.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Link: https://lore.kernel.org/r/20240531132636.2637995-3-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv6/rpl_iptunnel.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index a013b92cbb86..2c83b7586422 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -212,9 +212,9 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	if (unlikely(err))
 		goto drop;
 
-	preempt_disable();
+	local_bh_disable();
 	dst = dst_cache_get(&rlwt->cache);
-	preempt_enable();
+	local_bh_enable();
 
 	if (unlikely(!dst)) {
 		struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -234,9 +234,9 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 			goto drop;
 		}
 
-		preempt_disable();
+		local_bh_disable();
 		dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr);
-		preempt_enable();
+		local_bh_enable();
 	}
 
 	skb_dst_drop(skb);
@@ -268,23 +268,21 @@ static int rpl_input(struct sk_buff *skb)
 		return err;
 	}
 
-	preempt_disable();
+	local_bh_disable();
 	dst = dst_cache_get(&rlwt->cache);
-	preempt_enable();
 
 	if (!dst) {
 		ip6_route_input(skb);
 		dst = skb_dst(skb);
 		if (!dst->error) {
-			preempt_disable();
 			dst_cache_set_ip6(&rlwt->cache, dst,
 					  &ipv6_hdr(skb)->saddr);
-			preempt_enable();
 		}
 	} else {
 		skb_dst_drop(skb);
 		skb_dst_set(skb, dst);
 	}
+	local_bh_enable();
 
 	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 	if (unlikely(err))
-- 
cgit v1.2.3


From c0b98ac1cc104f48763cdb27b1e9ac25fd81fc90 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 31 May 2024 13:26:34 +0000
Subject: ipv6: sr: block BH in seg6_output_core() and seg6_input_core()

As explained in commit 1378817486d6 ("tipc: block BH
before using dst_cache"), net/core/dst_cache.c
helpers need to be called with BH disabled.

Disabling preemption in seg6_output_core() is not good enough,
because seg6_output_core() is called from process context,
lwtunnel_output() only uses rcu_read_lock().

We might be interrupted by a softirq, re-enter seg6_output_core()
and corrupt dst_cache data structures.

Fix the race by using local_bh_disable() instead of
preempt_disable().

Apply a similar change in seg6_input_core().

Fixes: fa79581ea66c ("ipv6: sr: fix several BUGs when preemption is enabled")
Fixes: 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: David Lebrun <dlebrun@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Link: https://lore.kernel.org/r/20240531132636.2637995-4-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv6/seg6_iptunnel.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index a75df2ec8db0..098632adc9b5 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -464,23 +464,21 @@ static int seg6_input_core(struct net *net, struct sock *sk,
 
 	slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
 
-	preempt_disable();
+	local_bh_disable();
 	dst = dst_cache_get(&slwt->cache);
-	preempt_enable();
 
 	if (!dst) {
 		ip6_route_input(skb);
 		dst = skb_dst(skb);
 		if (!dst->error) {
-			preempt_disable();
 			dst_cache_set_ip6(&slwt->cache, dst,
 					  &ipv6_hdr(skb)->saddr);
-			preempt_enable();
 		}
 	} else {
 		skb_dst_drop(skb);
 		skb_dst_set(skb, dst);
 	}
+	local_bh_enable();
 
 	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
 	if (unlikely(err))
@@ -536,9 +534,9 @@ static int seg6_output_core(struct net *net, struct sock *sk,
 
 	slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
 
-	preempt_disable();
+	local_bh_disable();
 	dst = dst_cache_get(&slwt->cache);
-	preempt_enable();
+	local_bh_enable();
 
 	if (unlikely(!dst)) {
 		struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -558,9 +556,9 @@ static int seg6_output_core(struct net *net, struct sock *sk,
 			goto drop;
 		}
 
-		preempt_disable();
+		local_bh_disable();
 		dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
-		preempt_enable();
+		local_bh_enable();
 	}
 
 	skb_dst_drop(skb);
-- 
cgit v1.2.3


From cf28ff8e4c02e1ffa850755288ac954b6ff0db8c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 31 May 2024 13:26:35 +0000
Subject: ila: block BH in ila_output()

As explained in commit 1378817486d6 ("tipc: block BH
before using dst_cache"), net/core/dst_cache.c
helpers need to be called with BH disabled.

ila_output() is called from lwtunnel_output()
possibly from process context, and under rcu_read_lock().

We might be interrupted by a softirq, re-enter ila_output()
and corrupt dst_cache data structures.

Fix the race by using local_bh_disable().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Link: https://lore.kernel.org/r/20240531132636.2637995-5-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv6/ila/ila_lwt.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index 0601bad79822..ff7e734e335b 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -58,7 +58,9 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		return orig_dst->lwtstate->orig_output(net, sk, skb);
 	}
 
+	local_bh_disable();
 	dst = dst_cache_get(&ilwt->dst_cache);
+	local_bh_enable();
 	if (unlikely(!dst)) {
 		struct ipv6hdr *ip6h = ipv6_hdr(skb);
 		struct flowi6 fl6;
@@ -86,8 +88,11 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 			goto drop;
 		}
 
-		if (ilwt->connected)
+		if (ilwt->connected) {
+			local_bh_disable();
 			dst_cache_set_ip6(&ilwt->dst_cache, dst, &fl6.saddr);
+			local_bh_enable();
+		}
 	}
 
 	skb_dst_set(skb, dst);
-- 
cgit v1.2.3


From 2fe6fb36c781b50482b1c3323fb526bc07d1af59 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 31 May 2024 13:26:36 +0000
Subject: net: dst_cache: add two DEBUG_NET warnings

After fixing four different bugs involving dst_cache
users, it might be worth adding a check about BH being
blocked by dst_cache callers.

DEBUG_NET_WARN_ON_ONCE(!in_softirq());

It is not fatal, if we missed valid case where no
BH deadlock is to be feared, we might change this.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Link: https://lore.kernel.org/r/20240531132636.2637995-6-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/dst_cache.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 6a0482e676d3..70c634b9e7b0 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -27,6 +27,7 @@ struct dst_cache_pcpu {
 static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
 				      struct dst_entry *dst, u32 cookie)
 {
+	DEBUG_NET_WARN_ON_ONCE(!in_softirq());
 	dst_release(dst_cache->dst);
 	if (dst)
 		dst_hold(dst);
@@ -40,6 +41,7 @@ static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
 {
 	struct dst_entry *dst;
 
+	DEBUG_NET_WARN_ON_ONCE(!in_softirq());
 	dst = idst->dst;
 	if (!dst)
 		goto fail;
-- 
cgit v1.2.3


From c6cab01d7e20a028ffcee1e0a0b782332a16b5e6 Mon Sep 17 00:00:00 2001
From: Jeff Johnson <quic_jjohnson@quicinc.com>
Date: Fri, 31 May 2024 18:35:43 -0700
Subject: lib/test_rhashtable: add missing MODULE_DESCRIPTION() macro

make allmodconfig && make W=1 C=1 reports:
WARNING: modpost: missing MODULE_DESCRIPTION() in lib/test_rhashtable.o

Add the missing invocation of the MODULE_DESCRIPTION() macro.

Signed-off-by: Jeff Johnson <quic_jjohnson@quicinc.com>
Link: https://lore.kernel.org/r/20240531-md-lib-test_rhashtable-v1-1-cd6d4138f1b6@quicinc.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 lib/test_rhashtable.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 42b585208249..c63db03ebb9d 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -811,4 +811,5 @@ static void __exit test_rht_exit(void)
 module_init(test_rht_init);
 module_exit(test_rht_exit);
 
+MODULE_DESCRIPTION("Resizable, Scalable, Concurrent Hash Table test module");
 MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From 4aa2dcfbad538adf7becd0034a3754e1bd01b2b5 Mon Sep 17 00:00:00 2001
From: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
Date: Fri, 17 May 2024 07:19:14 -0700
Subject: HID: core: remove unnecessary WARN_ON() in implement()

Syzkaller hit a warning [1] in a call to implement() when trying
to write a value into a field of smaller size in an output report.

Since implement() already has a warn message printed out with the
help of hid_warn() and value in question gets trimmed with:
	...
	value &= m;
	...
WARN_ON may be considered superfluous. Remove it to suppress future
syzkaller triggers.

[1]
WARNING: CPU: 0 PID: 5084 at drivers/hid/hid-core.c:1451 implement drivers/hid/hid-core.c:1451 [inline]
WARNING: CPU: 0 PID: 5084 at drivers/hid/hid-core.c:1451 hid_output_report+0x548/0x760 drivers/hid/hid-core.c:1863
Modules linked in:
CPU: 0 PID: 5084 Comm: syz-executor424 Not tainted 6.9.0-rc7-syzkaller-00183-gcf87f46fd34d #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/02/2024
RIP: 0010:implement drivers/hid/hid-core.c:1451 [inline]
RIP: 0010:hid_output_report+0x548/0x760 drivers/hid/hid-core.c:1863
...
Call Trace:
 <TASK>
 __usbhid_submit_report drivers/hid/usbhid/hid-core.c:591 [inline]
 usbhid_submit_report+0x43d/0x9e0 drivers/hid/usbhid/hid-core.c:636
 hiddev_ioctl+0x138b/0x1f00 drivers/hid/usbhid/hiddev.c:726
 vfs_ioctl fs/ioctl.c:51 [inline]
 __do_sys_ioctl fs/ioctl.c:904 [inline]
 __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:890
 do_syscall_x64 arch/x86/entry/common.c:52 [inline]
 do_syscall_64+0xf5/0x240 arch/x86/entry/common.c:83
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
...

Fixes: 95d1c8951e5b ("HID: simplify implement() a bit")
Reported-by: <syzbot+5186630949e3c55f0799@syzkaller.appspotmail.com>
Suggested-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
---
 drivers/hid/hid-core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index b1fa0378e8f4..74efda212c55 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1448,7 +1448,6 @@ static void implement(const struct hid_device *hid, u8 *report,
 			hid_warn(hid,
 				 "%s() called with too large value %d (n: %d)! (%s)\n",
 				 __func__, value, n, current->comm);
-			WARN_ON(1);
 			value &= m;
 		}
 	}
-- 
cgit v1.2.3


From 3ec8ebd8a5b782d56347ae884de880af26f93996 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Date: Mon, 27 May 2024 16:22:34 +0300
Subject: EDAC/amd64: Convert PCIBIOS_* return codes to errnos
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gpu_get_node_map() uses pci_read_config_dword() that returns PCIBIOS_*
codes. The return code is then returned all the way into the module
init function amd64_edac_init() that returns it as is. The module init
functions, however, should return normal errnos.

Convert PCIBIOS_* returns code using pcibios_err_to_errno() into normal
errno before returning it from gpu_get_node_map().

For consistency, convert also the other similar cases which return
PCIBIOS_* codes even if they do not have any bugs at the moment.

Fixes: 4251566ebc1c ("EDAC/amd64: Cache and use GPU node map")
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20240527132236.13875-1-ilpo.jarvinen@linux.intel.com
---
 drivers/edac/amd64_edac.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 1f3520d76861..a17f3c0cdfa6 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -81,7 +81,7 @@ int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
 		amd64_warn("%s: error reading F%dx%03x.\n",
 			   func, PCI_FUNC(pdev->devfn), offset);
 
-	return err;
+	return pcibios_err_to_errno(err);
 }
 
 int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
@@ -94,7 +94,7 @@ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
 		amd64_warn("%s: error writing to F%dx%03x.\n",
 			   func, PCI_FUNC(pdev->devfn), offset);
 
-	return err;
+	return pcibios_err_to_errno(err);
 }
 
 /*
@@ -1025,8 +1025,10 @@ static int gpu_get_node_map(struct amd64_pvt *pvt)
 	}
 
 	ret = pci_read_config_dword(pdev, REG_LOCAL_NODE_TYPE_MAP, &tmp);
-	if (ret)
+	if (ret) {
+		ret = pcibios_err_to_errno(ret);
 		goto out;
+	}
 
 	gpu_node_map.node_count = FIELD_GET(LNTM_NODE_COUNT, tmp);
 	gpu_node_map.base_node_id = FIELD_GET(LNTM_BASE_NODE_ID, tmp);
-- 
cgit v1.2.3


From f8367a74aebf88dc8b58a0db6a6c90b4cb8fc9d3 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Date: Mon, 27 May 2024 16:22:35 +0300
Subject: EDAC/igen6: Convert PCIBIOS_* return codes to errnos
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

errcmd_enable_error_reporting() uses pci_{read,write}_config_word()
that return PCIBIOS_* codes. The return code is then returned all the
way into the probe function igen6_probe() that returns it as is. The
probe functions, however, should return normal errnos.

Convert PCIBIOS_* returns code using pcibios_err_to_errno() into normal
errno before returning it from errcmd_enable_error_reporting().

Fixes: 10590a9d4f23 ("EDAC/igen6: Add EDAC driver for Intel client SoCs using IBECC")
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20240527132236.13875-2-ilpo.jarvinen@linux.intel.com
---
 drivers/edac/igen6_edac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/edac/igen6_edac.c b/drivers/edac/igen6_edac.c
index cdd8480e7368..dbe9fe5f2ca6 100644
--- a/drivers/edac/igen6_edac.c
+++ b/drivers/edac/igen6_edac.c
@@ -800,7 +800,7 @@ static int errcmd_enable_error_reporting(bool enable)
 
 	rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd);
 	if (rc)
-		return rc;
+		return pcibios_err_to_errno(rc);
 
 	if (enable)
 		errcmd |= ERRCMD_CE | ERRSTS_UE;
@@ -809,7 +809,7 @@ static int errcmd_enable_error_reporting(bool enable)
 
 	rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd);
 	if (rc)
-		return rc;
+		return pcibios_err_to_errno(rc);
 
 	return 0;
 }
-- 
cgit v1.2.3


From a535d59432370343058755100ee75ab03c0e3f91 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 30 May 2024 16:26:07 -0700
Subject: net: tls: fix marking packets as decrypted

For TLS offload we mark packets with skb->decrypted to make sure
they don't escape the host without getting encrypted first.
The crypto state lives in the socket, so it may get detached
by a call to skb_orphan(). As a safety check - the egress path
drops all packets with skb->decrypted and no "crypto-safe" socket.

The skb marking was added to sendpage only (and not sendmsg),
because tls_device injected data into the TCP stack using sendpage.
This special case was missed when sendpage got folded into sendmsg.

Fixes: c5c37af6ecad ("tcp: Convert do_tcp_sendpages() to use MSG_SPLICE_PAGES")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20240530232607.82686-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/ipv4/tcp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 681b54e1f3a6..4d8cc2ebb64c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1165,6 +1165,9 @@ new_segment:
 
 			process_backlog++;
 
+#ifdef CONFIG_SKB_DECRYPTED
+			skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
+#endif
 			tcp_skb_entail(sk, skb);
 			copy = size_goal;
 
-- 
cgit v1.2.3


From a295ec52c8624883885396fde7b4df1a179627c3 Mon Sep 17 00:00:00 2001
From: "Kun(llfl)" <llfl@linux.alibaba.com>
Date: Thu, 9 May 2024 08:42:20 +0800
Subject: iommu/amd: Fix sysfs leak in iommu init

During the iommu initialization, iommu_init_pci() adds sysfs nodes.
However, these nodes aren't remove in free_iommu_resources() subsequently.

Fixes: 39ab9555c241 ("iommu: Add sysfs bindings for struct iommu_device")
Signed-off-by: Kun(llfl) <llfl@linux.alibaba.com>
Reviewed-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Link: https://lore.kernel.org/r/c8e0d11c6ab1ee48299c288009cf9c5dae07b42d.1715215003.git.llfl@linux.alibaba.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd/init.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index a18e74878f68..27e293727095 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1626,8 +1626,17 @@ static void __init free_pci_segments(void)
 	}
 }
 
+static void __init free_sysfs(struct amd_iommu *iommu)
+{
+	if (iommu->iommu.dev) {
+		iommu_device_unregister(&iommu->iommu);
+		iommu_device_sysfs_remove(&iommu->iommu);
+	}
+}
+
 static void __init free_iommu_one(struct amd_iommu *iommu)
 {
+	free_sysfs(iommu);
 	free_cwwb_sem(iommu);
 	free_command_buffer(iommu);
 	free_event_buffer(iommu);
-- 
cgit v1.2.3


From cc8d89d0637990c66440a226f443d95340979a04 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 20 May 2024 20:14:44 +0100
Subject: iommu/dma: Fix domain init

Despite carefully rewording the kerneldoc to describe the new direct
interaction with dma_range_map, it seems I managed to confuse myself in
removing the redundant force_aperture check and ended up making the code
not do that at all. This led to dma_range_maps inadvertently being able
to set iovad->start_pfn = 0, and all the nonsensical chaos which ensues
from there. Restore the correct behaviour of constraining base_pfn to
the domain aperture regardless of dma_range_map, and not trying to apply
dma_range_map constraints to the basic IOVA domain since they will be
properly handled with reserved regions later.

Reported-by: Jon Hunter <jonathanh@nvidia.com>
Reported-by: Jerry Snitselaar <jsnitsel@redhat.com>
Fixes: ad4750b07d34 ("iommu/dma: Make limit checks self-contained")
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Jerry Snitselaar <jsnitsel@redhat.com>
Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Link: https://lore.kernel.org/r/721fa6baebb0924aa40db0b8fb86bcb4538434af.1716232484.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dma-iommu.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f731e4b2a417..43520e7275cc 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -686,15 +686,15 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev
 
 	/* Check the domain allows at least some access to the device... */
 	if (map) {
-		dma_addr_t base = dma_range_map_min(map);
-		if (base > domain->geometry.aperture_end ||
+		if (dma_range_map_min(map) > domain->geometry.aperture_end ||
 		    dma_range_map_max(map) < domain->geometry.aperture_start) {
 			pr_warn("specified DMA range outside IOMMU capability\n");
 			return -EFAULT;
 		}
-		/* ...then finally give it a kicking to make sure it fits */
-		base_pfn = max(base, domain->geometry.aperture_start) >> order;
 	}
+	/* ...then finally give it a kicking to make sure it fits */
+	base_pfn = max_t(unsigned long, base_pfn,
+			 domain->geometry.aperture_start >> order);
 
 	/* start_pfn is always nonzero for an already-initialised domain */
 	mutex_lock(&cookie->mutex);
-- 
cgit v1.2.3


From 89e8a2366e3bce584b6c01549d5019c5cda1205e Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 28 May 2024 12:25:28 +0800
Subject: iommu: Return right value in iommu_sva_bind_device()

iommu_sva_bind_device() should return either a sva bond handle or an
ERR_PTR value in error cases. Existing drivers (idxd and uacce) only
check the return value with IS_ERR(). This could potentially lead to
a kernel NULL pointer dereference issue if the function returns NULL
instead of an error pointer.

In reality, this doesn't cause any problems because iommu_sva_bind_device()
only returns NULL when the kernel is not configured with CONFIG_IOMMU_SVA.
In this case, iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) will
return an error, and the device drivers won't call iommu_sva_bind_device()
at all.

Fixes: 26b25a2b98e4 ("iommu: Bind process address spaces to devices")
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Link: https://lore.kernel.org/r/20240528042528.71396-1-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7bc8dff7cf6d..17b3f36ad843 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -1533,7 +1533,7 @@ struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
 static inline struct iommu_sva *
 iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
 {
-	return NULL;
+	return ERR_PTR(-ENODEV);
 }
 
 static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
-- 
cgit v1.2.3


From 998a0a362b0b4cf501161c3319e6994d37c45a8c Mon Sep 17 00:00:00 2001
From: Vasant Hegde <vasant.hegde@amd.com>
Date: Wed, 29 May 2024 11:39:00 +0000
Subject: iommu/amd: Fix workqueue name

Workqueue name length is crossing WQ_NAME_LEN limit. Fix it by changing
name format. New format : "iopf_queue/amdvi-<iommu-devid>"

kernel warning:
  [   11.146912] workqueue: name exceeds WQ_NAME_LEN. Truncating to: iopf_queue/amdiommu-0xc002-iopf

Reported-by: Borislav Petkov <bp@alien8.de>
Fixes: 61928bab9d26 ("iommu/amd: Define per-IOMMU iopf_queue")
Signed-off-by: Vasant Hegde <vasant.hegde@amd.com>
Acked-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20240529113900.5798-1-vasant.hegde@amd.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd/ppr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c
index 091423bb8aac..31d98a2a11a6 100644
--- a/drivers/iommu/amd/ppr.c
+++ b/drivers/iommu/amd/ppr.c
@@ -222,8 +222,7 @@ int amd_iommu_iopf_init(struct amd_iommu *iommu)
 	if (iommu->iopf_queue)
 		return ret;
 
-	snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
-		 "amdiommu-%#x-iopfq",
+	snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name), "amdvi-%#x",
 		 PCI_SEG_DEVID_TO_SBDF(iommu->pci_seg->id, iommu->devid));
 
 	iommu->iopf_queue = iopf_queue_alloc(iommu->iopfq_name);
-- 
cgit v1.2.3


From 48dc345a23b984c457d1c5878168d026c500618f Mon Sep 17 00:00:00 2001
From: Vasant Hegde <vasant.hegde@amd.com>
Date: Thu, 30 May 2024 07:11:18 +0000
Subject: iommu/amd: Check EFR[EPHSup] bit before enabling PPR

Check for EFR[EPHSup] bit before enabling PPR. This bit must be set
to enable PPR.

Reported-by: Borislav Petkov <bp@alien8.de>
Fixes: c4cb23111103 ("iommu/amd: Add support for enable/disable IOPF")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218900
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Jean-Christophe Guillain <jean-christophe@guillain.net>
Signed-off-by: Vasant Hegde <vasant.hegde@amd.com>
Reviewed-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Link: https://lore.kernel.org/r/20240530071118.10297-1-vasant.hegde@amd.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd/amd_iommu.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 2fde1302a584..2d5945c982bd 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -129,7 +129,8 @@ static inline int check_feature_gpt_level(void)
 static inline bool amd_iommu_gt_ppr_supported(void)
 {
 	return (check_feature(FEATURE_GT) &&
-		check_feature(FEATURE_PPR));
+		check_feature(FEATURE_PPR) &&
+		check_feature(FEATURE_EPHSUP));
 }
 
 static inline u64 iommu_virt_to_phys(void *vaddr)
-- 
cgit v1.2.3


From 526606b0a1998b0791b42c199d53550c3ba724b5 Mon Sep 17 00:00:00 2001
From: Vasant Hegde <vasant.hegde@amd.com>
Date: Thu, 30 May 2024 08:48:01 +0000
Subject: iommu/amd: Fix Invalid wait context issue

With commit c4cb23111103 ("iommu/amd: Add support for enable/disable IOPF")
we are hitting below issue. This happens because in IOPF enablement path
it holds spin lock with irq disable and then tries to take mutex lock.

dmesg:
-----
[    0.938739] =============================
[    0.938740] [ BUG: Invalid wait context ]
[    0.938742] 6.10.0-rc1+ #1 Not tainted
[    0.938745] -----------------------------
[    0.938746] swapper/0/1 is trying to lock:
[    0.938748] ffffffff8c9f01d8 (&port_lock_key){....}-{3:3}, at: serial8250_console_write+0x78/0x4a0
[    0.938767] other info that might help us debug this:
[    0.938768] context-{5:5}
[    0.938769] 7 locks held by swapper/0/1:
[    0.938772]  #0: ffff888101a91310 (&group->mutex){+.+.}-{4:4}, at: bus_iommu_probe+0x70/0x160
[    0.938790]  #1: ffff888101d1f1b8 (&domain->lock){....}-{3:3}, at: amd_iommu_attach_device+0xa5/0x700
[    0.938799]  #2: ffff888101cc3d18 (&dev_data->lock){....}-{3:3}, at: amd_iommu_attach_device+0xc5/0x700
[    0.938806]  #3: ffff888100052830 (&iommu->lock){....}-{2:2}, at: amd_iommu_iopf_add_device+0x3f/0xa0
[    0.938813]  #4: ffffffff8945a340 (console_lock){+.+.}-{0:0}, at: _printk+0x48/0x50
[    0.938822]  #5: ffffffff8945a390 (console_srcu){....}-{0:0}, at: console_flush_all+0x58/0x4e0
[    0.938867]  #6: ffffffff82459f80 (console_owner){....}-{0:0}, at: console_flush_all+0x1f0/0x4e0
[    0.938872] stack backtrace:
[    0.938874] CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.10.0-rc1+ #1
[    0.938877] Hardware name: HP HP EliteBook 745 G3/807E, BIOS N73 Ver. 01.39 04/16/2019

Fix above issue by re-arranging code in attach device path:
  - move device PASID/IOPF enablement outside lock in AMD IOMMU driver.
    This is safe as core layer holds group->mutex lock before calling
    iommu_ops->attach_dev.

Reported-by: Borislav Petkov <bp@alien8.de>
Reported-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Reported-by: Chris Bainbridge <chris.bainbridge@gmail.com>
Fixes: c4cb23111103 ("iommu/amd: Add support for enable/disable IOPF")
Tested-by: Borislav Petkov <bp@alien8.de>
Tested-by: Chris Bainbridge <chris.bainbridge@gmail.com>
Tested-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Signed-off-by: Vasant Hegde <vasant.hegde@amd.com>
Link: https://lore.kernel.org/r/20240530084801.10758-1-vasant.hegde@amd.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd/iommu.c | 48 +++++++++++++++++++++++------------------------
 drivers/iommu/amd/ppr.c   | 22 ++++------------------
 2 files changed, 28 insertions(+), 42 deletions(-)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 52d83730a22a..c2703599bb16 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2032,7 +2032,6 @@ static int do_attach(struct iommu_dev_data *dev_data,
 		     struct protection_domain *domain)
 {
 	struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
-	struct pci_dev *pdev;
 	int ret = 0;
 
 	/* Update data structures */
@@ -2047,30 +2046,13 @@ static int do_attach(struct iommu_dev_data *dev_data,
 	domain->dev_iommu[iommu->index] += 1;
 	domain->dev_cnt                 += 1;
 
-	pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL;
+	/* Setup GCR3 table */
 	if (pdom_is_sva_capable(domain)) {
 		ret = init_gcr3_table(dev_data, domain);
 		if (ret)
 			return ret;
-
-		if (pdev) {
-			pdev_enable_caps(pdev);
-
-			/*
-			 * Device can continue to function even if IOPF
-			 * enablement failed. Hence in error path just
-			 * disable device PRI support.
-			 */
-			if (amd_iommu_iopf_add_device(iommu, dev_data))
-				pdev_disable_cap_pri(pdev);
-		}
-	} else if (pdev) {
-		pdev_enable_cap_ats(pdev);
 	}
 
-	/* Update device table */
-	amd_iommu_dev_update_dte(dev_data, true);
-
 	return ret;
 }
 
@@ -2163,6 +2145,11 @@ static void detach_device(struct device *dev)
 
 	do_detach(dev_data);
 
+out:
+	spin_unlock(&dev_data->lock);
+
+	spin_unlock_irqrestore(&domain->lock, flags);
+
 	/* Remove IOPF handler */
 	if (ppr)
 		amd_iommu_iopf_remove_device(iommu, dev_data);
@@ -2170,10 +2157,6 @@ static void detach_device(struct device *dev)
 	if (dev_is_pci(dev))
 		pdev_disable_caps(to_pci_dev(dev));
 
-out:
-	spin_unlock(&dev_data->lock);
-
-	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 static struct iommu_device *amd_iommu_probe_device(struct device *dev)
@@ -2485,6 +2468,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
 	struct protection_domain *domain = to_pdomain(dom);
 	struct amd_iommu *iommu = get_amd_iommu_from_dev(dev);
+	struct pci_dev *pdev;
 	int ret;
 
 	/*
@@ -2517,7 +2501,23 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 	}
 #endif
 
-	iommu_completion_wait(iommu);
+	pdev = dev_is_pci(dev_data->dev) ? to_pci_dev(dev_data->dev) : NULL;
+	if (pdev && pdom_is_sva_capable(domain)) {
+		pdev_enable_caps(pdev);
+
+		/*
+		 * Device can continue to function even if IOPF
+		 * enablement failed. Hence in error path just
+		 * disable device PRI support.
+		 */
+		if (amd_iommu_iopf_add_device(iommu, dev_data))
+			pdev_disable_cap_pri(pdev);
+	} else if (pdev) {
+		pdev_enable_cap_ats(pdev);
+	}
+
+	/* Update device table */
+	amd_iommu_dev_update_dte(dev_data, true);
 
 	return ret;
 }
diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c
index 31d98a2a11a6..7c67d69f0b8c 100644
--- a/drivers/iommu/amd/ppr.c
+++ b/drivers/iommu/amd/ppr.c
@@ -248,40 +248,26 @@ void amd_iommu_page_response(struct device *dev, struct iopf_fault *evt,
 int amd_iommu_iopf_add_device(struct amd_iommu *iommu,
 			      struct iommu_dev_data *dev_data)
 {
-	unsigned long flags;
 	int ret = 0;
 
 	if (!dev_data->pri_enabled)
 		return ret;
 
-	raw_spin_lock_irqsave(&iommu->lock, flags);
-
-	if (!iommu->iopf_queue) {
-		ret = -EINVAL;
-		goto out_unlock;
-	}
+	if (!iommu->iopf_queue)
+		return -EINVAL;
 
 	ret = iopf_queue_add_device(iommu->iopf_queue, dev_data->dev);
 	if (ret)
-		goto out_unlock;
+		return ret;
 
 	dev_data->ppr = true;
-
-out_unlock:
-	raw_spin_unlock_irqrestore(&iommu->lock, flags);
-	return ret;
+	return 0;
 }
 
 /* Its assumed that caller has verified that device was added to iopf queue */
 void amd_iommu_iopf_remove_device(struct amd_iommu *iommu,
 				  struct iommu_dev_data *dev_data)
 {
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&iommu->lock, flags);
-
 	iopf_queue_remove_device(iommu->iopf_queue, dev_data->dev);
 	dev_data->ppr = false;
-
-	raw_spin_unlock_irqrestore(&iommu->lock, flags);
 }
-- 
cgit v1.2.3


From 415ce0ea55c5a3afea501a773e002be9ed7149f5 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 3 Jun 2024 13:56:53 -0600
Subject: io_uring/napi: fix timeout calculation

Not quite sure what __io_napi_adjust_timeout() was attemping to do, it's
adjusting both the NAPI timeout and the general overall timeout, and
calculating a value that is never used. The overall timeout is a super
set of the NAPI timeout, and doesn't need adjusting. The only thing we
really need to care about is that the NAPI timeout doesn't exceed the
overall timeout. If a user asked for a timeout of eg 5 usec and NAPI
timeout is 10 usec, then we should not spin for 10 usec.

While in there, sanitize the time checking a bit. If we have a negative
value in the passed in timeout, discard it. Round up the value as well,
so we don't end up with a NAPI timeout for the majority of the wait,
with only a tiny sleep value at the end.

Hence the only case we need to care about is if the NAPI timeout is
larger than the overall timeout. If it is, cap the NAPI timeout at what
the overall timeout is.

Cc: stable@vger.kernel.org
Fixes: 8d0c12a80cde ("io-uring: add napi busy poll support")
Reported-by: Lewis Baker <lewissbaker@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/napi.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/io_uring/napi.c b/io_uring/napi.c
index 883a1a665907..8c18ede595c4 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -261,12 +261,14 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
 }
 
 /*
- * __io_napi_adjust_timeout() - Add napi id to the busy poll list
+ * __io_napi_adjust_timeout() - adjust busy loop timeout
  * @ctx: pointer to io-uring context structure
  * @iowq: pointer to io wait queue
  * @ts: pointer to timespec or NULL
  *
  * Adjust the busy loop timeout according to timespec and busy poll timeout.
+ * If the specified NAPI timeout is bigger than the wait timeout, then adjust
+ * the NAPI timeout accordingly.
  */
 void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
 			      struct timespec64 *ts)
@@ -274,16 +276,16 @@ void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iow
 	unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to);
 
 	if (ts) {
-		struct timespec64 poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);
-
-		if (timespec64_compare(ts, &poll_to_ts) > 0) {
-			*ts = timespec64_sub(*ts, poll_to_ts);
-		} else {
-			u64 to = timespec64_to_ns(ts);
-
-			do_div(to, 1000);
-			ts->tv_sec = 0;
-			ts->tv_nsec = 0;
+		struct timespec64 poll_to_ts;
+
+		poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);
+		if (timespec64_compare(ts, &poll_to_ts) < 0) {
+			s64 poll_to_ns = timespec64_to_ns(ts);
+			if (poll_to_ns > 0) {
+				u64 val = poll_to_ns + 999;
+				do_div(val, (s64) 1000);
+				poll_to = val;
+			}
 		}
 	}
 
-- 
cgit v1.2.3


From 91215f70ea8541e9011c0b48f8b59b9e0ce6953b Mon Sep 17 00:00:00 2001
From: Su Hui <suhui@nfschina.com>
Date: Tue, 4 Jun 2024 20:12:43 +0800
Subject: io_uring/io-wq: avoid garbage value of 'match' in io_wq_enqueue()

Clang static checker (scan-build) warning:
o_uring/io-wq.c:line 1051, column 3
The expression is an uninitialized value. The computed value will
also be garbage.

'match.nr_pending' is used in io_acct_cancel_pending_work(), but it is
not fully initialized. Change the order of assignment for 'match' to fix
this problem.

Fixes: 42abc95f05bf ("io-wq: decouple work_list protection from the big wqe->lock")
Signed-off-by: Su Hui <suhui@nfschina.com>
Link: https://lore.kernel.org/r/20240604121242.2661244-1-suhui@nfschina.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io-wq.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index d1c47a9d9215..7d3316fe9bfc 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -927,7 +927,11 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
 {
 	struct io_wq_acct *acct = io_work_get_acct(wq, work);
 	unsigned long work_flags = work->flags;
-	struct io_cb_cancel_data match;
+	struct io_cb_cancel_data match = {
+		.fn		= io_wq_work_match_item,
+		.data		= work,
+		.cancel_all	= false,
+	};
 	bool do_create;
 
 	/*
@@ -965,10 +969,6 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
 		raw_spin_unlock(&wq->lock);
 
 		/* fatal condition, failed to create the first worker */
-		match.fn		= io_wq_work_match_item,
-		match.data		= work,
-		match.cancel_all	= false,
-
 		io_acct_cancel_pending_work(wq, acct, &match);
 	}
 }
-- 
cgit v1.2.3


From 73254a297c2dd094abec7c9efee32455ae875bdf Mon Sep 17 00:00:00 2001
From: Hagar Hemdan <hagarhem@amazon.com>
Date: Tue, 4 Jun 2024 13:05:27 +0000
Subject: io_uring: fix possible deadlock in io_register_iowq_max_workers()

The io_register_iowq_max_workers() function calls io_put_sq_data(),
which acquires the sqd->lock without releasing the uring_lock.
Similar to the commit 009ad9f0c6ee ("io_uring: drop ctx->uring_lock
before acquiring sqd->lock"), this can lead to a potential deadlock
situation.

To resolve this issue, the uring_lock is released before calling
io_put_sq_data(), and then it is re-acquired after the function call.

This change ensures that the locks are acquired in the correct
order, preventing the possibility of a deadlock.

Suggested-by: Maximilian Heyne <mheyne@amazon.de>
Signed-off-by: Hagar Hemdan <hagarhem@amazon.com>
Link: https://lore.kernel.org/r/20240604130527.3597-1-hagarhem@amazon.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/register.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/io_uring/register.c b/io_uring/register.c
index ef8c908346a4..c0010a66a6f2 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -355,8 +355,10 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
 	}
 
 	if (sqd) {
+		mutex_unlock(&ctx->uring_lock);
 		mutex_unlock(&sqd->lock);
 		io_put_sq_data(sqd);
+		mutex_lock(&ctx->uring_lock);
 	}
 
 	if (copy_to_user(arg, new_count, sizeof(new_count)))
@@ -380,8 +382,10 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
 	return 0;
 err:
 	if (sqd) {
+		mutex_unlock(&ctx->uring_lock);
 		mutex_unlock(&sqd->lock);
 		io_put_sq_data(sqd);
+		mutex_lock(&ctx->uring_lock);
 	}
 	return ret;
 }
-- 
cgit v1.2.3


From 87bb39ed40bdf1596b8820e800226e24eb642677 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 3 Jun 2024 13:28:43 +0100
Subject: KVM: arm64: Reintroduce __sve_save_state

Now that the hypervisor is handling the host sve state in
protected mode, it needs to be able to save it.

This reverts commit e66425fc9ba3 ("KVM: arm64: Remove unused
__sve_save_state").

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Fuad Tabba <tabba@google.com>
Link: https://lore.kernel.org/r/20240603122852.3923848-2-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/kvm_hyp.h | 1 +
 arch/arm64/kvm/hyp/fpsimd.S      | 6 ++++++
 2 files changed, 7 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 3e80464f8953..2ab23589339a 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -111,6 +111,7 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
 
 void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
 void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
+void __sve_save_state(void *sve_pffr, u32 *fpsr);
 void __sve_restore_state(void *sve_pffr, u32 *fpsr);
 
 u64 __guest_enter(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
index 61e6f3ba7b7d..e950875e31ce 100644
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -25,3 +25,9 @@ SYM_FUNC_START(__sve_restore_state)
 	sve_load 0, x1, x2, 3
 	ret
 SYM_FUNC_END(__sve_restore_state)
+
+SYM_FUNC_START(__sve_save_state)
+	mov	x2, #1
+	sve_save 0, x1, x2, 3
+	ret
+SYM_FUNC_END(__sve_save_state)
-- 
cgit v1.2.3


From 45f4ea9bcfe909b3461059990b1e232e55dde809 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 3 Jun 2024 13:28:44 +0100
Subject: KVM: arm64: Fix prototype for __sve_save_state/__sve_restore_state

Since the prototypes for __sve_save_state/__sve_restore_state at
hyp were added, the underlying macro has acquired a third
parameter for saving/restoring ffr.

Fix the prototypes to account for the third parameter, and
restore the ffr for the guest since it is saved.

Suggested-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20240603122852.3923848-3-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/kvm_hyp.h        | 4 ++--
 arch/arm64/kvm/hyp/include/hyp/switch.h | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 2ab23589339a..686cce7e4e96 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -111,8 +111,8 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
 
 void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
 void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
-void __sve_save_state(void *sve_pffr, u32 *fpsr);
-void __sve_restore_state(void *sve_pffr, u32 *fpsr);
+void __sve_save_state(void *sve_pffr, u32 *fpsr, int save_ffr);
+void __sve_restore_state(void *sve_pffr, u32 *fpsr, int restore_ffr);
 
 u64 __guest_enter(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index a92566f36022..d58933ae8fd5 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -316,7 +316,8 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
 {
 	sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
 	__sve_restore_state(vcpu_sve_pffr(vcpu),
-			    &vcpu->arch.ctxt.fp_regs.fpsr);
+			    &vcpu->arch.ctxt.fp_regs.fpsr,
+			    true);
 	write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
 }
 
-- 
cgit v1.2.3


From 6d8fb3cbf7e06431a607c30c1bc4cd53a62c220a Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 3 Jun 2024 13:28:45 +0100
Subject: KVM: arm64: Abstract set/clear of CPTR_EL2 bits behind helper

The same traps controlled by CPTR_EL2 or CPACR_EL1 need to be
toggled in different parts of the code, but the exact bits and
their polarity differ between these two formats and the mode
(vhe/nvhe/hvhe).

To reduce the amount of duplicated code and the chance of getting
the wrong bit/polarity or missing a field, abstract the set/clear
of CPTR_EL2 bits behind a helper.

Since (h)VHE is the way of the future, use the CPACR_EL1 format,
which is a subset of the VHE CPTR_EL2, as a reference.

No functional change intended.

Suggested-by: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Fuad Tabba <tabba@google.com>
Link: https://lore.kernel.org/r/20240603122852.3923848-4-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/kvm_arm.h        |  6 ++++
 arch/arm64/include/asm/kvm_emulate.h    | 62 +++++++++++++++++++++++++++++++++
 arch/arm64/kvm/hyp/include/hyp/switch.h | 18 +++-------
 arch/arm64/kvm/hyp/nvhe/hyp-main.c      |  6 +---
 4 files changed, 73 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index e01bb5ca13b7..b2adc2c6c82a 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -305,6 +305,12 @@
 				 GENMASK(19, 14) |	\
 				 BIT(11))
 
+#define CPTR_VHE_EL2_RES0	(GENMASK(63, 32) |	\
+				 GENMASK(27, 26) |	\
+				 GENMASK(23, 22) |	\
+				 GENMASK(19, 18) |	\
+				 GENMASK(15, 0))
+
 /* Hyp Debug Configuration Register bits */
 #define MDCR_EL2_E2TB_MASK	(UL(0x3))
 #define MDCR_EL2_E2TB_SHIFT	(UL(24))
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 501e3e019c93..2d7a0bdf9d03 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -557,6 +557,68 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
 		vcpu_set_flag((v), e);					\
 	} while (0)
 
+#define __build_check_all_or_none(r, bits)				\
+	BUILD_BUG_ON(((r) & (bits)) && ((r) & (bits)) != (bits))
+
+#define __cpacr_to_cptr_clr(clr, set)					\
+	({								\
+		u64 cptr = 0;						\
+									\
+		if ((set) & CPACR_ELx_FPEN)				\
+			cptr |= CPTR_EL2_TFP;				\
+		if ((set) & CPACR_ELx_ZEN)				\
+			cptr |= CPTR_EL2_TZ;				\
+		if ((set) & CPACR_ELx_SMEN)				\
+			cptr |= CPTR_EL2_TSM;				\
+		if ((clr) & CPACR_ELx_TTA)				\
+			cptr |= CPTR_EL2_TTA;				\
+		if ((clr) & CPTR_EL2_TAM)				\
+			cptr |= CPTR_EL2_TAM;				\
+		if ((clr) & CPTR_EL2_TCPAC)				\
+			cptr |= CPTR_EL2_TCPAC;				\
+									\
+		cptr;							\
+	})
+
+#define __cpacr_to_cptr_set(clr, set)					\
+	({								\
+		u64 cptr = 0;						\
+									\
+		if ((clr) & CPACR_ELx_FPEN)				\
+			cptr |= CPTR_EL2_TFP;				\
+		if ((clr) & CPACR_ELx_ZEN)				\
+			cptr |= CPTR_EL2_TZ;				\
+		if ((clr) & CPACR_ELx_SMEN)				\
+			cptr |= CPTR_EL2_TSM;				\
+		if ((set) & CPACR_ELx_TTA)				\
+			cptr |= CPTR_EL2_TTA;				\
+		if ((set) & CPTR_EL2_TAM)				\
+			cptr |= CPTR_EL2_TAM;				\
+		if ((set) & CPTR_EL2_TCPAC)				\
+			cptr |= CPTR_EL2_TCPAC;				\
+									\
+		cptr;							\
+	})
+
+#define cpacr_clear_set(clr, set)					\
+	do {								\
+		BUILD_BUG_ON((set) & CPTR_VHE_EL2_RES0);		\
+		BUILD_BUG_ON((clr) & CPACR_ELx_E0POE);			\
+		__build_check_all_or_none((clr), CPACR_ELx_FPEN);	\
+		__build_check_all_or_none((set), CPACR_ELx_FPEN);	\
+		__build_check_all_or_none((clr), CPACR_ELx_ZEN);	\
+		__build_check_all_or_none((set), CPACR_ELx_ZEN);	\
+		__build_check_all_or_none((clr), CPACR_ELx_SMEN);	\
+		__build_check_all_or_none((set), CPACR_ELx_SMEN);	\
+									\
+		if (has_vhe() || has_hvhe())				\
+			sysreg_clear_set(cpacr_el1, clr, set);		\
+		else							\
+			sysreg_clear_set(cptr_el2,			\
+					 __cpacr_to_cptr_clr(clr, set),	\
+					 __cpacr_to_cptr_set(clr, set));\
+	} while (0)
+
 static __always_inline void kvm_write_cptr_el2(u64 val)
 {
 	if (has_vhe() || has_hvhe())
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index d58933ae8fd5..055d2ca7264e 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -331,7 +331,6 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
 	bool sve_guest;
 	u8 esr_ec;
-	u64 reg;
 
 	if (!system_supports_fpsimd())
 		return false;
@@ -354,19 +353,10 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
 	/* Valid trap.  Switch the context: */
 
 	/* First disable enough traps to allow us to update the registers */
-	if (has_vhe() || has_hvhe()) {
-		reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
-		if (sve_guest)
-			reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
-
-		sysreg_clear_set(cpacr_el1, 0, reg);
-	} else {
-		reg = CPTR_EL2_TFP;
-		if (sve_guest)
-			reg |= CPTR_EL2_TZ;
-
-		sysreg_clear_set(cptr_el2, reg, 0);
-	}
+	if (sve_guest)
+		cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
+	else
+		cpacr_clear_set(0, CPACR_ELx_FPEN);
 	isb();
 
 	/* Write out the host state if it's in the registers */
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index d5c48dc98f67..f71394d0e32a 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -405,11 +405,7 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
 		handle_host_smc(host_ctxt);
 		break;
 	case ESR_ELx_EC_SVE:
-		if (has_hvhe())
-			sysreg_clear_set(cpacr_el1, 0, (CPACR_EL1_ZEN_EL1EN |
-							CPACR_EL1_ZEN_EL0EN));
-		else
-			sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
+		cpacr_clear_set(0, CPACR_ELx_ZEN);
 		isb();
 		sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
 		break;
-- 
cgit v1.2.3


From e511e08a9f496948b13aac50610f2d17335f56c3 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 3 Jun 2024 13:28:46 +0100
Subject: KVM: arm64: Specialize handling of host fpsimd state on trap

In subsequent patches, n/vhe will diverge on saving the host
fpsimd/sve state when taking a guest fpsimd/sve trap. Add a
specialized helper to handle it.

No functional change intended.

Reviewed-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Fuad Tabba <tabba@google.com>
Link: https://lore.kernel.org/r/20240603122852.3923848-5-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/hyp/include/hyp/switch.h | 4 +++-
 arch/arm64/kvm/hyp/nvhe/switch.c        | 5 +++++
 arch/arm64/kvm/hyp/vhe/switch.c         | 5 +++++
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 055d2ca7264e..9b904c858df0 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -321,6 +321,8 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
 	write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
 }
 
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
+
 /*
  * We trap the first access to the FP/SIMD to save the host context and
  * restore the guest context lazily.
@@ -361,7 +363,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
 
 	/* Write out the host state if it's in the registers */
 	if (host_owns_fp_regs())
-		__fpsimd_save_state(*host_data_ptr(fpsimd_state));
+		kvm_hyp_save_fpsimd_host(vcpu);
 
 	/* Restore the guest state */
 	if (sve_guest)
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 6758cd905570..019f863922fa 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -182,6 +182,11 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
 		kvm_handle_pvm_sysreg(vcpu, exit_code));
 }
 
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+{
+	__fpsimd_save_state(*host_data_ptr(fpsimd_state));
+}
+
 static const exit_handler_fn hyp_exit_handlers[] = {
 	[0 ... ESR_ELx_EC_MAX]		= NULL,
 	[ESR_ELx_EC_CP15_32]		= kvm_hyp_handle_cp15_32,
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index d7af5f46f22a..20073579e9f5 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -262,6 +262,11 @@ static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
 	return true;
 }
 
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+{
+	__fpsimd_save_state(*host_data_ptr(fpsimd_state));
+}
+
 static const exit_handler_fn hyp_exit_handlers[] = {
 	[0 ... ESR_ELx_EC_MAX]		= NULL,
 	[ESR_ELx_EC_CP15_32]		= kvm_hyp_handle_cp15_32,
-- 
cgit v1.2.3


From 66d5b53e20a6e00b7ce3b652a3e2db967f7b33d0 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 3 Jun 2024 13:28:47 +0100
Subject: KVM: arm64: Allocate memory mapped at hyp for host sve state in pKVM

Protected mode needs to maintain (save/restore) the host's sve
state, rather than relying on the host kernel to do that. This is
to avoid leaking information to the host about guests and the
type of operations they are performing.

As a first step towards that, allocate memory mapped at hyp, per
cpu, for the host sve state. The following patch will use this
memory to save/restore the host state.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Fuad Tabba <tabba@google.com>
Link: https://lore.kernel.org/r/20240603122852.3923848-6-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/kvm_host.h | 17 ++++++++++
 arch/arm64/include/asm/kvm_hyp.h  |  1 +
 arch/arm64/include/asm/kvm_pkvm.h |  9 ++++++
 arch/arm64/kvm/arm.c              | 68 +++++++++++++++++++++++++++++++++++++++
 arch/arm64/kvm/hyp/nvhe/pkvm.c    |  2 ++
 arch/arm64/kvm/hyp/nvhe/setup.c   | 24 ++++++++++++++
 arch/arm64/kvm/reset.c            |  3 ++
 7 files changed, 124 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8170c04fde91..90df7ccec5f4 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -76,6 +76,7 @@ static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; };
 DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
 extern unsigned int __ro_after_init kvm_sve_max_vl;
+extern unsigned int __ro_after_init kvm_host_sve_max_vl;
 int __init kvm_arm_init_sve(void);
 
 u32 __attribute_const__ kvm_target_cpu(void);
@@ -521,6 +522,20 @@ struct kvm_cpu_context {
 	u64 *vncr_array;
 };
 
+struct cpu_sve_state {
+	__u64 zcr_el1;
+
+	/*
+	 * Ordering is important since __sve_save_state/__sve_restore_state
+	 * relies on it.
+	 */
+	__u32 fpsr;
+	__u32 fpcr;
+
+	/* Must be SVE_VQ_BYTES (128 bit) aligned. */
+	__u8 sve_regs[];
+};
+
 /*
  * This structure is instantiated on a per-CPU basis, and contains
  * data that is:
@@ -534,7 +549,9 @@ struct kvm_cpu_context {
  */
 struct kvm_host_data {
 	struct kvm_cpu_context host_ctxt;
+
 	struct user_fpsimd_state *fpsimd_state;	/* hyp VA */
+	struct cpu_sve_state *sve_state;	/* hyp VA */
 
 	/* Ownership of the FP regs */
 	enum {
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 686cce7e4e96..b05bceca3385 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -143,5 +143,6 @@ extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
 
 extern unsigned long kvm_nvhe_sym(__icache_flags);
 extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
+extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
 
 #endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index ad9cfb5c1ff4..cd56acd9a842 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -128,4 +128,13 @@ static inline unsigned long hyp_ffa_proxy_pages(void)
 	return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
 }
 
+static inline size_t pkvm_host_sve_state_size(void)
+{
+	if (!system_supports_sve())
+		return 0;
+
+	return size_add(sizeof(struct cpu_sve_state),
+			SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
+}
+
 #endif	/* __ARM64_KVM_PKVM_H__ */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9996a989b52e..1acf7415e831 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1931,6 +1931,11 @@ static unsigned long nvhe_percpu_order(void)
 	return size ? get_order(size) : 0;
 }
 
+static size_t pkvm_host_sve_state_order(void)
+{
+	return get_order(pkvm_host_sve_state_size());
+}
+
 /* A lookup table holding the hypervisor VA for each vector slot */
 static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
 
@@ -2310,12 +2315,20 @@ static void __init teardown_subsystems(void)
 
 static void __init teardown_hyp_mode(void)
 {
+	bool free_sve = system_supports_sve() && is_protected_kvm_enabled();
 	int cpu;
 
 	free_hyp_pgds();
 	for_each_possible_cpu(cpu) {
 		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
 		free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
+
+		if (free_sve) {
+			struct cpu_sve_state *sve_state;
+
+			sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
+			free_pages((unsigned long) sve_state, pkvm_host_sve_state_order());
+		}
 	}
 }
 
@@ -2398,6 +2411,50 @@ static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
 	return 0;
 }
 
+static int init_pkvm_host_sve_state(void)
+{
+	int cpu;
+
+	if (!system_supports_sve())
+		return 0;
+
+	/* Allocate pages for host sve state in protected mode. */
+	for_each_possible_cpu(cpu) {
+		struct page *page = alloc_pages(GFP_KERNEL, pkvm_host_sve_state_order());
+
+		if (!page)
+			return -ENOMEM;
+
+		per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = page_address(page);
+	}
+
+	/*
+	 * Don't map the pages in hyp since these are only used in protected
+	 * mode, which will (re)create its own mapping when initialized.
+	 */
+
+	return 0;
+}
+
+/*
+ * Finalizes the initialization of hyp mode, once everything else is initialized
+ * and the initialziation process cannot fail.
+ */
+static void finalize_init_hyp_mode(void)
+{
+	int cpu;
+
+	if (!is_protected_kvm_enabled() || !system_supports_sve())
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct cpu_sve_state *sve_state;
+
+		sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
+		per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = kern_hyp_va(sve_state);
+	}
+}
+
 static void pkvm_hyp_init_ptrauth(void)
 {
 	struct kvm_cpu_context *hyp_ctxt;
@@ -2566,6 +2623,10 @@ static int __init init_hyp_mode(void)
 			goto out_err;
 		}
 
+		err = init_pkvm_host_sve_state();
+		if (err)
+			goto out_err;
+
 		err = kvm_hyp_init_protection(hyp_va_bits);
 		if (err) {
 			kvm_err("Failed to init hyp memory protection\n");
@@ -2730,6 +2791,13 @@ static __init int kvm_arm_init(void)
 	if (err)
 		goto out_subs;
 
+	/*
+	 * This should be called after initialization is done and failure isn't
+	 * possible anymore.
+	 */
+	if (!in_hyp_mode)
+		finalize_init_hyp_mode();
+
 	kvm_arm_initialised = true;
 
 	return 0;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 16aa4875ddb8..25e9a94f6d76 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -18,6 +18,8 @@ unsigned long __icache_flags;
 /* Used by kvm_get_vttbr(). */
 unsigned int kvm_arm_vmid_bits;
 
+unsigned int kvm_host_sve_max_vl;
+
 /*
  * Set trap register values based on features in ID_AA64PFR0.
  */
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 859f22f754d3..3fae42479598 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -67,6 +67,28 @@ static int divide_memory_pool(void *virt, unsigned long size)
 	return 0;
 }
 
+static int pkvm_create_host_sve_mappings(void)
+{
+	void *start, *end;
+	int ret, i;
+
+	if (!system_supports_sve())
+		return 0;
+
+	for (i = 0; i < hyp_nr_cpus; i++) {
+		struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
+		struct cpu_sve_state *sve_state = host_data->sve_state;
+
+		start = kern_hyp_va(sve_state);
+		end = start + PAGE_ALIGN(pkvm_host_sve_state_size());
+		ret = pkvm_create_mappings(start, end, PAGE_HYP);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 				 unsigned long *per_cpu_base,
 				 u32 hyp_va_bits)
@@ -125,6 +147,8 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 			return ret;
 	}
 
+	pkvm_create_host_sve_mappings();
+
 	/*
 	 * Map the host sections RO in the hypervisor, but transfer the
 	 * ownership from the host to the hypervisor itself to make sure they
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 1b7b58cb121f..3fc8ca164dbe 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -32,6 +32,7 @@
 
 /* Maximum phys_shift supported for any VM on this host */
 static u32 __ro_after_init kvm_ipa_limit;
+unsigned int __ro_after_init kvm_host_sve_max_vl;
 
 /*
  * ARMv8 Reset Values
@@ -51,6 +52,8 @@ int __init kvm_arm_init_sve(void)
 {
 	if (system_supports_sve()) {
 		kvm_sve_max_vl = sve_max_virtualisable_vl();
+		kvm_host_sve_max_vl = sve_max_vl();
+		kvm_nvhe_sym(kvm_host_sve_max_vl) = kvm_host_sve_max_vl;
 
 		/*
 		 * The get_sve_reg()/set_sve_reg() ioctl interface will need
-- 
cgit v1.2.3


From b5b9955617bc0b41546f2fa7c3dbcc048b43dc82 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 3 Jun 2024 13:28:48 +0100
Subject: KVM: arm64: Eagerly restore host fpsimd/sve state in pKVM

When running in protected mode we don't want to leak protected
guest state to the host, including whether a guest has used
fpsimd/sve. Therefore, eagerly restore the host state on guest
exit when running in protected mode, which happens only if the
guest has used fpsimd/sve.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Fuad Tabba <tabba@google.com>
Link: https://lore.kernel.org/r/20240603122852.3923848-7-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/hyp/include/hyp/switch.h | 13 ++++++-
 arch/arm64/kvm/hyp/nvhe/hyp-main.c      | 67 +++++++++++++++++++++++++++++++--
 arch/arm64/kvm/hyp/nvhe/pkvm.c          |  2 +
 arch/arm64/kvm/hyp/nvhe/switch.c        | 16 +++++++-
 4 files changed, 93 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 9b904c858df0..0c4de44534b7 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -321,6 +321,17 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
 	write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
 }
 
+static inline void __hyp_sve_save_host(void)
+{
+	struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
+
+	sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
+	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+	__sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+			 &sve_state->fpsr,
+			 true);
+}
+
 static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
 
 /*
@@ -355,7 +366,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
 	/* Valid trap.  Switch the context: */
 
 	/* First disable enough traps to allow us to update the registers */
-	if (sve_guest)
+	if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
 		cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
 	else
 		cpacr_clear_set(0, CPACR_ELx_FPEN);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index f71394d0e32a..bd93b8a9e172 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -23,20 +23,80 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 
 void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
 
+static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
+{
+	__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
+	/*
+	 * On saving/restoring guest sve state, always use the maximum VL for
+	 * the guest. The layout of the data when saving the sve state depends
+	 * on the VL, so use a consistent (i.e., the maximum) guest VL.
+	 */
+	sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
+	__sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true);
+	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+}
+
+static void __hyp_sve_restore_host(void)
+{
+	struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
+
+	/*
+	 * On saving/restoring host sve state, always use the maximum VL for
+	 * the host. The layout of the data when saving the sve state depends
+	 * on the VL, so use a consistent (i.e., the maximum) host VL.
+	 *
+	 * Setting ZCR_EL2 to ZCR_ELx_LEN_MASK sets the effective length
+	 * supported by the system (or limited at EL3).
+	 */
+	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+	__sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+			    &sve_state->fpsr,
+			    true);
+	write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
+}
+
+static void fpsimd_sve_flush(void)
+{
+	*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
+static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
+{
+	if (!guest_owns_fp_regs())
+		return;
+
+	cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
+	isb();
+
+	if (vcpu_has_sve(vcpu))
+		__hyp_sve_save_guest(vcpu);
+	else
+		__fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
+
+	if (system_supports_sve())
+		__hyp_sve_restore_host();
+	else
+		__fpsimd_restore_state(*host_data_ptr(fpsimd_state));
+
+	*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
 static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
 {
 	struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
 
+	fpsimd_sve_flush();
+
 	hyp_vcpu->vcpu.arch.ctxt	= host_vcpu->arch.ctxt;
 
 	hyp_vcpu->vcpu.arch.sve_state	= kern_hyp_va(host_vcpu->arch.sve_state);
-	hyp_vcpu->vcpu.arch.sve_max_vl	= host_vcpu->arch.sve_max_vl;
+	/* Limit guest vector length to the maximum supported by the host.  */
+	hyp_vcpu->vcpu.arch.sve_max_vl	= min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
 
 	hyp_vcpu->vcpu.arch.hw_mmu	= host_vcpu->arch.hw_mmu;
 
 	hyp_vcpu->vcpu.arch.hcr_el2	= host_vcpu->arch.hcr_el2;
 	hyp_vcpu->vcpu.arch.mdcr_el2	= host_vcpu->arch.mdcr_el2;
-	hyp_vcpu->vcpu.arch.cptr_el2	= host_vcpu->arch.cptr_el2;
 
 	hyp_vcpu->vcpu.arch.iflags	= host_vcpu->arch.iflags;
 
@@ -54,10 +114,11 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
 	struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
 	unsigned int i;
 
+	fpsimd_sve_sync(&hyp_vcpu->vcpu);
+
 	host_vcpu->arch.ctxt		= hyp_vcpu->vcpu.arch.ctxt;
 
 	host_vcpu->arch.hcr_el2		= hyp_vcpu->vcpu.arch.hcr_el2;
-	host_vcpu->arch.cptr_el2	= hyp_vcpu->vcpu.arch.cptr_el2;
 
 	host_vcpu->arch.fault		= hyp_vcpu->vcpu.arch.fault;
 
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 25e9a94f6d76..feb27b4ce459 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -588,6 +588,8 @@ unlock:
 	if (ret)
 		unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
 
+	hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
+
 	return ret;
 }
 
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 019f863922fa..bef74de7065b 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -184,7 +184,21 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
 
 static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
 {
-	__fpsimd_save_state(*host_data_ptr(fpsimd_state));
+	/*
+	 * Non-protected kvm relies on the host restoring its sve state.
+	 * Protected kvm restores the host's sve state as not to reveal that
+	 * fpsimd was used by a guest nor leak upper sve bits.
+	 */
+	if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
+		__hyp_sve_save_host();
+
+		/* Re-enable SVE traps if not supported for the guest vcpu. */
+		if (!vcpu_has_sve(vcpu))
+			cpacr_clear_set(CPACR_ELx_ZEN, 0);
+
+	} else {
+		__fpsimd_save_state(*host_data_ptr(fpsimd_state));
+	}
 }
 
 static const exit_handler_fn hyp_exit_handlers[] = {
-- 
cgit v1.2.3


From 1696fc2174dbab12228ea9ec4c213d6aeea348f8 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 3 Jun 2024 13:28:49 +0100
Subject: KVM: arm64: Consolidate initializing the host data's fpsimd_state/sve
 in pKVM

Now that we have introduced finalize_init_hyp_mode(), lets
consolidate the initializing of the host_data fpsimd_state and
sve state.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20240603122852.3923848-8-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/kvm_host.h      | 10 ++++++++--
 arch/arm64/kvm/arm.c                   | 20 ++++++++++++++------
 arch/arm64/kvm/hyp/include/nvhe/pkvm.h |  1 -
 arch/arm64/kvm/hyp/nvhe/pkvm.c         | 11 -----------
 arch/arm64/kvm/hyp/nvhe/setup.c        |  1 -
 5 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 90df7ccec5f4..36b8e97bf49e 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -550,8 +550,14 @@ struct cpu_sve_state {
 struct kvm_host_data {
 	struct kvm_cpu_context host_ctxt;
 
-	struct user_fpsimd_state *fpsimd_state;	/* hyp VA */
-	struct cpu_sve_state *sve_state;	/* hyp VA */
+	/*
+	 * All pointers in this union are hyp VA.
+	 * sve_state is only used in pKVM and if system_supports_sve().
+	 */
+	union {
+		struct user_fpsimd_state *fpsimd_state;
+		struct cpu_sve_state *sve_state;
+	};
 
 	/* Ownership of the FP regs */
 	enum {
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 1acf7415e831..59716789fe0f 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2444,14 +2444,22 @@ static void finalize_init_hyp_mode(void)
 {
 	int cpu;
 
-	if (!is_protected_kvm_enabled() || !system_supports_sve())
-		return;
+	if (system_supports_sve() && is_protected_kvm_enabled()) {
+		for_each_possible_cpu(cpu) {
+			struct cpu_sve_state *sve_state;
 
-	for_each_possible_cpu(cpu) {
-		struct cpu_sve_state *sve_state;
+			sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
+			per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state =
+				kern_hyp_va(sve_state);
+		}
+	} else {
+		for_each_possible_cpu(cpu) {
+			struct user_fpsimd_state *fpsimd_state;
 
-		sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
-		per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = kern_hyp_va(sve_state);
+			fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs;
+			per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state =
+				kern_hyp_va(fpsimd_state);
+		}
 	}
 }
 
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 22f374e9f532..24a9a8330d19 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -59,7 +59,6 @@ static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu)
 }
 
 void pkvm_hyp_vm_table_init(void *tbl);
-void pkvm_host_fpsimd_state_init(void);
 
 int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
 		   unsigned long pgd_hva);
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index feb27b4ce459..ea67fcbf8376 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -249,17 +249,6 @@ void pkvm_hyp_vm_table_init(void *tbl)
 	vm_table = tbl;
 }
 
-void pkvm_host_fpsimd_state_init(void)
-{
-	unsigned long i;
-
-	for (i = 0; i < hyp_nr_cpus; i++) {
-		struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
-
-		host_data->fpsimd_state = &host_data->host_ctxt.fp_regs;
-	}
-}
-
 /*
  * Return the hyp vm structure corresponding to the handle.
  */
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 3fae42479598..f4350ba07b0b 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -324,7 +324,6 @@ void __noreturn __pkvm_init_finalise(void)
 		goto out;
 
 	pkvm_hyp_vm_table_init(vm_table_base);
-	pkvm_host_fpsimd_state_init();
 out:
 	/*
 	 * We tail-called to here from handle___pkvm_init() and will not return,
-- 
cgit v1.2.3


From a69283ae1db8dd416870d931caa9e2d3d2c1cd8b Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 3 Jun 2024 13:28:50 +0100
Subject: KVM: arm64: Refactor CPACR trap bit setting/clearing to use ELx
 format

When setting/clearing CPACR bits for EL0 and EL1, use the ELx
format of the bits, which covers both. This makes the code
clearer, and reduces the chances of accidentally missing a bit.

No functional change intended.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Fuad Tabba <tabba@google.com>
Link: https://lore.kernel.org/r/20240603122852.3923848-9-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/el2_setup.h   | 6 +++---
 arch/arm64/include/asm/kvm_emulate.h | 9 ++++-----
 arch/arm64/kvm/fpsimd.c              | 4 +---
 arch/arm64/kvm/hyp/nvhe/pkvm.c       | 2 +-
 arch/arm64/kvm/hyp/nvhe/switch.c     | 5 ++---
 arch/arm64/kvm/hyp/vhe/switch.c      | 7 +++----
 6 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index e4546b29dd0c..fd87c4b8f984 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -146,7 +146,7 @@
 /* Coprocessor traps */
 .macro __init_el2_cptr
 	__check_hvhe .LnVHE_\@, x1
-	mov	x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
+	mov	x0, #CPACR_ELx_FPEN
 	msr	cpacr_el1, x0
 	b	.Lskip_set_cptr_\@
 .LnVHE_\@:
@@ -277,7 +277,7 @@
 
 	// (h)VHE case
 	mrs	x0, cpacr_el1			// Disable SVE traps
-	orr	x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
+	orr	x0, x0, #CPACR_ELx_ZEN
 	msr	cpacr_el1, x0
 	b	.Lskip_set_cptr_\@
 
@@ -298,7 +298,7 @@
 
 	// (h)VHE case
 	mrs	x0, cpacr_el1			// Disable SME traps
-	orr	x0, x0, #(CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN)
+	orr	x0, x0, #CPACR_ELx_SMEN
 	msr	cpacr_el1, x0
 	b	.Lskip_set_cptr_sme_\@
 
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 2d7a0bdf9d03..21650e7924d4 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -632,17 +632,16 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
 	u64 val;
 
 	if (has_vhe()) {
-		val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
-		       CPACR_EL1_ZEN_EL1EN);
+		val = (CPACR_ELx_FPEN | CPACR_EL1_ZEN_EL1EN);
 		if (cpus_have_final_cap(ARM64_SME))
 			val |= CPACR_EL1_SMEN_EL1EN;
 	} else if (has_hvhe()) {
-		val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
+		val = CPACR_ELx_FPEN;
 
 		if (!vcpu_has_sve(vcpu) || !guest_owns_fp_regs())
-			val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN;
+			val |= CPACR_ELx_ZEN;
 		if (cpus_have_final_cap(ARM64_SME))
-			val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN;
+			val |= CPACR_ELx_SMEN;
 	} else {
 		val = CPTR_NVHE_EL2_RES1;
 
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 1807d3a79a8a..eb21f29d91fc 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -161,9 +161,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 	if (has_vhe() && system_supports_sme()) {
 		/* Also restore EL0 state seen on entry */
 		if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
-			sysreg_clear_set(CPACR_EL1, 0,
-					 CPACR_EL1_SMEN_EL0EN |
-					 CPACR_EL1_SMEN_EL1EN);
+			sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_SMEN);
 		else
 			sysreg_clear_set(CPACR_EL1,
 					 CPACR_EL1_SMEN_EL0EN,
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index ea67fcbf8376..95cf18574251 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -65,7 +65,7 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
 	/* Trap SVE */
 	if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) {
 		if (has_hvhe())
-			cptr_clear |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
+			cptr_clear |= CPACR_ELx_ZEN;
 		else
 			cptr_set |= CPTR_EL2_TZ;
 	}
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index bef74de7065b..6af179c6356d 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -48,15 +48,14 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 	val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA;
 	if (cpus_have_final_cap(ARM64_SME)) {
 		if (has_hvhe())
-			val &= ~(CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN);
+			val &= ~CPACR_ELx_SMEN;
 		else
 			val |= CPTR_EL2_TSM;
 	}
 
 	if (!guest_owns_fp_regs()) {
 		if (has_hvhe())
-			val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
-				 CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
+			val &= ~(CPACR_ELx_FPEN | CPACR_ELx_ZEN);
 		else
 			val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
 
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 20073579e9f5..8fbb6a2e0559 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -93,8 +93,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 
 	val = read_sysreg(cpacr_el1);
 	val |= CPACR_ELx_TTA;
-	val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
-		 CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN);
+	val &= ~(CPACR_ELx_ZEN | CPACR_ELx_SMEN);
 
 	/*
 	 * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
@@ -109,9 +108,9 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
 
 	if (guest_owns_fp_regs()) {
 		if (vcpu_has_sve(vcpu))
-			val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
+			val |= CPACR_ELx_ZEN;
 	} else {
-		val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
+		val &= ~CPACR_ELx_FPEN;
 		__activate_traps_fpsimd32(vcpu);
 	}
 
-- 
cgit v1.2.3


From afb91f5f8ad7af172d993a34fde1947892408f53 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 3 Jun 2024 13:28:51 +0100
Subject: KVM: arm64: Ensure that SME controls are disabled in protected mode

KVM (and pKVM) do not support SME guests. Therefore KVM ensures
that the host's SME state is flushed and that SME controls for
enabling access to ZA storage and for streaming are disabled.

pKVM needs to protect against a buggy/malicious host. Ensure that
it wouldn't run a guest when protected mode is enabled should any
of the SME controls be enabled.

Signed-off-by: Fuad Tabba <tabba@google.com>
Link: https://lore.kernel.org/r/20240603122852.3923848-10-tabba@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/fpsimd.c            |  7 +++++++
 arch/arm64/kvm/hyp/nvhe/hyp-main.c | 11 +++++++++++
 2 files changed, 18 insertions(+)

diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index eb21f29d91fc..521b32868d0d 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -90,6 +90,13 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 			fpsimd_save_and_flush_cpu_state();
 		}
 	}
+
+	/*
+	 * If normal guests gain SME support, maintain this behavior for pKVM
+	 * guests, which don't support SME.
+	 */
+	WARN_ON(is_protected_kvm_enabled() && system_supports_sme() &&
+		read_sysreg_s(SYS_SVCR));
 }
 
 /*
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index bd93b8a9e172..f43d845f3c4e 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -140,6 +140,17 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
 		struct pkvm_hyp_vcpu *hyp_vcpu;
 		struct kvm *host_kvm;
 
+		/*
+		 * KVM (and pKVM) doesn't support SME guests for now, and
+		 * ensures that SME features aren't enabled in pstate when
+		 * loading a vcpu. Therefore, if SME features enabled the host
+		 * is misbehaving.
+		 */
+		if (unlikely(system_supports_sme() && read_sysreg_s(SYS_SVCR))) {
+			ret = -EINVAL;
+			goto out;
+		}
+
 		host_kvm = kern_hyp_va(host_vcpu->kvm);
 		hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle,
 					      host_vcpu->vcpu_idx);
-- 
cgit v1.2.3


From ce3af2ee95170b7d9e15fff6e500d67deab1e7b3 Mon Sep 17 00:00:00 2001
From: José Expósito <jose.exposito89@gmail.com>
Date: Fri, 24 May 2024 15:05:39 +0200
Subject: HID: logitech-dj: Fix memory leak in logi_dj_recv_switch_to_dj_mode()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a memory leak on logi_dj_recv_send_report() error path.

Fixes: 6f20d3261265 ("HID: logitech-dj: Fix error handling in logi_dj_recv_switch_to_dj_mode()")
Signed-off-by: José Expósito <jose.exposito89@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
---
 drivers/hid/hid-logitech-dj.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index 3c3c497b6b91..37958edec55f 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -1284,8 +1284,10 @@ static int logi_dj_recv_switch_to_dj_mode(struct dj_receiver_dev *djrcv_dev,
 		 */
 		msleep(50);
 
-		if (retval)
+		if (retval) {
+			kfree(dj_report);
 			return retval;
+		}
 	}
 
 	/*
-- 
cgit v1.2.3


From 655a8a7684b897721f87c59798fd04d8b79f1b69 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 26 May 2024 17:05:59 +0200
Subject: HID: nintendo: Fix an error handling path in nintendo_hid_probe()

joycon_leds_create() has a ida_alloc() call. So if an error occurs after
it, a corresponding ida_free() call is needed, as already done in the
.remove function.

This is not 100% perfect, because if ida_alloc() fails, then
'ctlr->player_id' will forced to be U32_MAX, and an error will be logged
when ida_free() is called.

Considering that this can't happen in real life, no special handling is
done to handle it.

Fixes: 5307de63d71d ("HID: nintendo: use ida for LED player id")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Silvan Jegen <s.jegen@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
---
 drivers/hid/hid-nintendo.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c
index b4a97803eca3..3062daf68d31 100644
--- a/drivers/hid/hid-nintendo.c
+++ b/drivers/hid/hid-nintendo.c
@@ -2725,13 +2725,13 @@ static int nintendo_hid_probe(struct hid_device *hdev,
 	ret = joycon_power_supply_create(ctlr);
 	if (ret) {
 		hid_err(hdev, "Failed to create power_supply; ret=%d\n", ret);
-		goto err_close;
+		goto err_ida;
 	}
 
 	ret = joycon_input_create(ctlr);
 	if (ret) {
 		hid_err(hdev, "Failed to create input device; ret=%d\n", ret);
-		goto err_close;
+		goto err_ida;
 	}
 
 	ctlr->ctlr_state = JOYCON_CTLR_STATE_READ;
@@ -2739,6 +2739,8 @@ static int nintendo_hid_probe(struct hid_device *hdev,
 	hid_dbg(hdev, "probe - success\n");
 	return 0;
 
+err_ida:
+	ida_free(&nintendo_player_id_allocator, ctlr->player_id);
 err_close:
 	hid_hw_close(hdev);
 err_stop:
-- 
cgit v1.2.3


From 9e438fe31e96b75c2ec599f6a9239950e6712358 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 3 Jun 2024 09:41:03 +0200
Subject: HID: intel-ish-hid: fix endian-conversion

The newly added file causes a ton of sparse warnings about the
incorrect use of __le32 and similar types:

drivers/hid/intel-ish-hid/ishtp/loader.h:41:23: error: invalid bitfield specifier for type restricted __le32.
drivers/hid/intel-ish-hid/ishtp/loader.h:42:27: error: invalid bitfield specifier for type restricted __le32.
drivers/hid/intel-ish-hid/ishtp/loader.h:43:24: error: invalid bitfield specifier for type restricted __le32.
drivers/hid/intel-ish-hid/ishtp/loader.h:44:24: error: invalid bitfield specifier for type restricted __le32.
drivers/hid/intel-ish-hid/ishtp/loader.h:45:22: error: invalid bitfield specifier for type restricted __le32.
drivers/hid/intel-ish-hid/ishtp/loader.c:172:33: warning: restricted __le32 degrades to integer
drivers/hid/intel-ish-hid/ishtp/loader.c:178:50: warning: incorrect type in assignment (different base types)
drivers/hid/intel-ish-hid/ishtp/loader.c:178:50:    expected restricted __le32 [usertype] length
drivers/hid/intel-ish-hid/ishtp/loader.c:178:50:    got unsigned long
drivers/hid/intel-ish-hid/ishtp/loader.c:179:50: warning: incorrect type in assignment (different base types)
drivers/hid/intel-ish-hid/ishtp/loader.c:179:50:    expected restricted __le32 [usertype] fw_off
drivers/hid/intel-ish-hid/ishtp/loader.c:179:50:    got unsigned int [usertype] offset
drivers/hid/intel-ish-hid/ishtp/loader.c:180:17: warning: cast from restricted __le32
drivers/hid/intel-ish-hid/ishtp/loader.c:183:24: warning: invalid assignment: +=
drivers/hid/intel-ish-hid/ishtp/loader.c:183:24:    left side has type unsigned int
drivers/hid/intel-ish-hid/ishtp/loader.c:183:24:    right side has type restricted __le32

Add the necessary conversions and use temporary variables where appropriate
to avoid converting back.

Fixes: 579a267e4617 ("HID: intel-ish-hid: Implement loading firmware from host feature")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Zhang Lixu <lixu.zhang@intel.com>
Tested-by: Zhang Lixu <lixu.zhang@intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
---
 drivers/hid/intel-ish-hid/ishtp/loader.c | 69 ++++++++++++++++++--------------
 drivers/hid/intel-ish-hid/ishtp/loader.h | 33 ++++++++-------
 2 files changed, 57 insertions(+), 45 deletions(-)

diff --git a/drivers/hid/intel-ish-hid/ishtp/loader.c b/drivers/hid/intel-ish-hid/ishtp/loader.c
index 2785b04a2f5a..fcca070bdecb 100644
--- a/drivers/hid/intel-ish-hid/ishtp/loader.c
+++ b/drivers/hid/intel-ish-hid/ishtp/loader.c
@@ -84,8 +84,8 @@ static int loader_write_message(struct ishtp_device *dev, void *buf, int len)
 static int loader_xfer_cmd(struct ishtp_device *dev, void *req, int req_len,
 			   void *resp, int resp_len)
 {
-	struct loader_msg_header *req_hdr = req;
-	struct loader_msg_header *resp_hdr = resp;
+	union loader_msg_header req_hdr;
+	union loader_msg_header resp_hdr;
 	struct device *devc = dev->devc;
 	int rv;
 
@@ -93,34 +93,37 @@ static int loader_xfer_cmd(struct ishtp_device *dev, void *req, int req_len,
 	dev->fw_loader_rx_size = resp_len;
 
 	rv = loader_write_message(dev, req, req_len);
+	req_hdr.val32 = le32_to_cpup(req);
+
 	if (rv < 0) {
-		dev_err(devc, "write cmd %u failed:%d\n", req_hdr->command, rv);
+		dev_err(devc, "write cmd %u failed:%d\n", req_hdr.command, rv);
 		return rv;
 	}
 
 	/* Wait the ACK */
 	wait_event_interruptible_timeout(dev->wait_loader_recvd_msg, dev->fw_loader_received,
 					 ISHTP_LOADER_TIMEOUT);
+	resp_hdr.val32 = le32_to_cpup(resp);
 	dev->fw_loader_rx_size = 0;
 	dev->fw_loader_rx_buf = NULL;
 	if (!dev->fw_loader_received) {
-		dev_err(devc, "wait response of cmd %u timeout\n", req_hdr->command);
+		dev_err(devc, "wait response of cmd %u timeout\n", req_hdr.command);
 		return -ETIMEDOUT;
 	}
 
-	if (!resp_hdr->is_response) {
-		dev_err(devc, "not a response for %u\n", req_hdr->command);
+	if (!resp_hdr.is_response) {
+		dev_err(devc, "not a response for %u\n", req_hdr.command);
 		return -EBADMSG;
 	}
 
-	if (req_hdr->command != resp_hdr->command) {
-		dev_err(devc, "unexpected cmd response %u:%u\n", req_hdr->command,
-			resp_hdr->command);
+	if (req_hdr.command != resp_hdr.command) {
+		dev_err(devc, "unexpected cmd response %u:%u\n", req_hdr.command,
+			resp_hdr.command);
 		return -EBADMSG;
 	}
 
-	if (resp_hdr->status) {
-		dev_err(devc, "cmd %u failed %u\n", req_hdr->command, resp_hdr->status);
+	if (resp_hdr.status) {
+		dev_err(devc, "cmd %u failed %u\n", req_hdr.command, resp_hdr.status);
 		return -EIO;
 	}
 
@@ -157,30 +160,33 @@ static void release_dma_bufs(struct ishtp_device *dev,
  * @fragment: The ISHTP firmware fragment descriptor
  * @dma_bufs: The array of DMA fragment buffers
  * @fragment_size: The size of a single DMA fragment
+ * @fragment_count: Number of fragments
  *
  * Return: 0 on success, negative error code on failure
  */
 static int prepare_dma_bufs(struct ishtp_device *dev,
 			    const struct firmware *ish_fw,
 			    struct loader_xfer_dma_fragment *fragment,
-			    void **dma_bufs, u32 fragment_size)
+			    void **dma_bufs, u32 fragment_size, u32 fragment_count)
 {
 	dma_addr_t dma_addr;
 	u32 offset = 0;
+	u32 length;
 	int i;
 
-	for (i = 0; i < fragment->fragment_cnt && offset < ish_fw->size; i++) {
+	for (i = 0; i < fragment_count && offset < ish_fw->size; i++) {
 		dma_bufs[i] = dma_alloc_coherent(dev->devc, fragment_size, &dma_addr, GFP_KERNEL);
 		if (!dma_bufs[i])
 			return -ENOMEM;
 
 		fragment->fragment_tbl[i].ddr_adrs = cpu_to_le64(dma_addr);
-		fragment->fragment_tbl[i].length = clamp(ish_fw->size - offset, 0, fragment_size);
-		fragment->fragment_tbl[i].fw_off = offset;
-		memcpy(dma_bufs[i], ish_fw->data + offset, fragment->fragment_tbl[i].length);
+		length = clamp(ish_fw->size - offset, 0, fragment_size);
+		fragment->fragment_tbl[i].length = cpu_to_le32(length);
+		fragment->fragment_tbl[i].fw_off = cpu_to_le32(offset);
+		memcpy(dma_bufs[i], ish_fw->data + offset, length);
 		clflush_cache_range(dma_bufs[i], fragment_size);
 
-		offset += fragment->fragment_tbl[i].length;
+		offset += length;
 	}
 
 	return 0;
@@ -208,17 +214,17 @@ void ishtp_loader_work(struct work_struct *work)
 {
 	DEFINE_RAW_FLEX(struct loader_xfer_dma_fragment, fragment, fragment_tbl, FRAGMENT_MAX_NUM);
 	struct ishtp_device *dev = container_of(work, struct ishtp_device, work_fw_loader);
-	struct loader_xfer_query query = {
-		.header.command = LOADER_CMD_XFER_QUERY,
-	};
-	struct loader_start start = {
-		.header.command = LOADER_CMD_START,
-	};
+	union loader_msg_header query_hdr = { .command = LOADER_CMD_XFER_QUERY, };
+	union loader_msg_header start_hdr = { .command = LOADER_CMD_START, };
+	union loader_msg_header fragment_hdr = { .command = LOADER_CMD_XFER_FRAGMENT, };
+	struct loader_xfer_query query = { .header = cpu_to_le32(query_hdr.val32), };
+	struct loader_start start = { .header = cpu_to_le32(start_hdr.val32), };
 	union loader_recv_message recv_msg;
 	char *filename = dev->driver_data->fw_filename;
 	const struct firmware *ish_fw;
 	void *dma_bufs[FRAGMENT_MAX_NUM] = {};
 	u32 fragment_size;
+	u32 fragment_count;
 	int retry = ISHTP_LOADER_RETRY_TIMES;
 	int rv;
 
@@ -228,23 +234,24 @@ void ishtp_loader_work(struct work_struct *work)
 		return;
 	}
 
-	fragment->fragment.header.command = LOADER_CMD_XFER_FRAGMENT;
-	fragment->fragment.xfer_mode = LOADER_XFER_MODE_DMA;
-	fragment->fragment.is_last = 1;
-	fragment->fragment.size = ish_fw->size;
+	fragment->fragment.header = cpu_to_le32(fragment_hdr.val32);
+	fragment->fragment.xfer_mode = cpu_to_le32(LOADER_XFER_MODE_DMA);
+	fragment->fragment.is_last = cpu_to_le32(1);
+	fragment->fragment.size = cpu_to_le32(ish_fw->size);
 	/* Calculate the size of a single DMA fragment */
 	fragment_size = PFN_ALIGN(DIV_ROUND_UP(ish_fw->size, FRAGMENT_MAX_NUM));
 	/* Calculate the count of DMA fragments */
-	fragment->fragment_cnt = DIV_ROUND_UP(ish_fw->size, fragment_size);
+	fragment_count = DIV_ROUND_UP(ish_fw->size, fragment_size);
+	fragment->fragment_cnt = cpu_to_le32(fragment_count);
 
-	rv = prepare_dma_bufs(dev, ish_fw, fragment, dma_bufs, fragment_size);
+	rv = prepare_dma_bufs(dev, ish_fw, fragment, dma_bufs, fragment_size, fragment_count);
 	if (rv) {
 		dev_err(dev->devc, "prepare DMA buffer failed.\n");
 		goto out;
 	}
 
 	do {
-		query.image_size = ish_fw->size;
+		query.image_size = cpu_to_le32(ish_fw->size);
 		rv = loader_xfer_cmd(dev, &query, sizeof(query), recv_msg.raw_data,
 				     sizeof(struct loader_xfer_query_ack));
 		if (rv)
@@ -257,7 +264,7 @@ void ishtp_loader_work(struct work_struct *work)
 			recv_msg.query_ack.version_build);
 
 		rv = loader_xfer_cmd(dev, fragment,
-				     struct_size(fragment, fragment_tbl, fragment->fragment_cnt),
+				     struct_size(fragment, fragment_tbl, fragment_count),
 				     recv_msg.raw_data, sizeof(struct loader_xfer_fragment_ack));
 		if (rv)
 			continue; /* try again if failed */
diff --git a/drivers/hid/intel-ish-hid/ishtp/loader.h b/drivers/hid/intel-ish-hid/ishtp/loader.h
index 7aa45ebc3f7b..308b96085a4d 100644
--- a/drivers/hid/intel-ish-hid/ishtp/loader.h
+++ b/drivers/hid/intel-ish-hid/ishtp/loader.h
@@ -30,19 +30,23 @@ struct work_struct;
 #define LOADER_XFER_MODE_DMA BIT(0)
 
 /**
- * struct loader_msg_header - ISHTP firmware loader message header
+ * union loader_msg_header - ISHTP firmware loader message header
  * @command: Command type
  * @is_response: Indicates if the message is a response
  * @has_next: Indicates if there is a next message
  * @reserved: Reserved for future use
  * @status: Status of the message
- */
-struct loader_msg_header {
-	__le32 command:7;
-	__le32 is_response:1;
-	__le32 has_next:1;
-	__le32 reserved:15;
-	__le32 status:8;
+ * @val32: entire header as a 32-bit value
+ */
+union loader_msg_header {
+	struct {
+		__u32 command:7;
+		__u32 is_response:1;
+		__u32 has_next:1;
+		__u32 reserved:15;
+		__u32 status:8;
+	};
+	__u32 val32;
 };
 
 /**
@@ -51,7 +55,7 @@ struct loader_msg_header {
  * @image_size: Size of the image
  */
 struct loader_xfer_query {
-	struct loader_msg_header header;
+	__le32 header;
 	__le32 image_size;
 };
 
@@ -103,7 +107,7 @@ struct loader_capability {
  * @capability: Loader capability
  */
 struct loader_xfer_query_ack {
-	struct loader_msg_header header;
+	__le32 header;
 	__le16 version_major;
 	__le16 version_minor;
 	__le16 version_hotfix;
@@ -122,7 +126,7 @@ struct loader_xfer_query_ack {
  * @is_last: Is last
  */
 struct loader_xfer_fragment {
-	struct loader_msg_header header;
+	__le32 header;
 	__le32 xfer_mode;
 	__le32 offset;
 	__le32 size;
@@ -134,7 +138,7 @@ struct loader_xfer_fragment {
  * @header: Header of the message
  */
 struct loader_xfer_fragment_ack {
-	struct loader_msg_header header;
+	__le32 header;
 };
 
 /**
@@ -170,7 +174,7 @@ struct loader_xfer_dma_fragment {
  * @header: Header of the message
  */
 struct loader_start {
-	struct loader_msg_header header;
+	__le32 header;
 };
 
 /**
@@ -178,10 +182,11 @@ struct loader_start {
  * @header: Header of the message
  */
 struct loader_start_ack {
-	struct loader_msg_header header;
+	__le32 header;
 };
 
 union loader_recv_message {
+	__le32 header;
 	struct loader_xfer_query_ack query_ack;
 	struct loader_xfer_fragment_ack fragment_ack;
 	struct loader_start_ack start_ack;
-- 
cgit v1.2.3


From 9c8f05cf1d7abd1bfb53cebe691bf3acb7baee99 Mon Sep 17 00:00:00 2001
From: Jeff Johnson <quic_jjohnson@quicinc.com>
Date: Mon, 3 Jun 2024 23:00:59 -0700
Subject: HID: logitech-hidpp: add missing MODULE_DESCRIPTION() macro

make allmodconfig && make W=1 C=1 reports:
WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/hid/hid-logitech-hidpp.o

Add the missing invocation of the MODULE_DESCRIPTION() macro.

Signed-off-by: Jeff Johnson <quic_jjohnson@quicinc.com>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
---
 drivers/hid/hid-logitech-hidpp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index b81d5bcc76a7..400d70e6dbe2 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -27,6 +27,7 @@
 #include "usbhid/usbhid.h"
 #include "hid-ids.h"
 
+MODULE_DESCRIPTION("Support for Logitech devices relying on the HID++ specification");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Benjamin Tissoires <benjamin.tissoires@gmail.com>");
 MODULE_AUTHOR("Nestor Lopez Casado <nlopezcasad@logitech.com>");
-- 
cgit v1.2.3


From 0698ff57bf327d9a5735a898f78161b8dada160b Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Mon, 27 May 2024 13:54:08 +0200
Subject: drm/xe/pf: Update the LMTT when freeing VF GT config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The LMTT must be updated whenever we change the VF LMEM configuration.
We missed that step when freeing the whole VF GT config, which could
result in stale PTE in LMTT or LMTT PT object leaks. Fix that.

Fixes: ac6598aed1b3 ("drm/xe/pf: Add support to configure SR-IOV VFs")
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Reviewed-by: Piotr Piórkowski <piotr.piorkowski@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240527115408.1064-1-michal.wajdeczko@intel.com
(cherry picked from commit c063cce7df3a765539e2a2d75ab943f334446cce)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 79116ad58620..476d613333a9 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -1749,6 +1749,7 @@ static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid)
 	if (!xe_gt_is_media_type(gt)) {
 		pf_release_vf_config_ggtt(gt, config);
 		pf_release_vf_config_lmem(gt, config);
+		pf_update_vf_lmtt(gt_to_xe(gt), vfid);
 	}
 	pf_release_config_ctxs(gt, config);
 	pf_release_config_dbs(gt, config);
-- 
cgit v1.2.3


From c9d52fb313d3719d69a040f4ca78a3e2e95fba21 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 30 May 2024 18:04:24 -0700
Subject: PCI: Revert the cfg_access_lock lockdep mechanism

While the experiment did reveal that there are additional places that are
missing the lock during secondary bus reset, one of the places that needs
to take cfg_access_lock (pci_bus_lock()) is not prepared for lockdep
annotation.

Specifically, pci_bus_lock() takes pci_dev_lock() recursively and is
currently dependent on the fact that the device_lock() is marked
lockdep_set_novalidate_class(&dev->mutex). Otherwise, without that
annotation, pci_bus_lock() would need to use something like a new
pci_dev_lock_nested() helper, a scheme to track a PCI device's depth in the
topology, and a hope that the depth of a PCI tree never exceeds the max
value for a lockdep subclass.

The alternative to ripping out the lockdep coverage would be to deploy a
dynamic lock key for every PCI device. Unfortunately, there is evidence
that increasing the number of keys that lockdep needs to track to be
per-PCI-device is prohibitively expensive for something like the
cfg_access_lock.

The main motivation for adding the annotation in the first place was to
catch unlocked secondary bus resets, not necessarily catch lock ordering
problems between cfg_access_lock and other locks. Solve that narrower
problem with follow-on patches, and just due to targeted revert for now.

Link: https://lore.kernel.org/r/171711746402.1628941.14575335981264103013.stgit@dwillia2-xfh.jf.intel.com
Fixes: 7e89efc6e9e4 ("PCI: Lock upstream bridge for pci_reset_function()")
Reported-by: Imre Deak <imre.deak@intel.com>
Closes: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_134186v1/shard-dg2-1/igt@device_reset@unbind-reset-rebind.html
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Tested-by: Kalle Valo <kvalo@kernel.org>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Cc: Jani Saarinen <jani.saarinen@intel.com>
---
 drivers/pci/access.c    | 4 ----
 drivers/pci/pci.c       | 1 -
 drivers/pci/probe.c     | 3 ---
 include/linux/lockdep.h | 5 -----
 include/linux/pci.h     | 2 --
 5 files changed, 15 deletions(-)

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 30f031de9cfe..b123da16b63b 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -289,8 +289,6 @@ void pci_cfg_access_lock(struct pci_dev *dev)
 {
 	might_sleep();
 
-	lock_map_acquire(&dev->cfg_access_lock);
-
 	raw_spin_lock_irq(&pci_lock);
 	if (dev->block_cfg_access)
 		pci_wait_cfg(dev);
@@ -345,8 +343,6 @@ void pci_cfg_access_unlock(struct pci_dev *dev)
 	raw_spin_unlock_irqrestore(&pci_lock, flags);
 
 	wake_up_all(&pci_cfg_wait);
-
-	lock_map_release(&dev->cfg_access_lock);
 }
 EXPORT_SYMBOL_GPL(pci_cfg_access_unlock);
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 59e0949fb079..35fb1f17a589 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4883,7 +4883,6 @@ void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)
  */
 int pci_bridge_secondary_bus_reset(struct pci_dev *dev)
 {
-	lock_map_assert_held(&dev->cfg_access_lock);
 	pcibios_reset_secondary_bus(dev);
 
 	return pci_bridge_wait_for_secondary_bus(dev, "bus reset");
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 8e696e547565..5fbabb4e3425 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2546,9 +2546,6 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 	dev->dev.dma_mask = &dev->dma_mask;
 	dev->dev.dma_parms = &dev->dma_parms;
 	dev->dev.coherent_dma_mask = 0xffffffffull;
-	lockdep_register_key(&dev->cfg_access_key);
-	lockdep_init_map(&dev->cfg_access_lock, dev_name(&dev->dev),
-			 &dev->cfg_access_key, 0);
 
 	dma_set_max_seg_size(&dev->dev, 65536);
 	dma_set_seg_boundary(&dev->dev, 0xffffffff);
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 5e51b0de4c4b..08b0d1d9d78b 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -297,9 +297,6 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
 		.wait_type_inner = _wait_type,		\
 		.lock_type = LD_LOCK_WAIT_OVERRIDE, }
 
-#define lock_map_assert_held(l)		\
-	lockdep_assert(lock_is_held(l) != LOCK_STATE_NOT_HELD)
-
 #else /* !CONFIG_LOCKDEP */
 
 static inline void lockdep_init_task(struct task_struct *task)
@@ -391,8 +388,6 @@ extern int lockdep_is_held(const void *);
 #define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type)	\
 	struct lockdep_map __maybe_unused _name = {}
 
-#define lock_map_assert_held(l)			do { (void)(l); } while (0)
-
 #endif /* !LOCKDEP */
 
 #ifdef CONFIG_PROVE_LOCKING
diff --git a/include/linux/pci.h b/include/linux/pci.h
index fb004fd4e889..cafc5ab1cbcb 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -413,8 +413,6 @@ struct pci_dev {
 	struct resource driver_exclusive_resource;	 /* driver exclusive resource ranges */
 
 	bool		match_driver;		/* Skip attaching driver */
-	struct lock_class_key cfg_access_key;
-	struct lockdep_map cfg_access_lock;
 
 	unsigned int	transparent:1;		/* Subtractive decode bridge */
 	unsigned int	io_window:1;		/* Bridge has I/O window */
-- 
cgit v1.2.3


From dc6abbbde4b099e936cd5428e196d86a5e119aae Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 3 Jun 2024 15:25:23 -0300
Subject: tools headers arm64: Sync arm64's cputype.h with the kernel sources

To get the changes in:

  0ce85db6c2141b7f ("arm64: cputype: Add Neoverse-V3 definitions")
  02a0a04676fa7796 ("arm64: cputype: Add Cortex-X4 definitions")
  f4d9d9dcc70b96b5 ("arm64: Add Neoverse-V2 part")

That makes this perf source code to be rebuilt:

  CC      /tmp/build/perf-tools/util/arm-spe.o

The changes in the above patch add MIDR_NEOVERSE_V[23] and
MIDR_NEOVERSE_V1 is used in arm-spe.c, so probably we need to add those
and perhaps MIDR_CORTEX_X4 to that array? Or maybe we need to leave this
for later when this is all tested on those machines?

  static const struct midr_range neoverse_spe[] = {
          MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
          MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
          MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
          {},
  };

Mark Rutland recommended about arm-spe.c:

"I would not touch this for now -- someone would have to go audit the
TRMs to check that those other cores have the same encoding, and I think
it'd be better to do that as a follow-up."

That addresses this perf build warning:

  Warning: Kernel ABI header differences:
    diff -u tools/arch/arm64/include/asm/cputype.h arch/arm64/include/asm/cputype.h

Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Besar Wicaksono <bwicaksono@nvidia.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/lkml/Zl8cYk0Tai2fs7aM@x1
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/arm64/include/asm/cputype.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index 52f076afeb96..7b32b99023a2 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -86,6 +86,9 @@
 #define ARM_CPU_PART_CORTEX_X2		0xD48
 #define ARM_CPU_PART_NEOVERSE_N2	0xD49
 #define ARM_CPU_PART_CORTEX_A78C	0xD4B
+#define ARM_CPU_PART_NEOVERSE_V2	0xD4F
+#define ARM_CPU_PART_CORTEX_X4		0xD82
+#define ARM_CPU_PART_NEOVERSE_V3	0xD84
 
 #define APM_CPU_PART_XGENE		0x000
 #define APM_CPU_VAR_POTENZA		0x00
@@ -159,6 +162,9 @@
 #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
 #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
 #define MIDR_CORTEX_A78C	MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C)
+#define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2)
+#define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4)
+#define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3)
 #define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
-- 
cgit v1.2.3


From 0ea00e249ca992adee54dc71a526ee70ef109e40 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Wed, 29 May 2024 15:23:25 +0300
Subject: tpm_tis: Do *not* flush uninitialized work

tpm_tis_core_init() may fail before tpm_tis_probe_irq_single() is
called, in which case tpm_tis_remove() unconditionally calling
flush_work() is triggering a warning for .func still being NULL.

Cc: stable@vger.kernel.org # v6.5+
Fixes: 481c2d14627d ("tpm,tpm_tis: Disable interrupts after 1000 unhandled IRQs")
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_tis_core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 176cd8dbf1db..fdef214b9f6b 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -1020,7 +1020,8 @@ void tpm_tis_remove(struct tpm_chip *chip)
 		interrupt = 0;
 
 	tpm_tis_write32(priv, reg, ~TPM_GLOBAL_INT_ENABLE & interrupt);
-	flush_work(&priv->free_irq_work);
+	if (priv->free_irq_work.func)
+		flush_work(&priv->free_irq_work);
 
 	tpm_tis_clkrun_enable(chip, false);
 
-- 
cgit v1.2.3


From f071d02ecad4cfbf3ab41807c90bd1fef1cbfd3f Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Wed, 5 Jun 2024 01:28:42 +0300
Subject: tpm: Switch to new Intel CPU model defines

New CPU #defines encode vendor and family as well as model.

Link: https://lore.kernel.org/all/20240520224620.9480-4-tony.luck@intel.com/
Signed-off-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm.h          | 2 +-
 drivers/char/tpm/tpm_tis_core.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 6b8b9956ba69..7bb87fa5f7a1 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -28,7 +28,7 @@
 #include <linux/tpm_eventlog.h>
 
 #ifdef CONFIG_X86
-#include <asm/intel-family.h>
+#include <asm/cpu_device_id.h>
 #endif
 
 #define TPM_MINOR		224	/* officially assigned */
diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
index 13e99cf65efe..690ad8e9b731 100644
--- a/drivers/char/tpm/tpm_tis_core.h
+++ b/drivers/char/tpm/tpm_tis_core.h
@@ -210,7 +210,7 @@ static inline int tpm_tis_verify_crc(struct tpm_tis_data *data, size_t len,
 static inline bool is_bsw(void)
 {
 #ifdef CONFIG_X86
-	return ((boot_cpu_data.x86_model == INTEL_FAM6_ATOM_AIRMONT) ? 1 : 0);
+	return (boot_cpu_data.x86_vfm == INTEL_ATOM_AIRMONT) ? 1 : 0;
 #else
 	return false;
 #endif
-- 
cgit v1.2.3


From d0d1df8ba18abc57f28fb3bc053b2bf319367f2c Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 4 Jun 2024 17:00:24 +0200
Subject: bpf: Set run context for rawtp test_run callback

syzbot reported crash when rawtp program executed through the
test_run interface calls bpf_get_attach_cookie helper or any
other helper that touches task->bpf_ctx pointer.

Setting the run context (task->bpf_ctx pointer) for test_run
callback.

Fixes: 7adfc6c9b315 ("bpf: Add bpf_get_attach_cookie() BPF helper to access bpf_cookie value")
Reported-by: syzbot+3ab78ff125b7979e45f9@syzkaller.appspotmail.com
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Closes: https://syzkaller.appspot.com/bug?extid=3ab78ff125b7979e45f9
Link: https://lore.kernel.org/bpf/20240604150024.359247-1-jolsa@kernel.org
---
 net/bpf/test_run.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index f6aad4ed2ab2..36ae54f57bf5 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -727,10 +727,16 @@ static void
 __bpf_prog_test_run_raw_tp(void *data)
 {
 	struct bpf_raw_tp_test_run_info *info = data;
+	struct bpf_trace_run_ctx run_ctx = {};
+	struct bpf_run_ctx *old_run_ctx;
+
+	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
 
 	rcu_read_lock();
 	info->retval = bpf_prog_run(info->prog, info->ctx);
 	rcu_read_unlock();
+
+	bpf_reset_run_ctx(old_run_ctx);
 }
 
 int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
-- 
cgit v1.2.3


From 7fcf26b315bbb728036da0862de6b335da83dff2 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 4 Jun 2024 14:29:25 +0200
Subject: Revert "xsk: Support redirect to any socket bound to the same umem"

This reverts commit 2863d665ea41282379f108e4da6c8a2366ba66db.

This patch introduced a potential kernel crash when multiple napi instances
redirect to the same AF_XDP socket. By removing the queue_index check, it is
possible for multiple napi instances to access the Rx ring at the same time,
which will result in a corrupted ring state which can lead to a crash when
flushing the rings in __xsk_flush(). This can happen when the linked list of
sockets to flush gets corrupted by concurrent accesses. A quick and small fix
is not possible, so let us revert this for now.

Reported-by: Yuval El-Hanany <YuvalE@radware.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/xdp-newbies/8100DBDC-0B7C-49DB-9995-6027F6E63147@radware.com
Link: https://lore.kernel.org/bpf/20240604122927.29080-2-magnus.karlsson@gmail.com
---
 net/xdp/xsk.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 727aa20be4bd..7d1c0986f9bb 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -313,13 +313,10 @@ static bool xsk_is_bound(struct xdp_sock *xs)
 
 static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 {
-	struct net_device *dev = xdp->rxq->dev;
-	u32 qid = xdp->rxq->queue_index;
-
 	if (!xsk_is_bound(xs))
 		return -ENXIO;
 
-	if (!dev->_rx[qid].pool || xs->umem != dev->_rx[qid].pool->umem)
+	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
 		return -EINVAL;
 
 	if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) {
-- 
cgit v1.2.3


From 03e38d315f3c5258270ad50f2ae784b6372e87c3 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 4 Jun 2024 14:29:26 +0200
Subject: Revert "xsk: Document ability to redirect to any socket bound to the
 same umem"

This reverts commit 968595a93669b6b4f6d1fcf80cf2d97956b6868f.

Reported-by: Yuval El-Hanany <YuvalE@radware.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/xdp-newbies/8100DBDC-0B7C-49DB-9995-6027F6E63147@radware.com
Link: https://lore.kernel.org/bpf/20240604122927.29080-3-magnus.karlsson@gmail.com
---
 Documentation/networking/af_xdp.rst | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst
index 72da7057e4cf..dceeb0d763aa 100644
--- a/Documentation/networking/af_xdp.rst
+++ b/Documentation/networking/af_xdp.rst
@@ -329,24 +329,23 @@ XDP_SHARED_UMEM option and provide the initial socket's fd in the
 sxdp_shared_umem_fd field as you registered the UMEM on that
 socket. These two sockets will now share one and the same UMEM.
 
-In this case, it is possible to use the NIC's packet steering
-capabilities to steer the packets to the right queue. This is not
-possible in the previous example as there is only one queue shared
-among sockets, so the NIC cannot do this steering as it can only steer
-between queues.
-
-In libxdp (or libbpf prior to version 1.0), you need to use the
-xsk_socket__create_shared() API as it takes a reference to a FILL ring
-and a COMPLETION ring that will be created for you and bound to the
-shared UMEM. You can use this function for all the sockets you create,
-or you can use it for the second and following ones and use
-xsk_socket__create() for the first one. Both methods yield the same
-result.
+There is no need to supply an XDP program like the one in the previous
+case where sockets were bound to the same queue id and
+device. Instead, use the NIC's packet steering capabilities to steer
+the packets to the right queue. In the previous example, there is only
+one queue shared among sockets, so the NIC cannot do this steering. It
+can only steer between queues.
+
+In libbpf, you need to use the xsk_socket__create_shared() API as it
+takes a reference to a FILL ring and a COMPLETION ring that will be
+created for you and bound to the shared UMEM. You can use this
+function for all the sockets you create, or you can use it for the
+second and following ones and use xsk_socket__create() for the first
+one. Both methods yield the same result.
 
 Note that a UMEM can be shared between sockets on the same queue id
 and device, as well as between queues on the same device and between
-devices at the same time. It is also possible to redirect to any
-socket as long as it is bound to the same umem with XDP_SHARED_UMEM.
+devices at the same time.
 
 XDP_USE_NEED_WAKEUP bind flag
 -----------------------------
@@ -823,10 +822,6 @@ A: The short answer is no, that is not supported at the moment. The
    switch, or other distribution mechanism, in your NIC to direct
    traffic to the correct queue id and socket.
 
-   Note that if you are using the XDP_SHARED_UMEM option, it is
-   possible to switch traffic between any socket bound to the same
-   umem.
-
 Q: My packets are sometimes corrupted. What is wrong?
 
 A: Care has to be taken not to feed the same buffer in the UMEM into
-- 
cgit v1.2.3


From 99280413a5b785f22d91e8a8a66dc38f4a214495 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 4 Jun 2024 17:45:20 +0200
Subject: efi: Add missing __nocfi annotations to runtime wrappers

The EFI runtime wrappers are a sandbox for calling into EFI runtime
services, which are invoked using indirect calls. When running with kCFI
enabled, the compiler will require the target of any indirect call to be
type annotated.

Given that the EFI runtime services prototypes and calling convention
are governed by the EFI spec, not the Linux kernel, adding such type
annotations for firmware routines is infeasible, and so the compiler
must be informed that prototype validation should be omitted.

Add the __nocfi annotation at the appropriate places in the EFI runtime
wrapper code to achieve this.

Note that this currently only affects 32-bit ARM, given that other
architectures that support both kCFI and EFI use an asm wrapper to call
EFI runtime services, and this hides the indirect call from the
compiler.

Fixes: 1a4fec49efe5 ("ARM: 9392/2: Support CLANG CFI")
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/runtime-wrappers.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index 5d56bc40a79d..708b777857d3 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -213,7 +213,7 @@ extern struct semaphore __efi_uv_runtime_lock __alias(efi_runtime_lock);
  * Calls the appropriate efi_runtime_service() with the appropriate
  * arguments.
  */
-static void efi_call_rts(struct work_struct *work)
+static void __nocfi efi_call_rts(struct work_struct *work)
 {
 	const union efi_rts_args *args = efi_rts_work.args;
 	efi_status_t status = EFI_NOT_FOUND;
@@ -435,7 +435,7 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
 	return status;
 }
 
-static efi_status_t
+static efi_status_t __nocfi
 virt_efi_set_variable_nb(efi_char16_t *name, efi_guid_t *vendor, u32 attr,
 			 unsigned long data_size, void *data)
 {
@@ -469,7 +469,7 @@ static efi_status_t virt_efi_query_variable_info(u32 attr,
 	return status;
 }
 
-static efi_status_t
+static efi_status_t __nocfi
 virt_efi_query_variable_info_nb(u32 attr, u64 *storage_space,
 				u64 *remaining_space, u64 *max_variable_size)
 {
@@ -499,10 +499,9 @@ static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
 	return status;
 }
 
-static void virt_efi_reset_system(int reset_type,
-				  efi_status_t status,
-				  unsigned long data_size,
-				  efi_char16_t *data)
+static void __nocfi
+virt_efi_reset_system(int reset_type, efi_status_t status,
+		      unsigned long data_size, efi_char16_t *data)
 {
 	if (down_trylock(&efi_runtime_lock)) {
 		pr_warn("failed to invoke the reset_system() runtime service:\n"
-- 
cgit v1.2.3


From 8b0f7410942cdc420c4557eda02bfcdf60ccec17 Mon Sep 17 00:00:00 2001
From: Subbaraya Sundeep <sbhatta@marvell.com>
Date: Wed, 29 May 2024 20:59:44 +0530
Subject: octeontx2-af: Always allocate PF entries from low prioriy zone

PF mcam entries has to be at low priority always so that VF
can install longest prefix match rules at higher priority.
This was taken care currently but when priority allocation
wrt reference entry is requested then entries are allocated
from mid-zone instead of low priority zone. Fix this and
always allocate entries from low priority zone for PFs.

Fixes: 7df5b4b260dd ("octeontx2-af: Allocate low priority entries for PF")
Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/marvell/octeontx2/af/rvu_npc.c    | 33 ++++++++++++++--------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index e8b73b9d75e3..97722ce8c4cb 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -2519,7 +2519,17 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc,
 	 * - when available free entries are less.
 	 * Lower priority ones out of avaialble free entries are always
 	 * chosen when 'high vs low' question arises.
+	 *
+	 * For a VF base MCAM match rule is set by its PF. And all the
+	 * further MCAM rules installed by VF on its own are
+	 * concatenated with the base rule set by its PF. Hence PF entries
+	 * should be at lower priority compared to VF entries. Otherwise
+	 * base rule is hit always and rules installed by VF will be of
+	 * no use. Hence if the request is from PF then allocate low
+	 * priority entries.
 	 */
+	if (!(pcifunc & RVU_PFVF_FUNC_MASK))
+		goto lprio_alloc;
 
 	/* Get the search range for priority allocation request */
 	if (req->priority) {
@@ -2528,17 +2538,6 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc,
 		goto alloc;
 	}
 
-	/* For a VF base MCAM match rule is set by its PF. And all the
-	 * further MCAM rules installed by VF on its own are
-	 * concatenated with the base rule set by its PF. Hence PF entries
-	 * should be at lower priority compared to VF entries. Otherwise
-	 * base rule is hit always and rules installed by VF will be of
-	 * no use. Hence if the request is from PF and NOT a priority
-	 * allocation request then allocate low priority entries.
-	 */
-	if (!(pcifunc & RVU_PFVF_FUNC_MASK))
-		goto lprio_alloc;
-
 	/* Find out the search range for non-priority allocation request
 	 *
 	 * Get MCAM free entry count in middle zone.
@@ -2568,6 +2567,18 @@ lprio_alloc:
 		reverse = true;
 		start = 0;
 		end = mcam->bmap_entries;
+		/* Ensure PF requests are always at bottom and if PF requests
+		 * for higher/lower priority entry wrt reference entry then
+		 * honour that criteria and start search for entries from bottom
+		 * and not in mid zone.
+		 */
+		if (!(pcifunc & RVU_PFVF_FUNC_MASK) &&
+		    req->priority == NPC_MCAM_HIGHER_PRIO)
+			end = req->ref_entry;
+
+		if (!(pcifunc & RVU_PFVF_FUNC_MASK) &&
+		    req->priority == NPC_MCAM_LOWER_PRIO)
+			start = req->ref_entry;
 	}
 
 alloc:
-- 
cgit v1.2.3


From fb0aa0781a5f457e3864da68af52c3b1f4f7fd8f Mon Sep 17 00:00:00 2001
From: Wen Gu <guwen@linux.alibaba.com>
Date: Fri, 31 May 2024 16:54:17 +0800
Subject: net/smc: avoid overwriting when adjusting sock bufsizes

When copying smc settings to clcsock, avoid setting clcsock's sk_sndbuf
to sysctl_tcp_wmem[1], since this may overwrite the value set by
tcp_sndbuf_expand() in TCP connection establishment.

And the other setting sk_{snd|rcv}buf to sysctl value in
smc_adjust_sock_bufsizes() can also be omitted since the initialization
of smc sock and clcsock has set sk_{snd|rcv}buf to smc.sysctl_{w|r}mem
or ipv4_sysctl_tcp_{w|r}mem[1].

Fixes: 30c3c4a4497c ("net/smc: Use correct buffer sizes when switching between TCP and SMC")
Link: https://lore.kernel.org/r/5eaf3858-e7fd-4db8-83e8-3d7a3e0e9ae2@linux.alibaba.com
Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com>, too.
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c | 22 ++--------------------
 1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index e50a286fd0fb..c5f98c6b2561 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -459,29 +459,11 @@ out:
 static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
 				     unsigned long mask)
 {
-	struct net *nnet = sock_net(nsk);
-
 	nsk->sk_userlocks = osk->sk_userlocks;
-	if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
+	if (osk->sk_userlocks & SOCK_SNDBUF_LOCK)
 		nsk->sk_sndbuf = osk->sk_sndbuf;
-	} else {
-		if (mask == SK_FLAGS_SMC_TO_CLC)
-			WRITE_ONCE(nsk->sk_sndbuf,
-				   READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
-		else
-			WRITE_ONCE(nsk->sk_sndbuf,
-				   2 * READ_ONCE(nnet->smc.sysctl_wmem));
-	}
-	if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
+	if (osk->sk_userlocks & SOCK_RCVBUF_LOCK)
 		nsk->sk_rcvbuf = osk->sk_rcvbuf;
-	} else {
-		if (mask == SK_FLAGS_SMC_TO_CLC)
-			WRITE_ONCE(nsk->sk_rcvbuf,
-				   READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
-		else
-			WRITE_ONCE(nsk->sk_rcvbuf,
-				   2 * READ_ONCE(nnet->smc.sysctl_rmem));
-	}
 }
 
 static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
-- 
cgit v1.2.3


From 0a8d3f2e3e8d8aea8af017e14227b91d5989b696 Mon Sep 17 00:00:00 2001
From: Tristram Ha <tristram.ha@microchip.com>
Date: Thu, 30 May 2024 18:38:01 -0700
Subject: net: phy: Micrel KSZ8061: fix errata solution not taking effect
 problem

KSZ8061 needs to write to a MMD register at driver initialization to fix
an errata.  This worked in 5.0 kernel but not in newer kernels.  The
issue is the main phylib code no longer resets PHY at the very beginning.
Calling phy resuming code later will reset the chip if it is already
powered down at the beginning.  This wipes out the MMD register write.
Solution is to implement a phy resume function for KSZ8061 to take care
of this problem.

Fixes: 232ba3a51cc2 ("net: phy: Micrel KSZ8061: link failure after cable connect")
Signed-off-by: Tristram Ha <tristram.ha@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 8c20cf937530..5aada7cf3da7 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -866,6 +866,17 @@ static int ksz8061_config_init(struct phy_device *phydev)
 {
 	int ret;
 
+	/* Chip can be powered down by the bootstrap code. */
+	ret = phy_read(phydev, MII_BMCR);
+	if (ret < 0)
+		return ret;
+	if (ret & BMCR_PDOWN) {
+		ret = phy_write(phydev, MII_BMCR, ret & ~BMCR_PDOWN);
+		if (ret < 0)
+			return ret;
+		usleep_range(1000, 2000);
+	}
+
 	ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_DEVID1, 0xB61A);
 	if (ret)
 		return ret;
@@ -2135,6 +2146,35 @@ static int ksz9477_resume(struct phy_device *phydev)
 	return 0;
 }
 
+static int ksz8061_resume(struct phy_device *phydev)
+{
+	int ret;
+
+	/* This function can be called twice when the Ethernet device is on. */
+	ret = phy_read(phydev, MII_BMCR);
+	if (ret < 0)
+		return ret;
+	if (!(ret & BMCR_PDOWN))
+		return 0;
+
+	genphy_resume(phydev);
+	usleep_range(1000, 2000);
+
+	/* Re-program the value after chip is reset. */
+	ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_DEVID1, 0xB61A);
+	if (ret)
+		return ret;
+
+	/* Enable PHY Interrupts */
+	if (phy_interrupt_is_valid(phydev)) {
+		phydev->interrupts = PHY_INTERRUPT_ENABLED;
+		if (phydev->drv->config_intr)
+			phydev->drv->config_intr(phydev);
+	}
+
+	return 0;
+}
+
 static int kszphy_probe(struct phy_device *phydev)
 {
 	const struct kszphy_type *type = phydev->drv->driver_data;
@@ -5389,7 +5429,7 @@ static struct phy_driver ksphy_driver[] = {
 	.config_intr	= kszphy_config_intr,
 	.handle_interrupt = kszphy_handle_interrupt,
 	.suspend	= kszphy_suspend,
-	.resume		= kszphy_resume,
+	.resume		= ksz8061_resume,
 }, {
 	.phy_id		= PHY_ID_KSZ9021,
 	.phy_id_mask	= 0x000ffffe,
-- 
cgit v1.2.3


From 491aee894a08bc9b8bb52e7363b9d4bc6403f363 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Mon, 3 Jun 2024 04:57:55 +0000
Subject: ionic: fix kernel panic in XDP_TX action

In the XDP_TX path, ionic driver sends a packet to the TX path with rx
page and corresponding dma address.
After tx is done, ionic_tx_clean() frees that page.
But RX ring buffer isn't reset to NULL.
So, it uses a freed page, which causes kernel panic.

BUG: unable to handle page fault for address: ffff8881576c110c
PGD 773801067 P4D 773801067 PUD 87f086067 PMD 87efca067 PTE 800ffffea893e060
Oops: Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC KASAN NOPTI
CPU: 1 PID: 25 Comm: ksoftirqd/1 Not tainted 6.9.0+ #11
Hardware name: ASUS System Product Name/PRIME Z690-P D4, BIOS 0603 11/01/2021
RIP: 0010:bpf_prog_f0b8caeac1068a55_balancer_ingress+0x3b/0x44f
Code: 00 53 41 55 41 56 41 57 b8 01 00 00 00 48 8b 5f 08 4c 8b 77 00 4c 89 f7 48 83 c7 0e 48 39 d8
RSP: 0018:ffff888104e6fa28 EFLAGS: 00010283
RAX: 0000000000000002 RBX: ffff8881576c1140 RCX: 0000000000000002
RDX: ffffffffc0051f64 RSI: ffffc90002d33048 RDI: ffff8881576c110e
RBP: ffff888104e6fa88 R08: 0000000000000000 R09: ffffed1027a04a23
R10: 0000000000000000 R11: 0000000000000000 R12: ffff8881b03a21a8
R13: ffff8881589f800f R14: ffff8881576c1100 R15: 00000001576c1100
FS: 0000000000000000(0000) GS:ffff88881ae00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffff8881576c110c CR3: 0000000767a90000 CR4: 00000000007506f0
PKRU: 55555554
Call Trace:
<TASK>
? __die+0x20/0x70
? page_fault_oops+0x254/0x790
? __pfx_page_fault_oops+0x10/0x10
? __pfx_is_prefetch.constprop.0+0x10/0x10
? search_bpf_extables+0x165/0x260
? fixup_exception+0x4a/0x970
? exc_page_fault+0xcb/0xe0
? asm_exc_page_fault+0x22/0x30
? 0xffffffffc0051f64
? bpf_prog_f0b8caeac1068a55_balancer_ingress+0x3b/0x44f
? do_raw_spin_unlock+0x54/0x220
ionic_rx_service+0x11ab/0x3010 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
? ionic_tx_clean+0x29b/0xc60 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
? __pfx_ionic_tx_clean+0x10/0x10 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
? __pfx_ionic_rx_service+0x10/0x10 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
? ionic_tx_cq_service+0x25d/0xa00 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
? __pfx_ionic_rx_service+0x10/0x10 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
ionic_cq_service+0x69/0x150 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
ionic_txrx_napi+0x11a/0x540 [ionic 9180c3001ab627d82bbc5f3ebe8a0decaf6bb864]
__napi_poll.constprop.0+0xa0/0x440
net_rx_action+0x7e7/0xc30
? __pfx_net_rx_action+0x10/0x10

Fixes: 8eeed8373e1c ("ionic: Add XDP_TX support")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Reviewed-by: Shannon Nelson <shannon.nelson@amd.com>
Reviewed-by: Brett Creeley <brett.creeley@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 5dba6d2d633c..2427610f4306 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -586,6 +586,7 @@ static bool ionic_run_xdp(struct ionic_rx_stats *stats,
 			netdev_dbg(netdev, "tx ionic_xdp_post_frame err %d\n", err);
 			goto out_xdp_abort;
 		}
+		buf_info->page = NULL;
 		stats->xdp_tx++;
 
 		/* the Tx completion will free the buffers */
-- 
cgit v1.2.3


From affc18fdc694190ca7575b9a86632a73b9fe043d Mon Sep 17 00:00:00 2001
From: Hangyu Hua <hbh25y@gmail.com>
Date: Mon, 3 Jun 2024 15:13:03 +0800
Subject: net: sched: sch_multiq: fix possible OOB write in multiq_tune()

q->bands will be assigned to qopt->bands to execute subsequent code logic
after kmalloc. So the old q->bands should not be used in kmalloc.
Otherwise, an out-of-bounds write will occur.

Fixes: c2999f7fb05b ("net: sched: multiq: don't call qdisc_put() while holding tree lock")
Signed-off-by: Hangyu Hua <hbh25y@gmail.com>
Acked-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_multiq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 79e93a19d5fa..06e03f5cd7ce 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -185,7 +185,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
 
 	qopt->bands = qdisc_dev(sch)->real_num_tx_queues;
 
-	removed = kmalloc(sizeof(*removed) * (q->max_bands - q->bands),
+	removed = kmalloc(sizeof(*removed) * (q->max_bands - qopt->bands),
 			  GFP_KERNEL);
 	if (!removed)
 		return -ENOMEM;
-- 
cgit v1.2.3


From 1cd4bc987abb2823836cbb8f887026011ccddc8a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 3 Jun 2024 10:59:26 +0200
Subject: vxlan: Fix regression when dropping packets due to invalid src
 addresses

Commit f58f45c1e5b9 ("vxlan: drop packets from invalid src-address")
has recently been added to vxlan mainly in the context of source
address snooping/learning so that when it is enabled, an entry in the
FDB is not being created for an invalid address for the corresponding
tunnel endpoint.

Before commit f58f45c1e5b9 vxlan was similarly behaving as geneve in
that it passed through whichever macs were set in the L2 header. It
turns out that this change in behavior breaks setups, for example,
Cilium with netkit in L3 mode for Pods as well as tunnel mode has been
passing before the change in f58f45c1e5b9 for both vxlan and geneve.
After mentioned change it is only passing for geneve as in case of
vxlan packets are dropped due to vxlan_set_mac() returning false as
source and destination macs are zero which for E/W traffic via tunnel
is totally fine.

Fix it by only opting into the is_valid_ether_addr() check in
vxlan_set_mac() when in fact source address snooping/learning is
actually enabled in vxlan. This is done by moving the check into
vxlan_snoop(). With this change, the Cilium connectivity test suite
passes again for both tunnel flavors.

Fixes: f58f45c1e5b9 ("vxlan: drop packets from invalid src-address")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Bauer <mail@david-bauer.net>
Cc: Ido Schimmel <idosch@nvidia.com>
Cc: Nikolay Aleksandrov <razor@blackwall.org>
Cc: Martin KaFai Lau <martin.lau@kernel.org>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Reviewed-by: David Bauer <mail@david-bauer.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan/vxlan_core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index f78dd0438843..567cb3faab70 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -1446,6 +1446,10 @@ static bool vxlan_snoop(struct net_device *dev,
 	struct vxlan_fdb *f;
 	u32 ifindex = 0;
 
+	/* Ignore packets from invalid src-address */
+	if (!is_valid_ether_addr(src_mac))
+		return true;
+
 #if IS_ENABLED(CONFIG_IPV6)
 	if (src_ip->sa.sa_family == AF_INET6 &&
 	    (ipv6_addr_type(&src_ip->sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL))
@@ -1616,10 +1620,6 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan,
 	if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
 		return false;
 
-	/* Ignore packets from invalid src-address */
-	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
-		return false;
-
 	/* Get address from the outer IP header */
 	if (vxlan_get_sk_family(vs) == AF_INET) {
 		saddr.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
-- 
cgit v1.2.3


From 712115a24b1a5318c10fc757d48d8f33815a6bfa Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 3 Jun 2024 17:30:19 +0800
Subject: selftests: hsr: add missing config for CONFIG_BRIDGE

hsr_redbox.sh test need to create bridge for testing. Add the missing
config CONFIG_BRIDGE in config file.

Fixes: eafbf0574e05 ("test: hsr: Extend the hsr_redbox.sh to have more SAN devices connected")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Tested-by: Simon Horman <horms@kernel.org>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/hsr/config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config
index 22061204fb69..241542441c51 100644
--- a/tools/testing/selftests/net/hsr/config
+++ b/tools/testing/selftests/net/hsr/config
@@ -2,3 +2,4 @@ CONFIG_IPV6=y
 CONFIG_NET_SCH_NETEM=m
 CONFIG_HSR=y
 CONFIG_VETH=y
+CONFIG_BRIDGE=y
-- 
cgit v1.2.3


From 14951beaec93696b092a906baa0f29322cf34004 Mon Sep 17 00:00:00 2001
From: Wei Li <liwei391@huawei.com>
Date: Tue, 23 Apr 2024 17:35:01 +0800
Subject: arm64: armv8_deprecated: Fix warning in isndep cpuhp starting process

The function run_all_insn_set_hw_mode() is registered as startup callback
of 'CPUHP_AP_ARM64_ISNDEP_STARTING', it invokes set_hw_mode() methods of
all emulated instructions.

As the STARTING callbacks are not expected to fail, if one of the
set_hw_mode() fails, e.g. due to el0 mixed-endian is not supported for
'setend', it will report a warning:

```
CPU[2] cannot support the emulation of setend
CPU 2 UP state arm64/isndep:starting (136) failed (-22)
CPU2: Booted secondary processor 0x0000000002 [0x414fd0c1]
```

To fix it, add a check for INSN_UNAVAILABLE status and skip the process.

Signed-off-by: Wei Li <liwei391@huawei.com>
Tested-by: Huisong Li <lihuisong@huawei.com>
Link: https://lore.kernel.org/r/20240423093501.3460764-1-liwei391@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/armv8_deprecated.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index dd6ce86d4332..b776e7424fe9 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -462,6 +462,9 @@ static int run_all_insn_set_hw_mode(unsigned int cpu)
 	for (int i = 0; i < ARRAY_SIZE(insn_emulations); i++) {
 		struct insn_emulation *insn = insn_emulations[i];
 		bool enable = READ_ONCE(insn->current_mode) == INSN_HW;
+		if (insn->status == INSN_UNAVAILABLE)
+			continue;
+
 		if (insn->set_hw_mode && insn->set_hw_mode(enable)) {
 			pr_warn("CPU[%u] cannot support the emulation of %s",
 				cpu, insn->name);
-- 
cgit v1.2.3


From db574f2f96d0c9a245a9e787e3d9ec288fb2b445 Mon Sep 17 00:00:00 2001
From: Tao Su <tao1.su@linux.intel.com>
Date: Tue, 28 May 2024 18:22:34 +0800
Subject: KVM: x86/mmu: Don't save mmu_invalidate_seq after checking private
 attr

Drop the second snapshot of mmu_invalidate_seq in kvm_faultin_pfn().
Before checking the mismatch of private vs. shared, mmu_invalidate_seq is
saved to fault->mmu_seq, which can be used to detect an invalidation
related to the gfn occurred, i.e. KVM will not install a mapping in page
table if fault->mmu_seq != mmu_invalidate_seq.

Currently there is a second snapshot of mmu_invalidate_seq, which may not
be same as the first snapshot in kvm_faultin_pfn(), i.e. the gfn attribute
may be changed between the two snapshots, but the gfn may be mapped in
page table without hindrance. Therefore, drop the second snapshot as it
has no obvious benefits.

Fixes: f6adeae81f35 ("KVM: x86/mmu: Handle no-slot faults at the beginning of kvm_faultin_pfn()")
Signed-off-by: Tao Su <tao1.su@linux.intel.com>
Message-ID: <20240528102234.2162763-1-tao1.su@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b7b4426a7221..8d74bdef68c1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4411,9 +4411,6 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 			return RET_PF_EMULATE;
 	}
 
-	fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
-	smp_rmb();
-
 	/*
 	 * Check for a relevant mmu_notifier invalidation event before getting
 	 * the pfn from the primary MMU, and before acquiring mmu_lock.
-- 
cgit v1.2.3


From a46d0ea5c94205f40ecf912d1bb7806a8a64704f Mon Sep 17 00:00:00 2001
From: Jason Xing <kernelxing@tencent.com>
Date: Tue, 4 Jun 2024 01:02:16 +0800
Subject: tcp: count CLOSE-WAIT sockets for TCP_MIB_CURRESTAB

According to RFC 1213, we should also take CLOSE-WAIT sockets into
consideration:

  "tcpCurrEstab OBJECT-TYPE
   ...
   The number of TCP connections for which the current state
   is either ESTABLISHED or CLOSE- WAIT."

After this, CurrEstab counter will display the total number of
ESTABLISHED and CLOSE-WAIT sockets.

The logic of counting
When we increment the counter?
a) if we change the state to ESTABLISHED.
b) if we change the state from SYN-RECEIVED to CLOSE-WAIT.

When we decrement the counter?
a) if the socket leaves ESTABLISHED and will never go into CLOSE-WAIT,
say, on the client side, changing from ESTABLISHED to FIN-WAIT-1.
b) if the socket leaves CLOSE-WAIT, say, on the server side, changing
from CLOSE-WAIT to LAST-ACK.

Please note: there are two chances that old state of socket can be changed
to CLOSE-WAIT in tcp_fin(). One is SYN-RECV, the other is ESTABLISHED.
So we have to take care of the former case.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Jason Xing <kernelxing@tencent.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4d8cc2ebb64c..e6790ea74877 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2649,6 +2649,10 @@ void tcp_set_state(struct sock *sk, int state)
 		if (oldstate != TCP_ESTABLISHED)
 			TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
 		break;
+	case TCP_CLOSE_WAIT:
+		if (oldstate == TCP_SYN_RECV)
+			TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
+		break;
 
 	case TCP_CLOSE:
 		if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
@@ -2660,7 +2664,7 @@ void tcp_set_state(struct sock *sk, int state)
 			inet_put_port(sk);
 		fallthrough;
 	default:
-		if (oldstate == TCP_ESTABLISHED)
+		if (oldstate == TCP_ESTABLISHED || oldstate == TCP_CLOSE_WAIT)
 			TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
 	}
 
-- 
cgit v1.2.3


From 9633e9377e6af0244f7381e86b9aac5276f5be97 Mon Sep 17 00:00:00 2001
From: Jason Xing <kernelxing@tencent.com>
Date: Tue, 4 Jun 2024 01:02:17 +0800
Subject: mptcp: count CLOSE-WAIT sockets for MPTCP_MIB_CURRESTAB

Like previous patch does in TCP, we need to adhere to RFC 1213:

  "tcpCurrEstab OBJECT-TYPE
   ...
   The number of TCP connections for which the current state
   is either ESTABLISHED or CLOSE- WAIT."

So let's consider CLOSE-WAIT sockets.

The logic of counting
When we increment the counter?
a) Only if we change the state to ESTABLISHED.

When we decrement the counter?
a) if the socket leaves ESTABLISHED and will never go into CLOSE-WAIT,
say, on the client side, changing from ESTABLISHED to FIN-WAIT-1.
b) if the socket leaves CLOSE-WAIT, say, on the server side, changing
from CLOSE-WAIT to LAST-ACK.

Fixes: d9cd27b8cd19 ("mptcp: add CurrEstab MIB counter support")
Signed-off-by: Jason Xing <kernelxing@tencent.com>
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 7d44196ec5b6..96b113854bd3 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2916,9 +2916,14 @@ void mptcp_set_state(struct sock *sk, int state)
 		if (oldstate != TCP_ESTABLISHED)
 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
 		break;
-
+	case TCP_CLOSE_WAIT:
+		/* Unlike TCP, MPTCP sk would not have the TCP_SYN_RECV state:
+		 * MPTCP "accepted" sockets will be created later on. So no
+		 * transition from TCP_SYN_RECV to TCP_CLOSE_WAIT.
+		 */
+		break;
 	default:
-		if (oldstate == TCP_ESTABLISHED)
+		if (oldstate == TCP_ESTABLISHED || oldstate == TCP_CLOSE_WAIT)
 			MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
 	}
 
-- 
cgit v1.2.3


From 5b4b62a169e10401cca34a6e7ac39161986f5605 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 3 Jun 2024 11:48:26 -0700
Subject: rtnetlink: make the "split" NLM_DONE handling generic

Jaroslav reports Dell's OMSA Systems Management Data Engine
expects NLM_DONE in a separate recvmsg(), both for rtnl_dump_ifinfo()
and inet_dump_ifaddr(). We already added a similar fix previously in
commit 460b0d33cf10 ("inet: bring NLM_DONE out to a separate recv() again")

Instead of modifying all the dump handlers, and making them look
different than modern for_each_netdev_dump()-based dump handlers -
put the workaround in rtnetlink code. This will also help us move
the custom rtnl-locking from af_netlink in the future (in net-next).

Note that this change is not touching rtnl_dump_all(). rtnl_dump_all()
is different kettle of fish and a potential problem. We now mix families
in a single recvmsg(), but NLM_DONE is not coalesced.

Tested:

  ./cli.py --dbg-small-recv 4096 --spec netlink/specs/rt_addr.yaml \
           --dump getaddr --json '{"ifa-family": 2}'

  ./cli.py --dbg-small-recv 4096 --spec netlink/specs/rt_route.yaml \
           --dump getroute --json '{"rtm-family": 2}'

  ./cli.py --dbg-small-recv 4096 --spec netlink/specs/rt_link.yaml \
           --dump getlink

Fixes: 3e41af90767d ("rtnetlink: use xarray iterator to implement rtnl_dump_ifinfo()")
Fixes: cdb2f80f1c10 ("inet: use xa_array iterator to implement inet_dump_ifaddr()")
Reported-by: Jaroslav Pulchart <jaroslav.pulchart@gooddata.com>
Link: https://lore.kernel.org/all/CAK8fFZ7MKoFSEzMBDAOjoUt+vTZRRQgLDNXEOfdCCXSoXXKE0g@mail.gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/rtnetlink.h |  1 +
 net/core/rtnetlink.c    | 44 ++++++++++++++++++++++++++++++++++++++++++--
 net/ipv4/devinet.c      |  2 +-
 net/ipv4/fib_frontend.c |  7 +------
 4 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 3bfb80bad173..b45d57b5968a 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -13,6 +13,7 @@ enum rtnl_link_flags {
 	RTNL_FLAG_DOIT_UNLOCKED		= BIT(0),
 	RTNL_FLAG_BULK_DEL_SUPPORTED	= BIT(1),
 	RTNL_FLAG_DUMP_UNLOCKED		= BIT(2),
+	RTNL_FLAG_DUMP_SPLIT_NLM_DONE	= BIT(3),	/* legacy behavior */
 };
 
 enum rtnl_kinds {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b86b0a87367d..4668d6718040 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -6484,6 +6484,46 @@ static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 /* Process one rtnetlink message. */
 
+static int rtnl_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	rtnl_dumpit_func dumpit = cb->data;
+	int err;
+
+	/* Previous iteration have already finished, avoid calling->dumpit()
+	 * again, it may not expect to be called after it reached the end.
+	 */
+	if (!dumpit)
+		return 0;
+
+	err = dumpit(skb, cb);
+
+	/* Old dump handlers used to send NLM_DONE as in a separate recvmsg().
+	 * Some applications which parse netlink manually depend on this.
+	 */
+	if (cb->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE) {
+		if (err < 0 && err != -EMSGSIZE)
+			return err;
+		if (!err)
+			cb->data = NULL;
+
+		return skb->len;
+	}
+	return err;
+}
+
+static int rtnetlink_dump_start(struct sock *ssk, struct sk_buff *skb,
+				const struct nlmsghdr *nlh,
+				struct netlink_dump_control *control)
+{
+	if (control->flags & RTNL_FLAG_DUMP_SPLIT_NLM_DONE) {
+		WARN_ON(control->data);
+		control->data = control->dump;
+		control->dump = rtnl_dumpit;
+	}
+
+	return netlink_dump_start(ssk, skb, nlh, control);
+}
+
 static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 			     struct netlink_ext_ack *extack)
 {
@@ -6548,7 +6588,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 				.module		= owner,
 				.flags		= flags,
 			};
-			err = netlink_dump_start(rtnl, skb, nlh, &c);
+			err = rtnetlink_dump_start(rtnl, skb, nlh, &c);
 			/* netlink_dump_start() will keep a reference on
 			 * module if dump is still in progress.
 			 */
@@ -6694,7 +6734,7 @@ void __init rtnetlink_init(void)
 	register_netdevice_notifier(&rtnetlink_dev_notifier);
 
 	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
-		      rtnl_dump_ifinfo, 0);
+		      rtnl_dump_ifinfo, RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
 	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0);
 	rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f3892ee9dfb3..d09f557eaa77 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2805,7 +2805,7 @@ void __init devinet_init(void)
 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr,
-		      RTNL_FLAG_DUMP_UNLOCKED);
+		      RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
 	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
 		      inet_netconf_dump_devconf,
 		      RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c484b1c0fc00..7ad2cafb9276 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1050,11 +1050,6 @@ next:
 			e++;
 		}
 	}
-
-	/* Don't let NLM_DONE coalesce into a message, even if it could.
-	 * Some user space expects NLM_DONE in a separate recv().
-	 */
-	err = skb->len;
 out:
 
 	cb->args[1] = e;
@@ -1665,5 +1660,5 @@ void __init ip_fib_init(void)
 	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0);
 	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0);
 	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib,
-		      RTNL_FLAG_DUMP_UNLOCKED);
+		      RTNL_FLAG_DUMP_UNLOCKED | RTNL_FLAG_DUMP_SPLIT_NLM_DONE);
 }
-- 
cgit v1.2.3


From 5c40e428aea644c9d924e491b1bc22fa9f272bcc Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 4 Jun 2024 22:59:57 +0200
Subject: arm64/io: add constant-argument check

In some configurations __const_iowrite32_copy() does not get inlined
and gcc runs into the BUILD_BUG():

In file included from <command-line>:
In function '__const_memcpy_toio_aligned32',
    inlined from '__const_iowrite32_copy' at arch/arm64/include/asm/io.h:203:3,
    inlined from '__const_iowrite32_copy' at arch/arm64/include/asm/io.h:199:20:
include/linux/compiler_types.h:487:45: error: call to '__compiletime_assert_538' declared with attribute error: BUILD_BUG failed
  487 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
      |                                             ^
include/linux/compiler_types.h:468:25: note: in definition of macro '__compiletime_assert'
  468 |                         prefix ## suffix();                             \
      |                         ^~~~~~
include/linux/compiler_types.h:487:9: note: in expansion of macro '_compiletime_assert'
  487 |         _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
      |         ^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:39:37: note: in expansion of macro 'compiletime_assert'
   39 | #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
      |                                     ^~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:59:21: note: in expansion of macro 'BUILD_BUG_ON_MSG'
   59 | #define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
      |                     ^~~~~~~~~~~~~~~~
arch/arm64/include/asm/io.h:193:17: note: in expansion of macro 'BUILD_BUG'
  193 |                 BUILD_BUG();
      |                 ^~~~~~~~~

Move the check for constant arguments into the inline function to ensure
it is still constant if the compiler decides against inlining it, and
mark them as __always_inline to override the logic that sometimes leads
to the compiler not producing the simplified output.

Note that either the __always_inline annotation or the check for a
constant value are sufficient here, but combining the two looks cleaner
as it also avoids the macro. With clang-8 and older, the macro was still
needed, but all versions of gcc and clang can reliably perform constant
folding here.

Fixes: ead79118dae6 ("arm64/io: Provide a WC friendly __iowriteXX_copy()")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20240604210006.668912-1-arnd@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/io.h | 36 ++++++++++++++++--------------------
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 4ff0ae3f6d66..41fd90895dfc 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -153,8 +153,9 @@ extern void __memset_io(volatile void __iomem *, int, size_t);
  * emit the large TLP from the CPU.
  */
 
-static inline void __const_memcpy_toio_aligned32(volatile u32 __iomem *to,
-						 const u32 *from, size_t count)
+static __always_inline void
+__const_memcpy_toio_aligned32(volatile u32 __iomem *to, const u32 *from,
+			      size_t count)
 {
 	switch (count) {
 	case 8:
@@ -196,24 +197,22 @@ static inline void __const_memcpy_toio_aligned32(volatile u32 __iomem *to,
 
 void __iowrite32_copy_full(void __iomem *to, const void *from, size_t count);
 
-static inline void __const_iowrite32_copy(void __iomem *to, const void *from,
-					  size_t count)
+static __always_inline void
+__iowrite32_copy(void __iomem *to, const void *from, size_t count)
 {
-	if (count == 8 || count == 4 || count == 2 || count == 1) {
+	if (__builtin_constant_p(count) &&
+	    (count == 8 || count == 4 || count == 2 || count == 1)) {
 		__const_memcpy_toio_aligned32(to, from, count);
 		dgh();
 	} else {
 		__iowrite32_copy_full(to, from, count);
 	}
 }
+#define __iowrite32_copy __iowrite32_copy
 
-#define __iowrite32_copy(to, from, count)                  \
-	(__builtin_constant_p(count) ?                     \
-		 __const_iowrite32_copy(to, from, count) : \
-		 __iowrite32_copy_full(to, from, count))
-
-static inline void __const_memcpy_toio_aligned64(volatile u64 __iomem *to,
-						 const u64 *from, size_t count)
+static __always_inline void
+__const_memcpy_toio_aligned64(volatile u64 __iomem *to, const u64 *from,
+			      size_t count)
 {
 	switch (count) {
 	case 8:
@@ -255,21 +254,18 @@ static inline void __const_memcpy_toio_aligned64(volatile u64 __iomem *to,
 
 void __iowrite64_copy_full(void __iomem *to, const void *from, size_t count);
 
-static inline void __const_iowrite64_copy(void __iomem *to, const void *from,
-					  size_t count)
+static __always_inline void
+__iowrite64_copy(void __iomem *to, const void *from, size_t count)
 {
-	if (count == 8 || count == 4 || count == 2 || count == 1) {
+	if (__builtin_constant_p(count) &&
+	    (count == 8 || count == 4 || count == 2 || count == 1)) {
 		__const_memcpy_toio_aligned64(to, from, count);
 		dgh();
 	} else {
 		__iowrite64_copy_full(to, from, count);
 	}
 }
-
-#define __iowrite64_copy(to, from, count)                  \
-	(__builtin_constant_p(count) ?                     \
-		 __const_iowrite64_copy(to, from, count) : \
-		 __iowrite64_copy_full(to, from, count))
+#define __iowrite64_copy __iowrite64_copy
 
 /*
  * I/O memory mapping functions.
-- 
cgit v1.2.3


From c57e558194430d10d5e5f4acd8a8655b68dade13 Mon Sep 17 00:00:00 2001
From: Frank Wunderlich <frank-w@public-files.de>
Date: Mon, 3 Jun 2024 21:25:05 +0200
Subject: net: ethernet: mtk_eth_soc: handle dma buffer size soc specific

The mainline MTK ethernet driver suffers long time from rarly but
annoying tx queue timeouts. We think that this is caused by fixed
dma sizes hardcoded for all SoCs.

We suspect this problem arises from a low level of free TX DMADs,
the TX Ring alomost full.

The transmit timeout is caused by the Tx queue not waking up. The
Tx queue stops when the free counter is less than ring->thres, and
it will wake up once the free counter is greater than ring->thres.
If the CPU is too late to wake up the Tx queues, it may cause a
transmit timeout.
Therefore, we increased the TX and RX DMADs to improve this error
situation.

Use the dma-size implementation from SDK in a per SoC manner. In
difference to SDK we have no RSS feature yet, so all RX/TX sizes
should be raised from 512 to 2048 byte except fqdma on mt7988 to
avoid the tx timeout issue.

Fixes: 656e705243fd ("net-next: mediatek: add support for MT7623 ethernet")
Suggested-by: Daniel Golle <daniel@makrotopia.org>
Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 104 +++++++++++++++++++---------
 drivers/net/ethernet/mediatek/mtk_eth_soc.h |   9 ++-
 2 files changed, 77 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index cae46290a7ae..c84ce54a84a0 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1131,9 +1131,9 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
 {
 	const struct mtk_soc_data *soc = eth->soc;
 	dma_addr_t phy_ring_tail;
-	int cnt = MTK_QDMA_RING_SIZE;
+	int cnt = soc->tx.fq_dma_size;
 	dma_addr_t dma_addr;
-	int i;
+	int i, j, len;
 
 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_SRAM))
 		eth->scratch_ring = eth->sram_base;
@@ -1142,40 +1142,46 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
 						       cnt * soc->tx.desc_size,
 						       &eth->phy_scratch_ring,
 						       GFP_KERNEL);
+
 	if (unlikely(!eth->scratch_ring))
 		return -ENOMEM;
 
-	eth->scratch_head = kcalloc(cnt, MTK_QDMA_PAGE_SIZE, GFP_KERNEL);
-	if (unlikely(!eth->scratch_head))
-		return -ENOMEM;
+	phy_ring_tail = eth->phy_scratch_ring + soc->tx.desc_size * (cnt - 1);
 
-	dma_addr = dma_map_single(eth->dma_dev,
-				  eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE,
-				  DMA_FROM_DEVICE);
-	if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr)))
-		return -ENOMEM;
+	for (j = 0; j < DIV_ROUND_UP(soc->tx.fq_dma_size, MTK_FQ_DMA_LENGTH); j++) {
+		len = min_t(int, cnt - j * MTK_FQ_DMA_LENGTH, MTK_FQ_DMA_LENGTH);
+		eth->scratch_head[j] = kcalloc(len, MTK_QDMA_PAGE_SIZE, GFP_KERNEL);
 
-	phy_ring_tail = eth->phy_scratch_ring + soc->tx.desc_size * (cnt - 1);
+		if (unlikely(!eth->scratch_head[j]))
+			return -ENOMEM;
 
-	for (i = 0; i < cnt; i++) {
-		dma_addr_t addr = dma_addr + i * MTK_QDMA_PAGE_SIZE;
-		struct mtk_tx_dma_v2 *txd;
+		dma_addr = dma_map_single(eth->dma_dev,
+					  eth->scratch_head[j], len * MTK_QDMA_PAGE_SIZE,
+					  DMA_FROM_DEVICE);
 
-		txd = eth->scratch_ring + i * soc->tx.desc_size;
-		txd->txd1 = addr;
-		if (i < cnt - 1)
-			txd->txd2 = eth->phy_scratch_ring +
-				    (i + 1) * soc->tx.desc_size;
+		if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr)))
+			return -ENOMEM;
 
-		txd->txd3 = TX_DMA_PLEN0(MTK_QDMA_PAGE_SIZE);
-		if (MTK_HAS_CAPS(soc->caps, MTK_36BIT_DMA))
-			txd->txd3 |= TX_DMA_PREP_ADDR64(addr);
-		txd->txd4 = 0;
-		if (mtk_is_netsys_v2_or_greater(eth)) {
-			txd->txd5 = 0;
-			txd->txd6 = 0;
-			txd->txd7 = 0;
-			txd->txd8 = 0;
+		for (i = 0; i < cnt; i++) {
+			struct mtk_tx_dma_v2 *txd;
+
+			txd = eth->scratch_ring + (j * MTK_FQ_DMA_LENGTH + i) * soc->tx.desc_size;
+			txd->txd1 = dma_addr + i * MTK_QDMA_PAGE_SIZE;
+			if (j * MTK_FQ_DMA_LENGTH + i < cnt)
+				txd->txd2 = eth->phy_scratch_ring +
+					    (j * MTK_FQ_DMA_LENGTH + i + 1) * soc->tx.desc_size;
+
+			txd->txd3 = TX_DMA_PLEN0(MTK_QDMA_PAGE_SIZE);
+			if (MTK_HAS_CAPS(soc->caps, MTK_36BIT_DMA))
+				txd->txd3 |= TX_DMA_PREP_ADDR64(dma_addr + i * MTK_QDMA_PAGE_SIZE);
+
+			txd->txd4 = 0;
+			if (mtk_is_netsys_v2_or_greater(eth)) {
+				txd->txd5 = 0;
+				txd->txd6 = 0;
+				txd->txd7 = 0;
+				txd->txd8 = 0;
+			}
 		}
 	}
 
@@ -2457,7 +2463,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
 	if (MTK_HAS_CAPS(soc->caps, MTK_QDMA))
 		ring_size = MTK_QDMA_RING_SIZE;
 	else
-		ring_size = MTK_DMA_SIZE;
+		ring_size = soc->tx.dma_size;
 
 	ring->buf = kcalloc(ring_size, sizeof(*ring->buf),
 			       GFP_KERNEL);
@@ -2465,8 +2471,8 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
 		goto no_tx_mem;
 
 	if (MTK_HAS_CAPS(soc->caps, MTK_SRAM)) {
-		ring->dma = eth->sram_base + ring_size * sz;
-		ring->phys = eth->phy_scratch_ring + ring_size * (dma_addr_t)sz;
+		ring->dma = eth->sram_base + soc->tx.fq_dma_size * sz;
+		ring->phys = eth->phy_scratch_ring + soc->tx.fq_dma_size * (dma_addr_t)sz;
 	} else {
 		ring->dma = dma_alloc_coherent(eth->dma_dev, ring_size * sz,
 					       &ring->phys, GFP_KERNEL);
@@ -2588,6 +2594,7 @@ static void mtk_tx_clean(struct mtk_eth *eth)
 static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
 {
 	const struct mtk_reg_map *reg_map = eth->soc->reg_map;
+	const struct mtk_soc_data *soc = eth->soc;
 	struct mtk_rx_ring *ring;
 	int rx_data_len, rx_dma_size, tx_ring_size;
 	int i;
@@ -2595,7 +2602,7 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
 		tx_ring_size = MTK_QDMA_RING_SIZE;
 	else
-		tx_ring_size = MTK_DMA_SIZE;
+		tx_ring_size = soc->tx.dma_size;
 
 	if (rx_flag == MTK_RX_FLAGS_QDMA) {
 		if (ring_no)
@@ -2610,7 +2617,7 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
 		rx_dma_size = MTK_HW_LRO_DMA_SIZE;
 	} else {
 		rx_data_len = ETH_DATA_LEN;
-		rx_dma_size = MTK_DMA_SIZE;
+		rx_dma_size = soc->rx.dma_size;
 	}
 
 	ring->frag_size = mtk_max_frag_size(rx_data_len);
@@ -3139,7 +3146,10 @@ static void mtk_dma_free(struct mtk_eth *eth)
 			mtk_rx_clean(eth, &eth->rx_ring[i], false);
 	}
 
-	kfree(eth->scratch_head);
+	for (i = 0; i < DIV_ROUND_UP(soc->tx.fq_dma_size, MTK_FQ_DMA_LENGTH); i++) {
+		kfree(eth->scratch_head[i]);
+		eth->scratch_head[i] = NULL;
+	}
 }
 
 static bool mtk_hw_reset_check(struct mtk_eth *eth)
@@ -5052,11 +5062,14 @@ static const struct mtk_soc_data mt2701_data = {
 		.desc_size = sizeof(struct mtk_tx_dma),
 		.dma_max_len = MTK_TX_DMA_BUF_LEN,
 		.dma_len_offset = 16,
+		.dma_size = MTK_DMA_SIZE(2K),
+		.fq_dma_size = MTK_DMA_SIZE(2K),
 	},
 	.rx = {
 		.desc_size = sizeof(struct mtk_rx_dma),
 		.irq_done_mask = MTK_RX_DONE_INT,
 		.dma_l4_valid = RX_DMA_L4_VALID,
+		.dma_size = MTK_DMA_SIZE(2K),
 		.dma_max_len = MTK_TX_DMA_BUF_LEN,
 		.dma_len_offset = 16,
 	},
@@ -5076,11 +5089,14 @@ static const struct mtk_soc_data mt7621_data = {
 		.desc_size = sizeof(struct mtk_tx_dma),
 		.dma_max_len = MTK_TX_DMA_BUF_LEN,
 		.dma_len_offset = 16,
+		.dma_size = MTK_DMA_SIZE(2K),
+		.fq_dma_size = MTK_DMA_SIZE(2K),
 	},
 	.rx = {
 		.desc_size = sizeof(struct mtk_rx_dma),
 		.irq_done_mask = MTK_RX_DONE_INT,
 		.dma_l4_valid = RX_DMA_L4_VALID,
+		.dma_size = MTK_DMA_SIZE(2K),
 		.dma_max_len = MTK_TX_DMA_BUF_LEN,
 		.dma_len_offset = 16,
 	},
@@ -5102,11 +5118,14 @@ static const struct mtk_soc_data mt7622_data = {
 		.desc_size = sizeof(struct mtk_tx_dma),
 		.dma_max_len = MTK_TX_DMA_BUF_LEN,
 		.dma_len_offset = 16,
+		.dma_size = MTK_DMA_SIZE(2K),
+		.fq_dma_size = MTK_DMA_SIZE(2K),
 	},
 	.rx = {
 		.desc_size = sizeof(struct mtk_rx_dma),
 		.irq_done_mask = MTK_RX_DONE_INT,
 		.dma_l4_valid = RX_DMA_L4_VALID,
+		.dma_size = MTK_DMA_SIZE(2K),
 		.dma_max_len = MTK_TX_DMA_BUF_LEN,
 		.dma_len_offset = 16,
 	},
@@ -5127,11 +5146,14 @@ static const struct mtk_soc_data mt7623_data = {
 		.desc_size = sizeof(struct mtk_tx_dma),
 		.dma_max_len = MTK_TX_DMA_BUF_LEN,
 		.dma_len_offset = 16,
+		.dma_size = MTK_DMA_SIZE(2K),
+		.fq_dma_size = MTK_DMA_SIZE(2K),
 	},
 	.rx = {
 		.desc_size = sizeof(struct mtk_rx_dma),
 		.irq_done_mask = MTK_RX_DONE_INT,
 		.dma_l4_valid = RX_DMA_L4_VALID,
+		.dma_size = MTK_DMA_SIZE(2K),
 		.dma_max_len = MTK_TX_DMA_BUF_LEN,
 		.dma_len_offset = 16,
 	},
@@ -5150,11 +5172,14 @@ static const struct mtk_soc_data mt7629_data = {
 		.desc_size = sizeof(struct mtk_tx_dma),
 		.dma_max_len = MTK_TX_DMA_BUF_LEN,
 		.dma_len_offset = 16,
+		.dma_size = MTK_DMA_SIZE(2K),
+		.fq_dma_size = MTK_DMA_SIZE(2K),
 	},
 	.rx = {
 		.desc_size = sizeof(struct mtk_rx_dma),
 		.irq_done_mask = MTK_RX_DONE_INT,
 		.dma_l4_valid = RX_DMA_L4_VALID,
+		.dma_size = MTK_DMA_SIZE(2K),
 		.dma_max_len = MTK_TX_DMA_BUF_LEN,
 		.dma_len_offset = 16,
 	},
@@ -5176,6 +5201,8 @@ static const struct mtk_soc_data mt7981_data = {
 		.desc_size = sizeof(struct mtk_tx_dma_v2),
 		.dma_max_len = MTK_TX_DMA_BUF_LEN_V2,
 		.dma_len_offset = 8,
+		.dma_size = MTK_DMA_SIZE(2K),
+		.fq_dma_size = MTK_DMA_SIZE(2K),
 	},
 	.rx = {
 		.desc_size = sizeof(struct mtk_rx_dma),
@@ -5183,6 +5210,7 @@ static const struct mtk_soc_data mt7981_data = {
 		.dma_l4_valid = RX_DMA_L4_VALID_V2,
 		.dma_max_len = MTK_TX_DMA_BUF_LEN,
 		.dma_len_offset = 16,
+		.dma_size = MTK_DMA_SIZE(2K),
 	},
 };
 
@@ -5202,6 +5230,8 @@ static const struct mtk_soc_data mt7986_data = {
 		.desc_size = sizeof(struct mtk_tx_dma_v2),
 		.dma_max_len = MTK_TX_DMA_BUF_LEN_V2,
 		.dma_len_offset = 8,
+		.dma_size = MTK_DMA_SIZE(2K),
+		.fq_dma_size = MTK_DMA_SIZE(2K),
 	},
 	.rx = {
 		.desc_size = sizeof(struct mtk_rx_dma),
@@ -5209,6 +5239,7 @@ static const struct mtk_soc_data mt7986_data = {
 		.dma_l4_valid = RX_DMA_L4_VALID_V2,
 		.dma_max_len = MTK_TX_DMA_BUF_LEN,
 		.dma_len_offset = 16,
+		.dma_size = MTK_DMA_SIZE(2K),
 	},
 };
 
@@ -5228,6 +5259,8 @@ static const struct mtk_soc_data mt7988_data = {
 		.desc_size = sizeof(struct mtk_tx_dma_v2),
 		.dma_max_len = MTK_TX_DMA_BUF_LEN_V2,
 		.dma_len_offset = 8,
+		.dma_size = MTK_DMA_SIZE(2K),
+		.fq_dma_size = MTK_DMA_SIZE(4K),
 	},
 	.rx = {
 		.desc_size = sizeof(struct mtk_rx_dma_v2),
@@ -5235,6 +5268,7 @@ static const struct mtk_soc_data mt7988_data = {
 		.dma_l4_valid = RX_DMA_L4_VALID_V2,
 		.dma_max_len = MTK_TX_DMA_BUF_LEN_V2,
 		.dma_len_offset = 8,
+		.dma_size = MTK_DMA_SIZE(2K),
 	},
 };
 
@@ -5249,6 +5283,7 @@ static const struct mtk_soc_data rt5350_data = {
 		.desc_size = sizeof(struct mtk_tx_dma),
 		.dma_max_len = MTK_TX_DMA_BUF_LEN,
 		.dma_len_offset = 16,
+		.dma_size = MTK_DMA_SIZE(2K),
 	},
 	.rx = {
 		.desc_size = sizeof(struct mtk_rx_dma),
@@ -5256,6 +5291,7 @@ static const struct mtk_soc_data rt5350_data = {
 		.dma_l4_valid = RX_DMA_L4_VALID_PDMA,
 		.dma_max_len = MTK_TX_DMA_BUF_LEN,
 		.dma_len_offset = 16,
+		.dma_size = MTK_DMA_SIZE(2K),
 	},
 };
 
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 4eab30b44070..f5174f6cb1bb 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -32,7 +32,9 @@
 #define MTK_TX_DMA_BUF_LEN	0x3fff
 #define MTK_TX_DMA_BUF_LEN_V2	0xffff
 #define MTK_QDMA_RING_SIZE	2048
-#define MTK_DMA_SIZE		512
+#define MTK_DMA_SIZE(x)		(SZ_##x)
+#define MTK_FQ_DMA_HEAD		32
+#define MTK_FQ_DMA_LENGTH	2048
 #define MTK_RX_ETH_HLEN		(ETH_HLEN + ETH_FCS_LEN)
 #define MTK_RX_HLEN		(NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN)
 #define MTK_DMA_DUMMY_DESC	0xffffffff
@@ -1176,6 +1178,8 @@ struct mtk_soc_data {
 		u32	desc_size;
 		u32	dma_max_len;
 		u32	dma_len_offset;
+		u32	dma_size;
+		u32	fq_dma_size;
 	} tx;
 	struct {
 		u32	desc_size;
@@ -1183,6 +1187,7 @@ struct mtk_soc_data {
 		u32	dma_l4_valid;
 		u32	dma_max_len;
 		u32	dma_len_offset;
+		u32	dma_size;
 	} rx;
 };
 
@@ -1264,7 +1269,7 @@ struct mtk_eth {
 	struct napi_struct		rx_napi;
 	void				*scratch_ring;
 	dma_addr_t			phy_scratch_ring;
-	void				*scratch_head;
+	void				*scratch_head[MTK_FQ_DMA_HEAD];
 	struct clk			*clks[MTK_CLK_MAX];
 
 	struct mii_bus			*mii_bus;
-- 
cgit v1.2.3


From 33afbfcc105a572159750f2ebee834a8a70fdd96 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@nvidia.com>
Date: Tue, 4 Jun 2024 00:04:42 +0300
Subject: net/mlx5: Stop waiting for PCI if pci channel is offline

In case pci channel becomes offline the driver should not wait for PCI
reads during health dump and recovery flow. The driver has timeout for
each of these loops trying to read PCI, so it would fail anyway.
However, in case of recovery waiting till timeout may cause the pci
error_detected() callback fail to meet pci_dpc_recovered() wait timeout.

Fixes: b3bd076f7501 ("net/mlx5: Report devlink health on FW fatal issues")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Shay Drori <shayd@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/fw.c          | 4 ++++
 drivers/net/ethernet/mellanox/mlx5/core/health.c      | 8 ++++++++
 drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c | 4 ++++
 3 files changed, 16 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 2d95a9b7b44e..b61b7d966114 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -373,6 +373,10 @@ int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
 	do {
 		if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED)
 			break;
+		if (pci_channel_offline(dev->pdev)) {
+			mlx5_core_err(dev, "PCI channel offline, stop waiting for NIC IFC\n");
+			return -EACCES;
+		}
 
 		cond_resched();
 	} while (!time_after(jiffies, end));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index ad38e31822df..a6329ca2d9bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -248,6 +248,10 @@ recover_from_sw_reset:
 	do {
 		if (mlx5_get_nic_state(dev) == MLX5_INITIAL_SEG_NIC_INTERFACE_DISABLED)
 			break;
+		if (pci_channel_offline(dev->pdev)) {
+			mlx5_core_err(dev, "PCI channel offline, stop waiting for NIC IFC\n");
+			goto unlock;
+		}
 
 		msleep(20);
 	} while (!time_after(jiffies, end));
@@ -317,6 +321,10 @@ int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
 			mlx5_core_warn(dev, "device is being removed, stop waiting for PCI\n");
 			return -ENODEV;
 		}
+		if (pci_channel_offline(dev->pdev)) {
+			mlx5_core_err(dev, "PCI channel offline, stop waiting for PCI\n");
+			return -EACCES;
+		}
 		msleep(100);
 	}
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
index 6b774e0c2766..d0b595ba6110 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c
@@ -74,6 +74,10 @@ int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev)
 			ret = -EBUSY;
 			goto pci_unlock;
 		}
+		if (pci_channel_offline(dev->pdev)) {
+			ret = -EACCES;
+			goto pci_unlock;
+		}
 
 		/* Check if semaphore is already locked */
 		ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val);
-- 
cgit v1.2.3


From c8b3f38d2dae0397944814d691a419c451f9906f Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 4 Jun 2024 00:04:43 +0300
Subject: net/mlx5: Always stop health timer during driver removal

Currently, if teardown_hca fails to execute during driver removal, mlx5
does not stop the health timer. Afterwards, mlx5 continue with driver
teardown. This may lead to a UAF bug, which results in page fault
Oops[1], since the health timer invokes after resources were freed.

Hence, stop the health monitor even if teardown_hca fails.

[1]
mlx5_core 0000:18:00.0: E-Switch: Unload vfs: mode(LEGACY), nvfs(0), necvfs(0), active vports(0)
mlx5_core 0000:18:00.0: E-Switch: Disable: mode(LEGACY), nvfs(0), necvfs(0), active vports(0)
mlx5_core 0000:18:00.0: E-Switch: Disable: mode(LEGACY), nvfs(0), necvfs(0), active vports(0)
mlx5_core 0000:18:00.0: E-Switch: cleanup
mlx5_core 0000:18:00.0: wait_func:1155:(pid 1967079): TEARDOWN_HCA(0x103) timeout. Will cause a leak of a command resource
mlx5_core 0000:18:00.0: mlx5_function_close:1288:(pid 1967079): tear_down_hca failed, skip cleanup
BUG: unable to handle page fault for address: ffffa26487064230
PGD 100c00067 P4D 100c00067 PUD 100e5a067 PMD 105ed7067 PTE 0
Oops: 0000 [#1] PREEMPT SMP PTI
CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           OE     -------  ---  6.7.0-68.fc38.x86_64 #1
Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0013.121520200651 12/15/2020
RIP: 0010:ioread32be+0x34/0x60
RSP: 0018:ffffa26480003e58 EFLAGS: 00010292
RAX: ffffa26487064200 RBX: ffff9042d08161a0 RCX: ffff904c108222c0
RDX: 000000010bbf1b80 RSI: ffffffffc055ddb0 RDI: ffffa26487064230
RBP: ffff9042d08161a0 R08: 0000000000000022 R09: ffff904c108222e8
R10: 0000000000000004 R11: 0000000000000441 R12: ffffffffc055ddb0
R13: ffffa26487064200 R14: ffffa26480003f00 R15: ffff904c108222c0
FS:  0000000000000000(0000) GS:ffff904c10800000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffa26487064230 CR3: 00000002c4420006 CR4: 00000000007706f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
PKRU: 55555554
Call Trace:
 <IRQ>
 ? __die+0x23/0x70
 ? page_fault_oops+0x171/0x4e0
 ? exc_page_fault+0x175/0x180
 ? asm_exc_page_fault+0x26/0x30
 ? __pfx_poll_health+0x10/0x10 [mlx5_core]
 ? __pfx_poll_health+0x10/0x10 [mlx5_core]
 ? ioread32be+0x34/0x60
 mlx5_health_check_fatal_sensors+0x20/0x100 [mlx5_core]
 ? __pfx_poll_health+0x10/0x10 [mlx5_core]
 poll_health+0x42/0x230 [mlx5_core]
 ? __next_timer_interrupt+0xbc/0x110
 ? __pfx_poll_health+0x10/0x10 [mlx5_core]
 call_timer_fn+0x21/0x130
 ? __pfx_poll_health+0x10/0x10 [mlx5_core]
 __run_timers+0x222/0x2c0
 run_timer_softirq+0x1d/0x40
 __do_softirq+0xc9/0x2c8
 __irq_exit_rcu+0xa6/0xc0
 sysvec_apic_timer_interrupt+0x72/0x90
 </IRQ>
 <TASK>
 asm_sysvec_apic_timer_interrupt+0x1a/0x20
RIP: 0010:cpuidle_enter_state+0xcc/0x440
 ? cpuidle_enter_state+0xbd/0x440
 cpuidle_enter+0x2d/0x40
 do_idle+0x20d/0x270
 cpu_startup_entry+0x2a/0x30
 rest_init+0xd0/0xd0
 arch_call_rest_init+0xe/0x30
 start_kernel+0x709/0xa90
 x86_64_start_reservations+0x18/0x30
 x86_64_start_kernel+0x96/0xa0
 secondary_startup_64_no_verify+0x18f/0x19b
---[ end trace 0000000000000000 ]---

Fixes: 9b98d395b85d ("net/mlx5: Start health poll at earlier stage of driver load")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 6574c145dc1e..459a836a5d9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1298,6 +1298,9 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
 
 	if (!err)
 		mlx5_function_disable(dev, boot);
+	else
+		mlx5_stop_health_poll(dev, boot);
+
 	return err;
 }
 
-- 
cgit v1.2.3


From 74751ef5c1912ebd3e65c3b65f45587e05ce5d36 Mon Sep 17 00:00:00 2001
From: Haifeng Xu <haifeng.xu@shopee.com>
Date: Mon, 13 May 2024 10:39:48 +0000
Subject: perf/core: Fix missing wakeup when waiting for context reference

In our production environment, we found many hung tasks which are
blocked for more than 18 hours. Their call traces are like this:

[346278.191038] __schedule+0x2d8/0x890
[346278.191046] schedule+0x4e/0xb0
[346278.191049] perf_event_free_task+0x220/0x270
[346278.191056] ? init_wait_var_entry+0x50/0x50
[346278.191060] copy_process+0x663/0x18d0
[346278.191068] kernel_clone+0x9d/0x3d0
[346278.191072] __do_sys_clone+0x5d/0x80
[346278.191076] __x64_sys_clone+0x25/0x30
[346278.191079] do_syscall_64+0x5c/0xc0
[346278.191083] ? syscall_exit_to_user_mode+0x27/0x50
[346278.191086] ? do_syscall_64+0x69/0xc0
[346278.191088] ? irqentry_exit_to_user_mode+0x9/0x20
[346278.191092] ? irqentry_exit+0x19/0x30
[346278.191095] ? exc_page_fault+0x89/0x160
[346278.191097] ? asm_exc_page_fault+0x8/0x30
[346278.191102] entry_SYSCALL_64_after_hwframe+0x44/0xae

The task was waiting for the refcount become to 1, but from the vmcore,
we found the refcount has already been 1. It seems that the task didn't
get woken up by perf_event_release_kernel() and got stuck forever. The
below scenario may cause the problem.

Thread A					Thread B
...						...
perf_event_free_task				perf_event_release_kernel
						   ...
						   acquire event->child_mutex
						   ...
						   get_ctx
   ...						   release event->child_mutex
   acquire ctx->mutex
   ...
   perf_free_event (acquire/release event->child_mutex)
   ...
   release ctx->mutex
   wait_var_event
						   acquire ctx->mutex
						   acquire event->child_mutex
						   # move existing events to free_list
						   release event->child_mutex
						   release ctx->mutex
						   put_ctx
...						...

In this case, all events of the ctx have been freed, so we couldn't
find the ctx in free_list and Thread A will miss the wakeup. It's thus
necessary to add a wakeup after dropping the reference.

Fixes: 1cf8dfe8a661 ("perf/core: Fix race between close() and fork()")
Signed-off-by: Haifeng Xu <haifeng.xu@shopee.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20240513103948.33570-1-haifeng.xu@shopee.com
---
 kernel/events/core.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index f0128c5ff278..8f908f077935 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5384,6 +5384,7 @@ int perf_event_release_kernel(struct perf_event *event)
 again:
 	mutex_lock(&event->child_mutex);
 	list_for_each_entry(child, &event->child_list, child_list) {
+		void *var = NULL;
 
 		/*
 		 * Cannot change, child events are not migrated, see the
@@ -5424,11 +5425,23 @@ again:
 			 * this can't be the last reference.
 			 */
 			put_event(event);
+		} else {
+			var = &ctx->refcount;
 		}
 
 		mutex_unlock(&event->child_mutex);
 		mutex_unlock(&ctx->mutex);
 		put_ctx(ctx);
+
+		if (var) {
+			/*
+			 * If perf_event_free_task() has deleted all events from the
+			 * ctx while the child_mutex got released above, make sure to
+			 * notify about the preceding put_ctx().
+			 */
+			smp_mb(); /* pairs with wait_var_event() */
+			wake_up_var(var);
+		}
 		goto again;
 	}
 	mutex_unlock(&event->child_mutex);
-- 
cgit v1.2.3


From f92a59f6d12e31ead999fee9585471b95a8ae8a3 Mon Sep 17 00:00:00 2001
From: Carlos Llamas <cmllamas@google.com>
Date: Wed, 15 May 2024 13:37:10 +0000
Subject: locking/atomic: scripts: fix ${atomic}_sub_and_test() kerneldoc

For ${atomic}_sub_and_test() the @i parameter is the value to subtract,
not add. Fix the typo in the kerneldoc template and generate the headers
with this update.

Fixes: ad8110706f38 ("locking/atomic: scripts: generate kerneldoc comments")
Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Carlos Llamas <cmllamas@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20240515133844.3502360-1-cmllamas@google.com
---
 include/linux/atomic/atomic-arch-fallback.h | 6 +++---
 include/linux/atomic/atomic-instrumented.h  | 8 ++++----
 include/linux/atomic/atomic-long.h          | 4 ++--
 scripts/atomic/kerneldoc/sub_and_test       | 2 +-
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h
index 956bcba5dbf2..2f9d36b72bd8 100644
--- a/include/linux/atomic/atomic-arch-fallback.h
+++ b/include/linux/atomic/atomic-arch-fallback.h
@@ -2242,7 +2242,7 @@ raw_atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
 
 /**
  * raw_atomic_sub_and_test() - atomic subtract and test if zero with full ordering
- * @i: int value to add
+ * @i: int value to subtract
  * @v: pointer to atomic_t
  *
  * Atomically updates @v to (@v - @i) with full ordering.
@@ -4368,7 +4368,7 @@ raw_atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
 
 /**
  * raw_atomic64_sub_and_test() - atomic subtract and test if zero with full ordering
- * @i: s64 value to add
+ * @i: s64 value to subtract
  * @v: pointer to atomic64_t
  *
  * Atomically updates @v to (@v - @i) with full ordering.
@@ -4690,4 +4690,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v)
 }
 
 #endif /* _LINUX_ATOMIC_FALLBACK_H */
-// 14850c0b0db20c62fdc78ccd1d42b98b88d76331
+// b565db590afeeff0d7c9485ccbca5bb6e155749f
diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h
index debd487fe971..9409a6ddf3e0 100644
--- a/include/linux/atomic/atomic-instrumented.h
+++ b/include/linux/atomic/atomic-instrumented.h
@@ -1349,7 +1349,7 @@ atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
 
 /**
  * atomic_sub_and_test() - atomic subtract and test if zero with full ordering
- * @i: int value to add
+ * @i: int value to subtract
  * @v: pointer to atomic_t
  *
  * Atomically updates @v to (@v - @i) with full ordering.
@@ -2927,7 +2927,7 @@ atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
 
 /**
  * atomic64_sub_and_test() - atomic subtract and test if zero with full ordering
- * @i: s64 value to add
+ * @i: s64 value to subtract
  * @v: pointer to atomic64_t
  *
  * Atomically updates @v to (@v - @i) with full ordering.
@@ -4505,7 +4505,7 @@ atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
 
 /**
  * atomic_long_sub_and_test() - atomic subtract and test if zero with full ordering
- * @i: long value to add
+ * @i: long value to subtract
  * @v: pointer to atomic_long_t
  *
  * Atomically updates @v to (@v - @i) with full ordering.
@@ -5050,4 +5050,4 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 
 
 #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
-// ce5b65e0f1f8a276268b667194581d24bed219d4
+// 8829b337928e9508259079d32581775ececd415b
diff --git a/include/linux/atomic/atomic-long.h b/include/linux/atomic/atomic-long.h
index 3ef844b3ab8a..f86b29d90877 100644
--- a/include/linux/atomic/atomic-long.h
+++ b/include/linux/atomic/atomic-long.h
@@ -1535,7 +1535,7 @@ raw_atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
 
 /**
  * raw_atomic_long_sub_and_test() - atomic subtract and test if zero with full ordering
- * @i: long value to add
+ * @i: long value to subtract
  * @v: pointer to atomic_long_t
  *
  * Atomically updates @v to (@v - @i) with full ordering.
@@ -1809,4 +1809,4 @@ raw_atomic_long_dec_if_positive(atomic_long_t *v)
 }
 
 #endif /* _LINUX_ATOMIC_LONG_H */
-// 1c4a26fc77f345342953770ebe3c4d08e7ce2f9a
+// eadf183c3600b8b92b91839dd3be6bcc560c752d
diff --git a/scripts/atomic/kerneldoc/sub_and_test b/scripts/atomic/kerneldoc/sub_and_test
index d3760f7749d4..96615e50836b 100644
--- a/scripts/atomic/kerneldoc/sub_and_test
+++ b/scripts/atomic/kerneldoc/sub_and_test
@@ -1,7 +1,7 @@
 cat <<EOF
 /**
  * ${class}${atomicname}() - atomic subtract and test if zero with ${desc_order} ordering
- * @i: ${int} value to add
+ * @i: ${int} value to subtract
  * @v: pointer to ${atomic}_t
  *
  * Atomically updates @v to (@v - @i) with ${desc_order} ordering.
-- 
cgit v1.2.3


From 5b3cde198878b2f3269d5e7efbc0d514899b1fd8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 4 Jun 2024 11:00:22 -0300
Subject: Revert "perf record: Reduce memory for recording
 PERF_RECORD_LOST_SAMPLES event"

This reverts commit 7d1405c71df21f6c394b8a885aa8a133f749fa22.

This causes segfaults in some cases, as reported by Milian:

  ```
  sudo /usr/bin/perf record -z --call-graph dwarf -e cycles -e
  raw_syscalls:sys_enter ls
  ...
  [ perf record: Woken up 3 times to write data ]
  malloc(): invalid next size (unsorted)
  Aborted
  ```

  Backtrace with GDB + debuginfod:

  ```
  malloc(): invalid next size (unsorted)

  Thread 1 "perf" received signal SIGABRT, Aborted.
  __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6,
  no_tid=no_tid@entry=0) at pthread_kill.c:44
  Downloading source file /usr/src/debug/glibc/glibc/nptl/pthread_kill.c
  44            return INTERNAL_SYSCALL_ERROR_P (ret) ? INTERNAL_SYSCALL_ERRNO
  (ret) : 0;
  (gdb) bt
  #0  __pthread_kill_implementation (threadid=<optimized out>,
  signo=signo@entry=6, no_tid=no_tid@entry=0) at pthread_kill.c:44
  #1  0x00007ffff6ea8eb3 in __pthread_kill_internal (threadid=<optimized out>,
  signo=6) at pthread_kill.c:78
  #2  0x00007ffff6e50a30 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/
  raise.c:26
  #3  0x00007ffff6e384c3 in __GI_abort () at abort.c:79
  #4  0x00007ffff6e39354 in __libc_message_impl (fmt=fmt@entry=0x7ffff6fc22ea
  "%s\n") at ../sysdeps/posix/libc_fatal.c:132
  #5  0x00007ffff6eb3085 in malloc_printerr (str=str@entry=0x7ffff6fc5850
  "malloc(): invalid next size (unsorted)") at malloc.c:5772
  #6  0x00007ffff6eb657c in _int_malloc (av=av@entry=0x7ffff6ff6ac0
  <main_arena>, bytes=bytes@entry=368) at malloc.c:4081
  #7  0x00007ffff6eb877e in __libc_calloc (n=<optimized out>,
  elem_size=<optimized out>) at malloc.c:3754
  #8  0x000055555569bdb6 in perf_session.do_write_header ()
  #9  0x00005555555a373a in __cmd_record.constprop.0 ()
  #10 0x00005555555a6846 in cmd_record ()
  #11 0x000055555564db7f in run_builtin ()
  #12 0x000055555558ed77 in main ()
  ```

  Valgrind memcheck:
  ```
  ==45136== Invalid write of size 8
  ==45136==    at 0x2B38A5: perf_event__synthesize_id_sample (in /usr/bin/perf)
  ==45136==    by 0x157069: __cmd_record.constprop.0 (in /usr/bin/perf)
  ==45136==    by 0x15A845: cmd_record (in /usr/bin/perf)
  ==45136==    by 0x201B7E: run_builtin (in /usr/bin/perf)
  ==45136==    by 0x142D76: main (in /usr/bin/perf)
  ==45136==  Address 0x6a866a8 is 0 bytes after a block of size 40 alloc'd
  ==45136==    at 0x4849BF3: calloc (vg_replace_malloc.c:1675)
  ==45136==    by 0x3574AB: zalloc (in /usr/bin/perf)
  ==45136==    by 0x1570E0: __cmd_record.constprop.0 (in /usr/bin/perf)
  ==45136==    by 0x15A845: cmd_record (in /usr/bin/perf)
  ==45136==    by 0x201B7E: run_builtin (in /usr/bin/perf)
  ==45136==    by 0x142D76: main (in /usr/bin/perf)
  ==45136==
  ==45136== Syscall param write(buf) points to unaddressable byte(s)
  ==45136==    at 0x575953D: __libc_write (write.c:26)
  ==45136==    by 0x575953D: write (write.c:24)
  ==45136==    by 0x35761F: ion (in /usr/bin/perf)
  ==45136==    by 0x357778: writen (in /usr/bin/perf)
  ==45136==    by 0x1548F7: record__write (in /usr/bin/perf)
  ==45136==    by 0x15708A: __cmd_record.constprop.0 (in /usr/bin/perf)
  ==45136==    by 0x15A845: cmd_record (in /usr/bin/perf)
  ==45136==    by 0x201B7E: run_builtin (in /usr/bin/perf)
  ==45136==    by 0x142D76: main (in /usr/bin/perf)
  ==45136==  Address 0x6a866a8 is 0 bytes after a block of size 40 alloc'd
  ==45136==    at 0x4849BF3: calloc (vg_replace_malloc.c:1675)
  ==45136==    by 0x3574AB: zalloc (in /usr/bin/perf)
  ==45136==    by 0x1570E0: __cmd_record.constprop.0 (in /usr/bin/perf)
  ==45136==    by 0x15A845: cmd_record (in /usr/bin/perf)
  ==45136==    by 0x201B7E: run_builtin (in /usr/bin/perf)
  ==45136==    by 0x142D76: main (in /usr/bin/perf)
  ==45136==
 -----

Closes: https://lore.kernel.org/linux-perf-users/23879991.0LEYPuXRzz@milian-workstation/
Reported-by: Milian Wolff <milian.wolff@kdab.com>
Tested-by: Milian Wolff <milian.wolff@kdab.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: stable@kernel.org # 6.8+
Link: https://lore.kernel.org/lkml/Zl9ksOlHJHnKM70p@x1
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 66a3de8ac661..0a8ba1323d64 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1956,8 +1956,7 @@ static void record__read_lost_samples(struct record *rec)
 
 				if (count.lost) {
 					if (!lost) {
-						lost = zalloc(sizeof(*lost) +
-							      session->machines.host.id_hdr_size);
+						lost = zalloc(PERF_SAMPLE_MAX_SIZE);
 						if (!lost) {
 							pr_debug("Memory allocation failed\n");
 							return;
@@ -1973,8 +1972,7 @@ static void record__read_lost_samples(struct record *rec)
 		lost_count = perf_bpf_filter__lost_count(evsel);
 		if (lost_count) {
 			if (!lost) {
-				lost = zalloc(sizeof(*lost) +
-					      session->machines.host.id_hdr_size);
+				lost = zalloc(PERF_SAMPLE_MAX_SIZE);
 				if (!lost) {
 					pr_debug("Memory allocation failed\n");
 					return;
-- 
cgit v1.2.3


From ca9680821dfec73c9100860bda4fab1f1309722e Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Tue, 21 May 2024 10:07:40 -0700
Subject: perf bpf: Fix handling of minimal vmlinux.h file when interrupting
 the build

Ingo reported that he was seeing these when hitting Control+C during a
perf tools build:

  Makefile.perf:1149: *** Missing bpftool input for generating vmlinux.h. Stop.

The failure happens when you don't have vmlinux.h or vmlinux with BTF.

ifeq ($(VMLINUX_H),)
  ifeq ($(VMLINUX_BTF),)
    $(error Missing bpftool input for generating vmlinux.h)
  endif
endif

VMLINUX_BTF can be empty if you didn't build a kernel or it doesn't have
a BTF section and the current kernel also has no BTF.  This is totally
ok.

But VMLINUX_H should be set to the minimal version in the source tree
(unless you overwrite it manually) when you don't pass GEN_VMLINUX_H=1
(which requires VMLINUX_BTF should not be empty).  The problem is that
it's defined in Makefile.config which is not included for `make clean`.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Ingo Molnar <mingo@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Link: http://lore.kernel.org/lkml/CAM9d7ch5HTr+k+_GpbMrX0HUo5BZ11byh1xq0Two7B7RQACuNw@mail.gmail.com
Link: http://lore.kernel.org/lkml/ZjssGrj+abyC6mYP@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.perf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 5c35c0d89306..e6d56b555369 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -214,6 +214,7 @@ NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help
 
 ifdef MAKECMDGOALS
 ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
+  VMLINUX_H=$(src-perf)/util/bpf_skel/vmlinux/vmlinux.h
   config := 0
 endif
 endif
-- 
cgit v1.2.3


From 9a64e1bfd8a10c015c41fe0d289e89862486c50d Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 2 Jun 2024 22:52:24 -0400
Subject: bcachefs: Fix GFP_KERNEL allocation in break_cycle()

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/btree_locking.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index c3e9b0cc7bbd..d66fff22109a 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -215,6 +215,7 @@ static noinline int break_cycle(struct lock_graph *g, struct printbuf *cycle)
 
 	if (unlikely(!best)) {
 		struct printbuf buf = PRINTBUF;
+		buf.atomic++;
 
 		prt_printf(&buf, bch2_fmt(g->g->trans->c, "cycle of nofail locks"));
 
-- 
cgit v1.2.3


From fdccb24352e589bb59c9ba90f23c4e0994b90518 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 2 Jun 2024 22:25:18 -0400
Subject: bcachefs: Rereplicate now moves data off of durability=0 devices

This fixes an issue where setting a device to durability=0 after it's
been used makes it impossible to remove.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/move.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 8171f947fac8..2706d8242345 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -920,7 +920,20 @@ static bool rereplicate_pred(struct bch_fs *c, void *arg,
 		? c->opts.metadata_replicas
 		: io_opts->data_replicas;
 
-	if (!nr_good || nr_good >= replicas)
+	rcu_read_lock();
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	unsigned i = 0;
+	bkey_for_each_ptr(ptrs, ptr) {
+		struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
+		if (!ptr->cached &&
+		    (!ca || !ca->mi.durability))
+			data_opts->kill_ptrs |= BIT(i);
+		i++;
+	}
+	rcu_read_unlock();
+
+	if (!data_opts->kill_ptrs &&
+	    (!nr_good || nr_good >= replicas))
 		return false;
 
 	data_opts->target		= 0;
-- 
cgit v1.2.3


From 319fef29e96524966bb8593117ce0c5867846eea Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Mon, 3 Jun 2024 18:00:48 -0400
Subject: bcachefs: Fix trans->locked assert

in bch2_move_data_btree, we might start with the trans unlocked from a
previous loop iteration - we need a trans_begin() before iter_init().

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/move.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 2706d8242345..6e477fadaa2a 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -547,6 +547,7 @@ static int bch2_move_data_btree(struct moving_context *ctxt,
 		ctxt->stats->pos	= BBPOS(btree_id, start);
 	}
 
+	bch2_trans_begin(trans);
 	bch2_trans_iter_init(trans, &iter, btree_id, start,
 			     BTREE_ITER_prefetch|
 			     BTREE_ITER_all_snapshots);
-- 
cgit v1.2.3


From d38e48563c1f70460503de7ffff9a7f46b54b67d Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Mon, 3 Jun 2024 21:03:42 +0200
Subject: s390/crash: Do not use VM info if os_info does not have it

The virtual memory information stored in os_info area is
required for creation of the kernel image PT_LOAD program
header for kernels since commit a2ec5bec56dd ("s390/mm:
uncouple physical vs virtual address spaces").

By contrast, if such information in os_info is absent the
PT_LOAD program header should not be created.

Currently the proper PT_LOAD program header is created for
kernels that contain the virtual memory information, but
for kernels without one an invalid header of zero size is
created. That in turn leads to stand-alone dump failures.

Use OS_INFO_KASLR_OFFSET variable to check whether os_info
is present or not (same as crash and makedumpfile tools do)
and based on that create or do not create the kernel image
PT_LOAD program header.

Fixes: f4cac27dc0d6 ("s390/crash: Use old os_info to create PT_LOAD headers")
Tested-by: Mikhail Zaslonko <zaslonko@linux.ibm.com>
Acked-by: Mikhail Zaslonko <zaslonko@linux.ibm.com>
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
---
 arch/s390/kernel/crash_dump.c | 54 ++++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 9863ebe75019..edae13416196 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -451,7 +451,7 @@ static void *nt_final(void *ptr)
 /*
  * Initialize ELF header (new kernel)
  */
-static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
+static void *ehdr_init(Elf64_Ehdr *ehdr, int phdr_count)
 {
 	memset(ehdr, 0, sizeof(*ehdr));
 	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
@@ -465,11 +465,8 @@ static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt)
 	ehdr->e_phoff = sizeof(Elf64_Ehdr);
 	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
 	ehdr->e_phentsize = sizeof(Elf64_Phdr);
-	/*
-	 * Number of memory chunk PT_LOAD program headers plus one kernel
-	 * image PT_LOAD program header plus one PT_NOTE program header.
-	 */
-	ehdr->e_phnum = mem_chunk_cnt + 1 + 1;
+	/* Number of PT_LOAD program headers plus PT_NOTE program header */
+	ehdr->e_phnum = phdr_count + 1;
 	return ehdr + 1;
 }
 
@@ -503,12 +500,14 @@ static int get_mem_chunk_cnt(void)
 /*
  * Initialize ELF loads (new kernel)
  */
-static void loads_init(Elf64_Phdr *phdr)
+static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm)
 {
-	unsigned long old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
+	unsigned long old_identity_base = 0;
 	phys_addr_t start, end;
 	u64 idx;
 
+	if (os_info_has_vm)
+		old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
 	for_each_physmem_range(idx, &oldmem_type, &start, &end) {
 		phdr->p_type = PT_LOAD;
 		phdr->p_vaddr = old_identity_base + start;
@@ -522,6 +521,11 @@ static void loads_init(Elf64_Phdr *phdr)
 	}
 }
 
+static bool os_info_has_vm(void)
+{
+	return os_info_old_value(OS_INFO_KASLR_OFFSET);
+}
+
 /*
  * Prepare PT_LOAD type program header for kernel image region
  */
@@ -566,7 +570,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
 	return ptr;
 }
 
-static size_t get_elfcorehdr_size(int mem_chunk_cnt)
+static size_t get_elfcorehdr_size(int phdr_count)
 {
 	size_t size;
 
@@ -581,10 +585,8 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
 	size += nt_vmcoreinfo_size();
 	/* nt_final */
 	size += sizeof(Elf64_Nhdr);
-	/* PT_LOAD type program header for kernel text region */
-	size += sizeof(Elf64_Phdr);
 	/* PT_LOADS */
-	size += mem_chunk_cnt * sizeof(Elf64_Phdr);
+	size += phdr_count * sizeof(Elf64_Phdr);
 
 	return size;
 }
@@ -595,8 +597,8 @@ static size_t get_elfcorehdr_size(int mem_chunk_cnt)
 int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
 {
 	Elf64_Phdr *phdr_notes, *phdr_loads, *phdr_text;
+	int mem_chunk_cnt, phdr_text_cnt;
 	size_t alloc_size;
-	int mem_chunk_cnt;
 	void *ptr, *hdr;
 	u64 hdr_off;
 
@@ -615,12 +617,14 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
 	}
 
 	mem_chunk_cnt = get_mem_chunk_cnt();
+	phdr_text_cnt = os_info_has_vm() ? 1 : 0;
 
-	alloc_size = get_elfcorehdr_size(mem_chunk_cnt);
+	alloc_size = get_elfcorehdr_size(mem_chunk_cnt + phdr_text_cnt);
 
 	hdr = kzalloc(alloc_size, GFP_KERNEL);
 
-	/* Without elfcorehdr /proc/vmcore cannot be created. Thus creating
+	/*
+	 * Without elfcorehdr /proc/vmcore cannot be created. Thus creating
 	 * a dump with this crash kernel will fail. Panic now to allow other
 	 * dump mechanisms to take over.
 	 */
@@ -628,21 +632,23 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
 		panic("s390 kdump allocating elfcorehdr failed");
 
 	/* Init elf header */
-	ptr = ehdr_init(hdr, mem_chunk_cnt);
+	phdr_notes = ehdr_init(hdr, mem_chunk_cnt + phdr_text_cnt);
 	/* Init program headers */
-	phdr_notes = ptr;
-	ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
-	phdr_text = ptr;
-	ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr));
-	phdr_loads = ptr;
-	ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt);
+	if (phdr_text_cnt) {
+		phdr_text = phdr_notes + 1;
+		phdr_loads = phdr_text + 1;
+	} else {
+		phdr_loads = phdr_notes + 1;
+	}
+	ptr = PTR_ADD(phdr_loads, sizeof(Elf64_Phdr) * mem_chunk_cnt);
 	/* Init notes */
 	hdr_off = PTR_DIFF(ptr, hdr);
 	ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
 	/* Init kernel text program header */
-	text_init(phdr_text);
+	if (phdr_text_cnt)
+		text_init(phdr_text);
 	/* Init loads */
-	loads_init(phdr_loads);
+	loads_init(phdr_loads, phdr_text_cnt);
 	/* Finalize program headers */
 	hdr_off = PTR_DIFF(ptr, hdr);
 	*addr = (unsigned long long) hdr;
-- 
cgit v1.2.3


From 01c51a32dc18f128d2e55a7b2128b77fc01a2285 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 12 Apr 2024 10:43:29 +0200
Subject: KVM: s390x: selftests: Add shared zeropage test

Let's test that we can have shared zeropages in our process as long as
storage keys are not getting used, that shared zeropages are properly
unshared (replaced by anonymous pages) once storage keys are enabled,
and that no new shared zeropages are populated after storage keys
were enabled.

We require the new pagemap interface to detect the shared zeropage.

On an old kernel (zeropages always disabled):
	# ./s390x/shared_zeropage_test
	TAP version 13
	1..3
	not ok 1 Shared zeropages should be enabled
	ok 2 Shared zeropage should be gone
	ok 3 Shared zeropages should be disabled
	# Totals: pass:2 fail:1 xfail:0 xpass:0 skip:0 error:0

On a fixed kernel:
	# ./s390x/shared_zeropage_test
	TAP version 13
	1..3
	ok 1 Shared zeropages should be enabled
	ok 2 Shared zeropage should be gone
	ok 3 Shared zeropages should be disabled
	# Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0

Testing of UFFDIO_ZEROPAGE can be added later.

[ agordeev: Fixed checkpatch complaint, added ucall_common.h include ]

Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Janosch Frank <frankja@linux.ibm.com>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: Thomas Huth <thuth@redhat.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Acked-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Tested-by: Alexander Gordeev <agordeev@linux.ibm.com>
Link: https://lore.kernel.org/r/20240412084329.30315-1-david@redhat.com
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
---
 tools/testing/selftests/kvm/Makefile               |   1 +
 .../selftests/kvm/s390x/shared_zeropage_test.c     | 111 +++++++++++++++++++++
 2 files changed, 112 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/s390x/shared_zeropage_test.c

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index ce8ff8e8ce3a..ac280dcba996 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -183,6 +183,7 @@ TEST_GEN_PROGS_s390x += s390x/sync_regs_test
 TEST_GEN_PROGS_s390x += s390x/tprot
 TEST_GEN_PROGS_s390x += s390x/cmma_test
 TEST_GEN_PROGS_s390x += s390x/debug_test
+TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test
 TEST_GEN_PROGS_s390x += demand_paging_test
 TEST_GEN_PROGS_s390x += dirty_log_test
 TEST_GEN_PROGS_s390x += guest_print_test
diff --git a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c
new file mode 100644
index 000000000000..bba0d9a6dcc8
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test shared zeropage handling (with/without storage keys)
+ *
+ * Copyright (C) 2024, Red Hat, Inc.
+ */
+#include <sys/mman.h>
+
+#include <linux/fs.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+
+static void set_storage_key(void *addr, uint8_t skey)
+{
+	asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
+}
+
+static void guest_code(void)
+{
+	/* Issue some storage key instruction. */
+	set_storage_key((void *)0, 0x98);
+	GUEST_DONE();
+}
+
+/*
+ * Returns 1 if the shared zeropage is mapped, 0 if something else is mapped.
+ * Returns < 0 on error or if nothing is mapped.
+ */
+static int maps_shared_zeropage(int pagemap_fd, void *addr)
+{
+	struct page_region region;
+	struct pm_scan_arg arg = {
+		.start = (uintptr_t)addr,
+		.end = (uintptr_t)addr + 4096,
+		.vec = (uintptr_t)&region,
+		.vec_len = 1,
+		.size = sizeof(struct pm_scan_arg),
+		.category_mask = PAGE_IS_PFNZERO,
+		.category_anyof_mask = PAGE_IS_PRESENT,
+		.return_mask = PAGE_IS_PFNZERO,
+	};
+	return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+}
+
+int main(int argc, char *argv[])
+{
+	char *mem, *page0, *page1, *page2, tmp;
+	const size_t pagesize = getpagesize();
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	struct ucall uc;
+	int pagemap_fd;
+
+	ksft_print_header();
+	ksft_set_plan(3);
+
+	/*
+	 * We'll use memory that is not mapped into the VM for simplicity.
+	 * Shared zeropages are enabled/disabled per-process.
+	 */
+	mem = mmap(0, 3 * pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
+	TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
+
+	/* Disable THP. Ignore errors on older kernels. */
+	madvise(mem, 3 * pagesize, MADV_NOHUGEPAGE);
+
+	page0 = mem;
+	page1 = page0 + pagesize;
+	page2 = page1 + pagesize;
+
+	/* Can we even detect shared zeropages? */
+	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+	TEST_REQUIRE(pagemap_fd >= 0);
+
+	tmp = *page0;
+	asm volatile("" : "+r" (tmp));
+	TEST_REQUIRE(maps_shared_zeropage(pagemap_fd, page0) == 1);
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+	/* Verify that we get the shared zeropage after VM creation. */
+	tmp = *page1;
+	asm volatile("" : "+r" (tmp));
+	ksft_test_result(maps_shared_zeropage(pagemap_fd, page1) == 1,
+			 "Shared zeropages should be enabled\n");
+
+	/*
+	 * Let our VM execute a storage key instruction that should
+	 * unshare all shared zeropages.
+	 */
+	vcpu_run(vcpu);
+	get_ucall(vcpu, &uc);
+	TEST_ASSERT_EQ(uc.cmd, UCALL_DONE);
+
+	/* Verify that we don't have a shared zeropage anymore. */
+	ksft_test_result(!maps_shared_zeropage(pagemap_fd, page1),
+			 "Shared zeropage should be gone\n");
+
+	/* Verify that we don't get any new shared zeropages. */
+	tmp = *page2;
+	asm volatile("" : "+r" (tmp));
+	ksft_test_result(!maps_shared_zeropage(pagemap_fd, page2),
+			 "Shared zeropages should be disabled\n");
+
+	kvm_vm_free(vm);
+
+	ksft_finished();
+}
-- 
cgit v1.2.3


From 9d274c19a71b3a276949933859610721a453946b Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Fri, 24 May 2024 13:58:11 -0700
Subject: btrfs: fix crash on racing fsync and size-extending write into
 prealloc

We have been seeing crashes on duplicate keys in
btrfs_set_item_key_safe():

  BTRFS critical (device vdb): slot 4 key (450 108 8192) new key (450 108 8192)
  ------------[ cut here ]------------
  kernel BUG at fs/btrfs/ctree.c:2620!
  invalid opcode: 0000 [#1] PREEMPT SMP PTI
  CPU: 0 PID: 3139 Comm: xfs_io Kdump: loaded Not tainted 6.9.0 #6
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-2.fc40 04/01/2014
  RIP: 0010:btrfs_set_item_key_safe+0x11f/0x290 [btrfs]

With the following stack trace:

  #0  btrfs_set_item_key_safe (fs/btrfs/ctree.c:2620:4)
  #1  btrfs_drop_extents (fs/btrfs/file.c:411:4)
  #2  log_one_extent (fs/btrfs/tree-log.c:4732:9)
  #3  btrfs_log_changed_extents (fs/btrfs/tree-log.c:4955:9)
  #4  btrfs_log_inode (fs/btrfs/tree-log.c:6626:9)
  #5  btrfs_log_inode_parent (fs/btrfs/tree-log.c:7070:8)
  #6  btrfs_log_dentry_safe (fs/btrfs/tree-log.c:7171:8)
  #7  btrfs_sync_file (fs/btrfs/file.c:1933:8)
  #8  vfs_fsync_range (fs/sync.c:188:9)
  #9  vfs_fsync (fs/sync.c:202:9)
  #10 do_fsync (fs/sync.c:212:9)
  #11 __do_sys_fdatasync (fs/sync.c:225:9)
  #12 __se_sys_fdatasync (fs/sync.c:223:1)
  #13 __x64_sys_fdatasync (fs/sync.c:223:1)
  #14 do_syscall_x64 (arch/x86/entry/common.c:52:14)
  #15 do_syscall_64 (arch/x86/entry/common.c:83:7)
  #16 entry_SYSCALL_64+0xaf/0x14c (arch/x86/entry/entry_64.S:121)

So we're logging a changed extent from fsync, which is splitting an
extent in the log tree. But this split part already exists in the tree,
triggering the BUG().

This is the state of the log tree at the time of the crash, dumped with
drgn (https://github.com/osandov/drgn/blob/main/contrib/btrfs_tree.py)
to get more details than btrfs_print_leaf() gives us:

  >>> print_extent_buffer(prog.crashed_thread().stack_trace()[0]["eb"])
  leaf 33439744 level 0 items 72 generation 9 owner 18446744073709551610
  leaf 33439744 flags 0x100000000000000
  fs uuid e5bd3946-400c-4223-8923-190ef1f18677
  chunk uuid d58cb17e-6d02-494a-829a-18b7d8a399da
          item 0 key (450 INODE_ITEM 0) itemoff 16123 itemsize 160
                  generation 7 transid 9 size 8192 nbytes 8473563889606862198
                  block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0
                  sequence 204 flags 0x10(PREALLOC)
                  atime 1716417703.220000000 (2024-05-22 15:41:43)
                  ctime 1716417704.983333333 (2024-05-22 15:41:44)
                  mtime 1716417704.983333333 (2024-05-22 15:41:44)
                  otime 17592186044416.000000000 (559444-03-08 01:40:16)
          item 1 key (450 INODE_REF 256) itemoff 16110 itemsize 13
                  index 195 namelen 3 name: 193
          item 2 key (450 XATTR_ITEM 1640047104) itemoff 16073 itemsize 37
                  location key (0 UNKNOWN.0 0) type XATTR
                  transid 7 data_len 1 name_len 6
                  name: user.a
                  data a
          item 3 key (450 EXTENT_DATA 0) itemoff 16020 itemsize 53
                  generation 9 type 1 (regular)
                  extent data disk byte 303144960 nr 12288
                  extent data offset 0 nr 4096 ram 12288
                  extent compression 0 (none)
          item 4 key (450 EXTENT_DATA 4096) itemoff 15967 itemsize 53
                  generation 9 type 2 (prealloc)
                  prealloc data disk byte 303144960 nr 12288
                  prealloc data offset 4096 nr 8192
          item 5 key (450 EXTENT_DATA 8192) itemoff 15914 itemsize 53
                  generation 9 type 2 (prealloc)
                  prealloc data disk byte 303144960 nr 12288
                  prealloc data offset 8192 nr 4096
  ...

So the real problem happened earlier: notice that items 4 (4k-12k) and 5
(8k-12k) overlap. Both are prealloc extents. Item 4 straddles i_size and
item 5 starts at i_size.

Here is the state of the filesystem tree at the time of the crash:

  >>> root = prog.crashed_thread().stack_trace()[2]["inode"].root
  >>> ret, nodes, slots = btrfs_search_slot(root, BtrfsKey(450, 0, 0))
  >>> print_extent_buffer(nodes[0])
  leaf 30425088 level 0 items 184 generation 9 owner 5
  leaf 30425088 flags 0x100000000000000
  fs uuid e5bd3946-400c-4223-8923-190ef1f18677
  chunk uuid d58cb17e-6d02-494a-829a-18b7d8a399da
  	...
          item 179 key (450 INODE_ITEM 0) itemoff 4907 itemsize 160
                  generation 7 transid 7 size 4096 nbytes 12288
                  block group 0 mode 100600 links 1 uid 0 gid 0 rdev 0
                  sequence 6 flags 0x10(PREALLOC)
                  atime 1716417703.220000000 (2024-05-22 15:41:43)
                  ctime 1716417703.220000000 (2024-05-22 15:41:43)
                  mtime 1716417703.220000000 (2024-05-22 15:41:43)
                  otime 1716417703.220000000 (2024-05-22 15:41:43)
          item 180 key (450 INODE_REF 256) itemoff 4894 itemsize 13
                  index 195 namelen 3 name: 193
          item 181 key (450 XATTR_ITEM 1640047104) itemoff 4857 itemsize 37
                  location key (0 UNKNOWN.0 0) type XATTR
                  transid 7 data_len 1 name_len 6
                  name: user.a
                  data a
          item 182 key (450 EXTENT_DATA 0) itemoff 4804 itemsize 53
                  generation 9 type 1 (regular)
                  extent data disk byte 303144960 nr 12288
                  extent data offset 0 nr 8192 ram 12288
                  extent compression 0 (none)
          item 183 key (450 EXTENT_DATA 8192) itemoff 4751 itemsize 53
                  generation 9 type 2 (prealloc)
                  prealloc data disk byte 303144960 nr 12288
                  prealloc data offset 8192 nr 4096

Item 5 in the log tree corresponds to item 183 in the filesystem tree,
but nothing matches item 4. Furthermore, item 183 is the last item in
the leaf.

btrfs_log_prealloc_extents() is responsible for logging prealloc extents
beyond i_size. It first truncates any previously logged prealloc extents
that start beyond i_size. Then, it walks the filesystem tree and copies
the prealloc extent items to the log tree.

If it hits the end of a leaf, then it calls btrfs_next_leaf(), which
unlocks the tree and does another search. However, while the filesystem
tree is unlocked, an ordered extent completion may modify the tree. In
particular, it may insert an extent item that overlaps with an extent
item that was already copied to the log tree.

This may manifest in several ways depending on the exact scenario,
including an EEXIST error that is silently translated to a full sync,
overlapping items in the log tree, or this crash. This particular crash
is triggered by the following sequence of events:

- Initially, the file has i_size=4k, a regular extent from 0-4k, and a
  prealloc extent beyond i_size from 4k-12k. The prealloc extent item is
  the last item in its B-tree leaf.
- The file is fsync'd, which copies its inode item and both extent items
  to the log tree.
- An xattr is set on the file, which sets the
  BTRFS_INODE_COPY_EVERYTHING flag.
- The range 4k-8k in the file is written using direct I/O. i_size is
  extended to 8k, but the ordered extent is still in flight.
- The file is fsync'd. Since BTRFS_INODE_COPY_EVERYTHING is set, this
  calls copy_inode_items_to_log(), which calls
  btrfs_log_prealloc_extents().
- btrfs_log_prealloc_extents() finds the 4k-12k prealloc extent in the
  filesystem tree. Since it starts before i_size, it skips it. Since it
  is the last item in its B-tree leaf, it calls btrfs_next_leaf().
- btrfs_next_leaf() unlocks the path.
- The ordered extent completion runs, which converts the 4k-8k part of
  the prealloc extent to written and inserts the remaining prealloc part
  from 8k-12k.
- btrfs_next_leaf() does a search and finds the new prealloc extent
  8k-12k.
- btrfs_log_prealloc_extents() copies the 8k-12k prealloc extent into
  the log tree. Note that it overlaps with the 4k-12k prealloc extent
  that was copied to the log tree by the first fsync.
- fsync calls btrfs_log_changed_extents(), which tries to log the 4k-8k
  extent that was written.
- This tries to drop the range 4k-8k in the log tree, which requires
  adjusting the start of the 4k-12k prealloc extent in the log tree to
  8k.
- btrfs_set_item_key_safe() sees that there is already an extent
  starting at 8k in the log tree and calls BUG().

Fix this by detecting when we're about to insert an overlapping file
extent item in the log tree and truncating the part that would overlap.

CC: stable@vger.kernel.org # 6.1+
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 5146387b416b..26a2e5aa08e9 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4860,18 +4860,23 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
 			path->slots[0]++;
 			continue;
 		}
-		if (!dropped_extents) {
-			/*
-			 * Avoid logging extent items logged in past fsync calls
-			 * and leading to duplicate keys in the log tree.
-			 */
+		/*
+		 * Avoid overlapping items in the log tree. The first time we
+		 * get here, get rid of everything from a past fsync. After
+		 * that, if the current extent starts before the end of the last
+		 * extent we copied, truncate the last one. This can happen if
+		 * an ordered extent completion modifies the subvolume tree
+		 * while btrfs_next_leaf() has the tree unlocked.
+		 */
+		if (!dropped_extents || key.offset < truncate_offset) {
 			ret = truncate_inode_items(trans, root->log_root, inode,
-						   truncate_offset,
+						   min(key.offset, truncate_offset),
 						   BTRFS_EXTENT_DATA_KEY);
 			if (ret)
 				goto out;
 			dropped_extents = true;
 		}
+		truncate_offset = btrfs_file_extent_end(path);
 		if (ins_nr == 0)
 			start_slot = slot;
 		ins_nr++;
-- 
cgit v1.2.3


From fb33eb2ef0d88e75564983ef057b44c5b7e4fded Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 3 Jun 2024 12:49:08 +0100
Subject: btrfs: fix leak of qgroup extent records after transaction abort

Qgroup extent records are created when delayed ref heads are created and
then released after accounting extents at btrfs_qgroup_account_extents(),
called during the transaction commit path.

If a transaction is aborted we free the qgroup records by calling
btrfs_qgroup_destroy_extent_records() at btrfs_destroy_delayed_refs(),
unless we don't have delayed references. We are incorrectly assuming
that no delayed references means we don't have qgroup extents records.

We can currently have no delayed references because we ran them all
during a transaction commit and the transaction was aborted after that
due to some error in the commit path.

So fix this by ensuring we btrfs_qgroup_destroy_extent_records() at
btrfs_destroy_delayed_refs() even if we don't have any delayed references.

Reported-by: syzbot+0fecc032fa134afd49df@syzkaller.appspotmail.com
Link: https://lore.kernel.org/linux-btrfs/0000000000004e7f980619f91835@google.com/
Fixes: 81f7eb00ff5b ("btrfs: destroy qgroup extent records on transaction abort")
CC: stable@vger.kernel.org # 6.1+
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a91a8056758a..242ada7e47b4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4538,18 +4538,10 @@ static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 				       struct btrfs_fs_info *fs_info)
 {
 	struct rb_node *node;
-	struct btrfs_delayed_ref_root *delayed_refs;
+	struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 
-	delayed_refs = &trans->delayed_refs;
-
 	spin_lock(&delayed_refs->lock);
-	if (atomic_read(&delayed_refs->num_entries) == 0) {
-		spin_unlock(&delayed_refs->lock);
-		btrfs_debug(fs_info, "delayed_refs has NO entry");
-		return;
-	}
-
 	while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
 		struct btrfs_delayed_ref_head *head;
 		struct rb_node *n;
-- 
cgit v1.2.3


From 267cace556e8a53d703119f7435ab556209e5b6a Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Sun, 26 May 2024 07:59:08 -0500
Subject: drm/amd: Fix shutdown (again) on some SMU v13.0.4/11 platforms

commit cd94d1b182d2 ("dm/amd/pm: Fix problems with reboot/shutdown for
some SMU 13.0.4/13.0.11 users") attempted to fix shutdown issues
that were reported since commit 31729e8c21ec ("drm/amd/pm: fixes a
random hang in S4 for SMU v13.0.4/11") but caused issues for some
people.

Adjust the workaround flow to properly only apply in the S4 case:
-> For shutdown go through SMU_MSG_PrepareMp1ForUnload
-> For S4 go through SMU_MSG_GfxDeviceDriverReset and
   SMU_MSG_PrepareMp1ForUnload

Reported-and-tested-by: lectrode <electrodexsnet@gmail.com>
Closes: https://github.com/void-linux/void-packages/issues/50417
Cc: stable@vger.kernel.org
Fixes: cd94d1b182d2 ("dm/amd/pm: Fix problems with reboot/shutdown for some SMU 13.0.4/13.0.11 users")
Reviewed-by: Tim Huang <Tim.Huang@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index bc241b593db1..b6257f34a7c6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -226,15 +226,17 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en)
 	struct amdgpu_device *adev = smu->adev;
 	int ret = 0;
 
-	if (!en && adev->in_s4) {
-		/* Adds a GFX reset as workaround just before sending the
-		 * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering
-		 * an invalid state.
-		 */
-		ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
-						      SMU_RESET_MODE_2, NULL);
-		if (ret)
-			return ret;
+	if (!en && !adev->in_s0ix) {
+		if (adev->in_s4) {
+			/* Adds a GFX reset as workaround just before sending the
+			 * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering
+			 * an invalid state.
+			 */
+			ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
+							      SMU_RESET_MODE_2, NULL);
+			if (ret)
+				return ret;
+		}
 
 		ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
 	}
-- 
cgit v1.2.3


From c6c4dd54012551cce5cde408b35468f2c62b0cce Mon Sep 17 00:00:00 2001
From: Tasos Sahanidis <tasos@tasossah.com>
Date: Fri, 31 May 2024 19:23:30 +0300
Subject: drm/amdgpu/pptable: Fix UBSAN array-index-out-of-bounds

Flexible arrays used [1] instead of []. Replace the former with the latter
to resolve multiple UBSAN warnings observed on boot with a BONAIRE card.

In addition, use the __counted_by attribute where possible to hint the
length of the arrays to the compiler and any sanitizers.

Signed-off-by: Tasos Sahanidis <tasos@tasossah.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/include/pptable.h | 91 +++++++++++++++++++----------------
 1 file changed, 49 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/pptable.h b/drivers/gpu/drm/amd/include/pptable.h
index 2e8e6c9875f6..f83ace2d7ec3 100644
--- a/drivers/gpu/drm/amd/include/pptable.h
+++ b/drivers/gpu/drm/amd/include/pptable.h
@@ -477,31 +477,30 @@ typedef struct _ATOM_PPLIB_STATE_V2
 } ATOM_PPLIB_STATE_V2;
 
 typedef struct _StateArray{
-    //how many states we have 
-    UCHAR ucNumEntries;
-    
-    ATOM_PPLIB_STATE_V2 states[1];
+	//how many states we have
+	UCHAR ucNumEntries;
+
+	ATOM_PPLIB_STATE_V2 states[] /* __counted_by(ucNumEntries) */;
 }StateArray;
 
 
 typedef struct _ClockInfoArray{
-    //how many clock levels we have
-    UCHAR ucNumEntries;
-    
-    //sizeof(ATOM_PPLIB_CLOCK_INFO)
-    UCHAR ucEntrySize;
-    
-    UCHAR clockInfo[];
+	//how many clock levels we have
+	UCHAR ucNumEntries;
+
+	//sizeof(ATOM_PPLIB_CLOCK_INFO)
+	UCHAR ucEntrySize;
+
+	UCHAR clockInfo[];
 }ClockInfoArray;
 
 typedef struct _NonClockInfoArray{
+	//how many non-clock levels we have. normally should be same as number of states
+	UCHAR ucNumEntries;
+	//sizeof(ATOM_PPLIB_NONCLOCK_INFO)
+	UCHAR ucEntrySize;
 
-    //how many non-clock levels we have. normally should be same as number of states
-    UCHAR ucNumEntries;
-    //sizeof(ATOM_PPLIB_NONCLOCK_INFO)
-    UCHAR ucEntrySize;
-    
-    ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[];
+	ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[] __counted_by(ucNumEntries);
 }NonClockInfoArray;
 
 typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Record
@@ -513,8 +512,10 @@ typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Record
 
 typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Table
 {
-    UCHAR ucNumEntries;                                                // Number of entries.
-    ATOM_PPLIB_Clock_Voltage_Dependency_Record entries[1];             // Dynamically allocate entries.
+	// Number of entries.
+	UCHAR ucNumEntries;
+	// Dynamically allocate entries.
+	ATOM_PPLIB_Clock_Voltage_Dependency_Record entries[] __counted_by(ucNumEntries);
 }ATOM_PPLIB_Clock_Voltage_Dependency_Table;
 
 typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Record
@@ -529,8 +530,10 @@ typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Record
 
 typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Table
 {
-    UCHAR ucNumEntries;                                                // Number of entries.
-    ATOM_PPLIB_Clock_Voltage_Limit_Record entries[1];                  // Dynamically allocate entries.
+	// Number of entries.
+	UCHAR ucNumEntries;
+	// Dynamically allocate entries.
+	ATOM_PPLIB_Clock_Voltage_Limit_Record entries[] __counted_by(ucNumEntries);
 }ATOM_PPLIB_Clock_Voltage_Limit_Table;
 
 union _ATOM_PPLIB_CAC_Leakage_Record
@@ -553,8 +556,10 @@ typedef union _ATOM_PPLIB_CAC_Leakage_Record ATOM_PPLIB_CAC_Leakage_Record;
 
 typedef struct _ATOM_PPLIB_CAC_Leakage_Table
 {
-    UCHAR ucNumEntries;                                                 // Number of entries.
-    ATOM_PPLIB_CAC_Leakage_Record entries[1];                           // Dynamically allocate entries.
+	// Number of entries.
+	UCHAR ucNumEntries;
+	// Dynamically allocate entries.
+	ATOM_PPLIB_CAC_Leakage_Record entries[] __counted_by(ucNumEntries);
 }ATOM_PPLIB_CAC_Leakage_Table;
 
 typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Record
@@ -568,8 +573,10 @@ typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Record
 
 typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Table
 {
-    UCHAR ucNumEntries;                                                 // Number of entries.
-    ATOM_PPLIB_PhaseSheddingLimits_Record entries[1];                   // Dynamically allocate entries.
+	// Number of entries.
+	UCHAR ucNumEntries;
+	// Dynamically allocate entries.
+	ATOM_PPLIB_PhaseSheddingLimits_Record entries[] __counted_by(ucNumEntries);
 }ATOM_PPLIB_PhaseSheddingLimits_Table;
 
 typedef struct _VCEClockInfo{
@@ -580,8 +587,8 @@ typedef struct _VCEClockInfo{
 }VCEClockInfo;
 
 typedef struct _VCEClockInfoArray{
-    UCHAR ucNumEntries;
-    VCEClockInfo entries[1];
+	UCHAR ucNumEntries;
+	VCEClockInfo entries[] __counted_by(ucNumEntries);
 }VCEClockInfoArray;
 
 typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record
@@ -592,8 +599,8 @@ typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record
 
 typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table
 {
-    UCHAR numEntries;
-    ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record entries[1];
+	UCHAR numEntries;
+	ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record entries[] __counted_by(numEntries);
 }ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table;
 
 typedef struct _ATOM_PPLIB_VCE_State_Record
@@ -604,8 +611,8 @@ typedef struct _ATOM_PPLIB_VCE_State_Record
 
 typedef struct _ATOM_PPLIB_VCE_State_Table
 {
-    UCHAR numEntries;
-    ATOM_PPLIB_VCE_State_Record entries[1];
+	UCHAR numEntries;
+	ATOM_PPLIB_VCE_State_Record entries[] __counted_by(numEntries);
 }ATOM_PPLIB_VCE_State_Table;
 
 
@@ -626,8 +633,8 @@ typedef struct _UVDClockInfo{
 }UVDClockInfo;
 
 typedef struct _UVDClockInfoArray{
-    UCHAR ucNumEntries;
-    UVDClockInfo entries[1];
+	UCHAR ucNumEntries;
+	UVDClockInfo entries[] __counted_by(ucNumEntries);
 }UVDClockInfoArray;
 
 typedef struct _ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record
@@ -638,8 +645,8 @@ typedef struct _ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record
 
 typedef struct _ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table
 {
-    UCHAR numEntries;
-    ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record entries[1];
+	UCHAR numEntries;
+	ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record entries[] __counted_by(numEntries);
 }ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table;
 
 typedef struct _ATOM_PPLIB_UVD_Table
@@ -657,8 +664,8 @@ typedef struct _ATOM_PPLIB_SAMClk_Voltage_Limit_Record
 }ATOM_PPLIB_SAMClk_Voltage_Limit_Record;
 
 typedef struct _ATOM_PPLIB_SAMClk_Voltage_Limit_Table{
-    UCHAR numEntries;
-    ATOM_PPLIB_SAMClk_Voltage_Limit_Record entries[];
+	UCHAR numEntries;
+	ATOM_PPLIB_SAMClk_Voltage_Limit_Record entries[] __counted_by(numEntries);
 }ATOM_PPLIB_SAMClk_Voltage_Limit_Table;
 
 typedef struct _ATOM_PPLIB_SAMU_Table
@@ -675,8 +682,8 @@ typedef struct _ATOM_PPLIB_ACPClk_Voltage_Limit_Record
 }ATOM_PPLIB_ACPClk_Voltage_Limit_Record;
 
 typedef struct _ATOM_PPLIB_ACPClk_Voltage_Limit_Table{
-    UCHAR numEntries;
-    ATOM_PPLIB_ACPClk_Voltage_Limit_Record entries[1];
+	UCHAR numEntries;
+	ATOM_PPLIB_ACPClk_Voltage_Limit_Record entries[] __counted_by(numEntries);
 }ATOM_PPLIB_ACPClk_Voltage_Limit_Table;
 
 typedef struct _ATOM_PPLIB_ACP_Table
@@ -743,9 +750,9 @@ typedef struct ATOM_PPLIB_VQ_Budgeting_Record{
 } ATOM_PPLIB_VQ_Budgeting_Record;
 
 typedef struct ATOM_PPLIB_VQ_Budgeting_Table {
-    UCHAR revid;
-    UCHAR numEntries;
-    ATOM_PPLIB_VQ_Budgeting_Record         entries[1];
+	UCHAR revid;
+	UCHAR numEntries;
+	ATOM_PPLIB_VQ_Budgeting_Record entries[] __counted_by(numEntries);
 } ATOM_PPLIB_VQ_Budgeting_Table;
 
 #pragma pack()
-- 
cgit v1.2.3


From c462ecd659b5fce731f1d592285832fd6ad54053 Mon Sep 17 00:00:00 2001
From: Andreas Hindborg <a.hindborg@samsung.com>
Date: Mon, 3 Jun 2024 21:26:45 +0200
Subject: null_blk: fix validation of block size

Block size should be between 512 and PAGE_SIZE and be a power of 2. The current
check does not validate this, so update the check.

Without this patch, null_blk would Oops due to a null pointer deref when
loaded with bs=1536 [1].

Link: https://lore.kernel.org/all/87wmn8mocd.fsf@metaspace.dk/

Signed-off-by: Andreas Hindborg <a.hindborg@samsung.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20240603192645.977968-1-nmi@metaspace.dk
[axboe: remove unnecessary braces and != 0 check]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 631dca2e4e84..75f189e42f88 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1824,8 +1824,8 @@ static int null_validate_conf(struct nullb_device *dev)
 		dev->queue_mode = NULL_Q_MQ;
 	}
 
-	dev->blocksize = round_down(dev->blocksize, 512);
-	dev->blocksize = clamp_t(unsigned int, dev->blocksize, 512, 4096);
+	if (blk_validate_block_size(dev->blocksize))
+		return -EINVAL;
 
 	if (dev->use_per_node_hctx) {
 		if (dev->submit_queues != nr_online_nodes)
-- 
cgit v1.2.3


From c625dabbf1c4a8e77e4734014f2fde7aa9071a1f Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Mon, 3 Apr 2023 16:42:44 +0000
Subject: x86/amd_nb: Check for invalid SMN reads

AMD Zen-based systems use a System Management Network (SMN) that
provides access to implementation-specific registers.

SMN accesses are done indirectly through an index/data pair in PCI
config space. The PCI config access may fail and return an error code.
This would prevent the "read" value from being updated.

However, the PCI config access may succeed, but the return value may be
invalid. This is in similar fashion to PCI bad reads, i.e. return all
bits set.

Most systems will return 0 for SMN addresses that are not accessible.
This is in line with AMD convention that unavailable registers are
Read-as-Zero/Writes-Ignored.

However, some systems will return a "PCI Error Response" instead. This
value, along with an error code of 0 from the PCI config access, will
confuse callers of the amd_smn_read() function.

Check for this condition, clear the return value, and set a proper error
code.

Fixes: ddfe43cdc0da ("x86/amd_nb: Add SMN and Indirect Data Fabric access for AMD Fam17h")
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20230403164244.471141-1-yazen.ghannam@amd.com
---
 arch/x86/kernel/amd_nb.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 3cf156f70859..027a8c7a2c9e 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -215,7 +215,14 @@ out:
 
 int amd_smn_read(u16 node, u32 address, u32 *value)
 {
-	return __amd_smn_rw(node, address, value, false);
+	int err = __amd_smn_rw(node, address, value, false);
+
+	if (PCI_POSSIBLE_ERROR(*value)) {
+		err = -ENODEV;
+		*value = 0;
+	}
+
+	return err;
 }
 EXPORT_SYMBOL_GPL(amd_smn_read);
 
-- 
cgit v1.2.3


From 229bedbf62b13af5aba6525ad10b62ad38d9ccb5 Mon Sep 17 00:00:00 2001
From: Aleksandr Mishin <amishin@t-argos.ru>
Date: Tue, 4 Jun 2024 13:05:52 +0300
Subject: net/mlx5: Fix tainted pointer delete is case of flow rules creation
 fail

In case of flow rule creation fail in mlx5_lag_create_port_sel_table(),
instead of previously created rules, the tainted pointer is deleted
deveral times.
Fix this bug by using correct flow rules pointers.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Fixes: 352899f384d4 ("net/mlx5: Lag, use buckets in hash mode")
Signed-off-by: Aleksandr Mishin <amishin@t-argos.ru>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://lore.kernel.org/r/20240604100552.25201-1-amishin@t-argos.ru
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
index c16b462ddedf..ab2717012b79 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
@@ -88,9 +88,13 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
 								      &dest, 1);
 			if (IS_ERR(lag_definer->rules[idx])) {
 				err = PTR_ERR(lag_definer->rules[idx]);
-				while (i--)
-					while (j--)
+				do {
+					while (j--) {
+						idx = i * ldev->buckets + j;
 						mlx5_del_flow_rules(lag_definer->rules[idx]);
+					}
+					j = ldev->buckets;
+				} while (i--);
 				goto destroy_fg;
 			}
 		}
-- 
cgit v1.2.3


From f921a58ae20852d188f70842431ce6519c4fdc36 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 4 Jun 2024 18:15:11 +0000
Subject: net/sched: taprio: always validate TCA_TAPRIO_ATTR_PRIOMAP

If one TCA_TAPRIO_ATTR_PRIOMAP attribute has been provided,
taprio_parse_mqprio_opt() must validate it, or userspace
can inject arbitrary data to the kernel, the second time
taprio_change() is called.

First call (with valid attributes) sets dev->num_tc
to a non zero value.

Second call (with arbitrary mqprio attributes)
returns early from taprio_parse_mqprio_opt()
and bad things can happen.

Fixes: a3d43c0d56f1 ("taprio: Add support adding an admin schedule")
Reported-by: Noam Rathaus <noamr@ssd-disclosure.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20240604181511.769870-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/sch_taprio.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 937a0c513c17..b284a06b5a75 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1176,16 +1176,13 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
 {
 	bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags);
 
-	if (!qopt && !dev->num_tc) {
-		NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
-		return -EINVAL;
-	}
-
-	/* If num_tc is already set, it means that the user already
-	 * configured the mqprio part
-	 */
-	if (dev->num_tc)
+	if (!qopt) {
+		if (!dev->num_tc) {
+			NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
+			return -EINVAL;
+		}
 		return 0;
+	}
 
 	/* taprio imposes that traffic classes map 1:n to tx queues */
 	if (qopt->num_tc > dev->num_tx_queues) {
-- 
cgit v1.2.3


From 323a359f9b077f382f4483023d096a4d316fd135 Mon Sep 17 00:00:00 2001
From: Karol Kolacinski <karol.kolacinski@intel.com>
Date: Tue, 4 Jun 2024 14:05:27 +0200
Subject: ptp: Fix error message on failed pin verification

On failed verification of PTP clock pin, error message prints channel
number instead of pin index after "pin", which is incorrect.

Fix error message by adding channel number to the message and printing
pin number instead of channel number.

Fixes: 6092315dfdec ("ptp: introduce programmable pins.")
Signed-off-by: Karol Kolacinski <karol.kolacinski@intel.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Link: https://lore.kernel.org/r/20240604120555.16643-1-karol.kolacinski@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/ptp/ptp_chardev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 7513018c9f9a..2067b0120d08 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -85,7 +85,8 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin,
 	}
 
 	if (info->verify(info, pin, func, chan)) {
-		pr_err("driver cannot use function %u on pin %u\n", func, chan);
+		pr_err("driver cannot use function %u and channel %u on pin %u\n",
+		       func, chan, pin);
 		return -EOPNOTSUPP;
 	}
 
-- 
cgit v1.2.3


From 0d648dd5c899f33154b98a6aef6e3dab0f4de613 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Thu, 23 May 2024 10:36:39 +0800
Subject: mm: drop the 'anon_' prefix for swap-out mTHP counters

The mTHP swap related counters: 'anon_swpout' and 'anon_swpout_fallback'
are confusing with an 'anon_' prefix, since the shmem can swap out
non-anonymous pages.  So drop the 'anon_' prefix to keep consistent with
the old swap counter names.

This is needed in 6.10-rcX to avoid having an inconsistent ABI out in the
field.

Link: https://lkml.kernel.org/r/7a8989c13299920d7589007a30065c3e2c19f0e0.1716431702.git.baolin.wang@linux.alibaba.com
Fixes: d0f048ac39f6 ("mm: add per-order mTHP anon_swpout and anon_swpout_fallback counters")
Fixes: 42248b9d34ea ("mm: add docs for per-order mTHP counters and transhuge_page ABI")
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Suggested-by: "Huang, Ying" <ying.huang@intel.com>
Acked-by: Barry Song <baohua@kernel.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/admin-guide/mm/transhuge.rst | 4 ++--
 include/linux/huge_mm.h                    | 4 ++--
 mm/huge_memory.c                           | 8 ++++----
 mm/page_io.c                               | 2 +-
 mm/vmscan.c                                | 2 +-
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 076443cc10a6..d414d3f5592a 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -467,11 +467,11 @@ anon_fault_fallback_charge
 	instead falls back to using huge pages with lower orders or
 	small pages even though the allocation was successful.
 
-anon_swpout
+swpout
 	is incremented every time a huge page is swapped out in one
 	piece without splitting.
 
-anon_swpout_fallback
+swpout_fallback
 	is incremented if a huge page has to be split before swapout.
 	Usually because failed to allocate some continuous swap space
 	for the huge page.
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index c8d3ec116e29..8c72d3786583 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -269,8 +269,8 @@ enum mthp_stat_item {
 	MTHP_STAT_ANON_FAULT_ALLOC,
 	MTHP_STAT_ANON_FAULT_FALLBACK,
 	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
-	MTHP_STAT_ANON_SWPOUT,
-	MTHP_STAT_ANON_SWPOUT_FALLBACK,
+	MTHP_STAT_SWPOUT,
+	MTHP_STAT_SWPOUT_FALLBACK,
 	__MTHP_STAT_COUNT
 };
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 317de2afd371..89932fd0f62e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -558,15 +558,15 @@ static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
 DEFINE_MTHP_STAT_ATTR(anon_fault_alloc, MTHP_STAT_ANON_FAULT_ALLOC);
 DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
 DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
-DEFINE_MTHP_STAT_ATTR(anon_swpout, MTHP_STAT_ANON_SWPOUT);
-DEFINE_MTHP_STAT_ATTR(anon_swpout_fallback, MTHP_STAT_ANON_SWPOUT_FALLBACK);
+DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
+DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
 
 static struct attribute *stats_attrs[] = {
 	&anon_fault_alloc_attr.attr,
 	&anon_fault_fallback_attr.attr,
 	&anon_fault_fallback_charge_attr.attr,
-	&anon_swpout_attr.attr,
-	&anon_swpout_fallback_attr.attr,
+	&swpout_attr.attr,
+	&swpout_fallback_attr.attr,
 	NULL,
 };
 
diff --git a/mm/page_io.c b/mm/page_io.c
index 46c603dddf04..0a150c240bf4 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -217,7 +217,7 @@ static inline void count_swpout_vm_event(struct folio *folio)
 		count_memcg_folio_events(folio, THP_SWPOUT, 1);
 		count_vm_event(THP_SWPOUT);
 	}
-	count_mthp_stat(folio_order(folio), MTHP_STAT_ANON_SWPOUT);
+	count_mthp_stat(folio_order(folio), MTHP_STAT_SWPOUT);
 #endif
 	count_vm_events(PSWPOUT, folio_nr_pages(folio));
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d55e8d07ffc4..2e34de9cd0d4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1227,7 +1227,7 @@ retry:
 							THP_SWPOUT_FALLBACK, 1);
 						count_vm_event(THP_SWPOUT_FALLBACK);
 					}
-					count_mthp_stat(order, MTHP_STAT_ANON_SWPOUT_FALLBACK);
+					count_mthp_stat(order, MTHP_STAT_SWPOUT_FALLBACK);
 #endif
 					if (!add_to_swap(folio))
 						goto activate_locked_split;
-- 
cgit v1.2.3


From 94d46bf17916965e918bd2f3d2eec057f7c5578d Mon Sep 17 00:00:00 2001
From: Barry Song <v-songbaohua@oppo.com>
Date: Fri, 24 May 2024 08:50:48 +1200
Subject: mm: huge_mm: fix undefined reference to `mthp_stats' for
 CONFIG_SYSFS=n

if CONFIG_SYSFS is not enabled in config, we get the below error,

All errors (new ones prefixed by >>):

   s390-linux-ld: mm/memory.o: in function `count_mthp_stat':
>> include/linux/huge_mm.h:285:(.text+0x191c): undefined reference to `mthp_stats'
   s390-linux-ld: mm/huge_memory.o:(.rodata+0x10): undefined reference to `mthp_stats'

vim +285 include/linux/huge_mm.h

   279
   280  static inline void count_mthp_stat(int order, enum mthp_stat_item item)
   281  {
   282          if (order <= 0 || order > PMD_ORDER)
   283                  return;
   284
 > 285          this_cpu_inc(mthp_stats.stats[order][item]);
   286  }
   287

Link: https://lkml.kernel.org/r/20240523210045.40444-1-21cnbao@gmail.com
Fixes: ec33687c6749 ("mm: add per-order mTHP anon_fault_alloc and anon_fault_fallback counters")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202405231728.tCAogiSI-lkp@intel.com/
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Tested-by: Yujie Liu <yujie.liu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/huge_mm.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 8c72d3786583..2aa986a5cd1b 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -278,6 +278,7 @@ struct mthp_stat {
 	unsigned long stats[ilog2(MAX_PTRS_PER_PTE) + 1][__MTHP_STAT_COUNT];
 };
 
+#ifdef CONFIG_SYSFS
 DECLARE_PER_CPU(struct mthp_stat, mthp_stats);
 
 static inline void count_mthp_stat(int order, enum mthp_stat_item item)
@@ -287,6 +288,11 @@ static inline void count_mthp_stat(int order, enum mthp_stat_item item)
 
 	this_cpu_inc(mthp_stats.stats[order][item]);
 }
+#else
+static inline void count_mthp_stat(int order, enum mthp_stat_item item)
+{
+}
+#endif
 
 #define transparent_hugepage_use_zero_page()				\
 	(transparent_hugepage_flags &					\
-- 
cgit v1.2.3


From 6434e69814b159608a23135ca2be36024f402717 Mon Sep 17 00:00:00 2001
From: Barry Song <v-songbaohua@oppo.com>
Date: Fri, 24 May 2024 12:54:44 +1200
Subject: mm: arm64: fix the out-of-bounds issue in
 contpte_clear_young_dirty_ptes

We are passing a huge nr to __clear_young_dirty_ptes() right now.  While
we should pass the number of pages, we are actually passing CONT_PTE_SIZE.
This is causing lots of crashes of MADV_FREE, panic oops could vary
everytime.

Link: https://lkml.kernel.org/r/20240524005444.135417-1-21cnbao@gmail.com
Fixes: 89e86854fb0a ("mm/arm64: override clear_young_dirty_ptes() batch helper")
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Acked-by: Lance Yang <ioworker0@gmail.com>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Chris Li <chrisl@kernel.org>
Cc: Barry Song <21cnbao@gmail.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Jeff Xie <xiehuan09@gmail.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yin Fengwei <fengwei.yin@intel.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/arm64/mm/contpte.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
index 9f9486de0004..a3edced29ac1 100644
--- a/arch/arm64/mm/contpte.c
+++ b/arch/arm64/mm/contpte.c
@@ -376,7 +376,7 @@ void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
 	 * clearing access/dirty for the whole block.
 	 */
 	unsigned long start = addr;
-	unsigned long end = start + nr;
+	unsigned long end = start + nr * PAGE_SIZE;
 
 	if (pte_cont(__ptep_get(ptep + nr - 1)))
 		end = ALIGN(end, CONT_PTE_SIZE);
@@ -386,7 +386,7 @@ void contpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
 		ptep = contpte_align_down(ptep);
 	}
 
-	__clear_young_dirty_ptes(vma, start, ptep, end - start, flags);
+	__clear_young_dirty_ptes(vma, start, ptep, (end - start) / PAGE_SIZE, flags);
 }
 EXPORT_SYMBOL_GPL(contpte_clear_young_dirty_ptes);
 
-- 
cgit v1.2.3


From 36eef400c2d571d05b6e41a9a61f874d4a7b82c5 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 28 May 2024 16:13:41 +0200
Subject: memcg: remove the lockdep assert from __mod_objcg_mlstate()

The assert was introduced in the commit cited below as an insurance that
the semantic is the same after the local_irq_save() has been removed and
the function has been made static.

The original requirement to disable interrupt was due the modification
of per-CPU counters which require interrupts to be disabled because the
counter update operation is not atomic and some of the counters are
updated from interrupt context.

All callers of __mod_objcg_mlstate() acquire a lock
(memcg_stock.stock_lock) which disables interrupts on !PREEMPT_RT and
the lockdep assert is satisfied. On PREEMPT_RT the interrupts are not
disabled and the assert triggers.

The safety of the counter update is already ensured by
VM_WARN_ON_IRQS_ENABLED() which is part of __mod_memcg_lruvec_state() and
does not require yet another check.

Remove the lockdep assert from __mod_objcg_mlstate().

Link: https://lkml.kernel.org/r/20240528141341.rz_rytN_@linutronix.de
Fixes: 91882c1617c1 ("memcg: simple cleanup of stats update functions")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memcontrol.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7fad15b2290c..36793e509f47 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3147,8 +3147,6 @@ static inline void __mod_objcg_mlstate(struct obj_cgroup *objcg,
 	struct mem_cgroup *memcg;
 	struct lruvec *lruvec;
 
-	lockdep_assert_irqs_disabled();
-
 	rcu_read_lock();
 	memcg = obj_cgroup_memcg(objcg);
 	lruvec = mem_cgroup_lruvec(memcg, pgdat);
-- 
cgit v1.2.3


From a4ca369ca221bb7e06c725792ac107f0e48e82e7 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Date: Thu, 30 May 2024 23:15:56 +0900
Subject: nilfs2: fix potential kernel bug due to lack of writeback flag
 waiting

Destructive writes to a block device on which nilfs2 is mounted can cause
a kernel bug in the folio/page writeback start routine or writeback end
routine (__folio_start_writeback in the log below):

 kernel BUG at mm/page-writeback.c:3070!
 Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
 ...
 RIP: 0010:__folio_start_writeback+0xbaa/0x10e0
 Code: 25 ff 0f 00 00 0f 84 18 01 00 00 e8 40 ca c6 ff e9 17 f6 ff ff
  e8 36 ca c6 ff 4c 89 f7 48 c7 c6 80 c0 12 84 e8 e7 b3 0f 00 90 <0f>
  0b e8 1f ca c6 ff 4c 89 f7 48 c7 c6 a0 c6 12 84 e8 d0 b3 0f 00
 ...
 Call Trace:
  <TASK>
  nilfs_segctor_do_construct+0x4654/0x69d0 [nilfs2]
  nilfs_segctor_construct+0x181/0x6b0 [nilfs2]
  nilfs_segctor_thread+0x548/0x11c0 [nilfs2]
  kthread+0x2f0/0x390
  ret_from_fork+0x4b/0x80
  ret_from_fork_asm+0x1a/0x30
  </TASK>

This is because when the log writer starts a writeback for segment summary
blocks or a super root block that use the backing device's page cache, it
does not wait for the ongoing folio/page writeback, resulting in an
inconsistent writeback state.

Fix this issue by waiting for ongoing writebacks when putting
folios/pages on the backing device into writeback state.

Link: https://lkml.kernel.org/r/20240530141556.4411-1-konishi.ryusuke@gmail.com
Fixes: 9ff05123e3bf ("nilfs2: segment constructor")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/nilfs2/segment.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 60d4f59f7665..6ea81f1d5094 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1652,6 +1652,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
 			if (bh->b_folio != bd_folio) {
 				if (bd_folio) {
 					folio_lock(bd_folio);
+					folio_wait_writeback(bd_folio);
 					folio_clear_dirty_for_io(bd_folio);
 					folio_start_writeback(bd_folio);
 					folio_unlock(bd_folio);
@@ -1665,6 +1666,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
 			if (bh == segbuf->sb_super_root) {
 				if (bh->b_folio != bd_folio) {
 					folio_lock(bd_folio);
+					folio_wait_writeback(bd_folio);
 					folio_clear_dirty_for_io(bd_folio);
 					folio_start_writeback(bd_folio);
 					folio_unlock(bd_folio);
@@ -1681,6 +1683,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
 	}
 	if (bd_folio) {
 		folio_lock(bd_folio);
+		folio_wait_writeback(bd_folio);
 		folio_clear_dirty_for_io(bd_folio);
 		folio_start_writeback(bd_folio);
 		folio_unlock(bd_folio);
-- 
cgit v1.2.3


From 7cc5a5d65011983952a9c62f170f5b79e24b1239 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Wed, 29 May 2024 18:18:12 -0400
Subject: mm: page_alloc: fix highatomic typing in multi-block buddies

Christoph reports a page allocator splat triggered by xfstests:

generic/176 214s ... [ 1204.507931] run fstests generic/176 at 2024-05-27 12:52:30
XFS (nvme0n1): Mounting V5 Filesystem cd936307-415f-48a3-b99d-a2d52ae1f273
XFS (nvme0n1): Ending clean mount
XFS (nvme1n1): Mounting V5 Filesystem ab3ee1a4-af62-4934-9a6a-6c2fde321850
XFS (nvme1n1): Ending clean mount
XFS (nvme1n1): Unmounting Filesystem ab3ee1a4-af62-4934-9a6a-6c2fde321850
XFS (nvme1n1): Mounting V5 Filesystem 7099b02d-9c58-4d1d-be1d-2cc472d12cd9
XFS (nvme1n1): Ending clean mount
------------[ cut here ]------------
page type is 3, passed migratetype is 1 (nr=512)
WARNING: CPU: 0 PID: 509870 at mm/page_alloc.c:645 expand+0x1c5/0x1f0
Modules linked in: i2c_i801 crc32_pclmul i2c_smbus [last unloaded: scsi_debug]
CPU: 0 PID: 509870 Comm: xfs_io Not tainted 6.10.0-rc1+ #2437
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:expand+0x1c5/0x1f0
Code: 05 16 70 bf 02 01 e8 ca fc ff ff 8b 54 24 34 44 89 e1 48 c7 c7 80 a2 28 83 48 89 c6 b8 01 00 3
RSP: 0018:ffffc90003b2b968 EFLAGS: 00010082
RAX: 0000000000000000 RBX: ffffffff83fa9480 RCX: 0000000000000000
RDX: 0000000000000005 RSI: 0000000000000027 RDI: 00000000ffffffff
RBP: 00000000001f2600 R08: 00000000fffeffff R09: 0000000000000001
R10: 0000000000000000 R11: ffffffff83676200 R12: 0000000000000009
R13: 0000000000000200 R14: 0000000000000001 R15: ffffea0007c98000
FS:  00007f72ca3d5780(0000) GS:ffff8881f9c00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f72ca1fff38 CR3: 00000001aa0c6002 CR4: 0000000000770ef0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff07f0 DR7: 0000000000000400
PKRU: 55555554
Call Trace:
 <TASK>
 ? __warn+0x7b/0x120
 ? expand+0x1c5/0x1f0
 ? report_bug+0x191/0x1c0
 ? handle_bug+0x3c/0x80
 ? exc_invalid_op+0x17/0x70
 ? asm_exc_invalid_op+0x1a/0x20
 ? expand+0x1c5/0x1f0
 ? expand+0x1c5/0x1f0
 __rmqueue_pcplist+0x3a9/0x730
 get_page_from_freelist+0x7a0/0xf00
 __alloc_pages_noprof+0x153/0x2e0
 __folio_alloc_noprof+0x10/0xa0
 __filemap_get_folio+0x16b/0x370
 iomap_write_begin+0x496/0x680

While trying to service a movable allocation (page type 1), the page
allocator runs into a two-pageblock buddy on the movable freelist whose
second block is typed as highatomic (page type 3).

This inconsistency is caused by the highatomic reservation system
operating on single pageblocks, while MAX_ORDER can be bigger than that -
in this configuration, pageblock_order is 9 while MAX_PAGE_ORDER is 10.
The test case is observed to make several adjacent order-3 requests with
__GFP_DIRECT_RECLAIM cleared, which marks the surrounding block as
highatomic.  Upon freeing, the blocks merge into an order-10 buddy.  When
the highatomic pool is drained later on, this order-10 buddy gets moved
back to the movable list, but only the first pageblock is marked movable
again.  A subsequent expand() of this buddy warns about the tail being of
a different type.

This is a long-standing bug that's surfaced by the recent block type
warnings added to the allocator.  The consequences seem mostly benign, it
just results in odd behavior: the highatomic tail blocks are not properly
drained, instead they end up on the movable list first, then go back to
the highatomic list after an alloc-free cycle.

To fix this, make the highatomic reservation code aware that
allocations/buddies can be larger than a pageblock.

While it's an old quirk, the recently added type consistency warnings seem
to be the most prominent consequence of it.  Set the Fixes: tag
accordingly to highlight this backporting dependency.

Link: https://lkml.kernel.org/r/20240530114203.GA1222079@cmpxchg.org
Fixes: e0932b6c1f94 ("mm: page_alloc: consolidate free page accounting")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Christoph Hellwig <hch@infradead.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Tested-by: Christoph Hellwig <hch@lst.de>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/page_alloc.c | 50 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2e22ce5675ca..222299b5c0e6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1955,10 +1955,12 @@ int find_suitable_fallback(struct free_area *area, unsigned int order,
 }
 
 /*
- * Reserve a pageblock for exclusive use of high-order atomic allocations if
- * there are no empty page blocks that contain a page with a suitable order
+ * Reserve the pageblock(s) surrounding an allocation request for
+ * exclusive use of high-order atomic allocations if there are no
+ * empty page blocks that contain a page with a suitable order
  */
-static void reserve_highatomic_pageblock(struct page *page, struct zone *zone)
+static void reserve_highatomic_pageblock(struct page *page, int order,
+					 struct zone *zone)
 {
 	int mt;
 	unsigned long max_managed, flags;
@@ -1984,10 +1986,17 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone)
 	/* Yoink! */
 	mt = get_pageblock_migratetype(page);
 	/* Only reserve normal pageblocks (i.e., they can merge with others) */
-	if (migratetype_is_mergeable(mt))
-		if (move_freepages_block(zone, page, mt,
-					 MIGRATE_HIGHATOMIC) != -1)
-			zone->nr_reserved_highatomic += pageblock_nr_pages;
+	if (!migratetype_is_mergeable(mt))
+		goto out_unlock;
+
+	if (order < pageblock_order) {
+		if (move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC) == -1)
+			goto out_unlock;
+		zone->nr_reserved_highatomic += pageblock_nr_pages;
+	} else {
+		change_pageblock_range(page, order, MIGRATE_HIGHATOMIC);
+		zone->nr_reserved_highatomic += 1 << order;
+	}
 
 out_unlock:
 	spin_unlock_irqrestore(&zone->lock, flags);
@@ -1999,7 +2008,7 @@ out_unlock:
  * intense memory pressure but failed atomic allocations should be easier
  * to recover from than an OOM.
  *
- * If @force is true, try to unreserve a pageblock even though highatomic
+ * If @force is true, try to unreserve pageblocks even though highatomic
  * pageblock is exhausted.
  */
 static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
@@ -2041,6 +2050,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
 			 * adjust the count once.
 			 */
 			if (is_migrate_highatomic(mt)) {
+				unsigned long size;
 				/*
 				 * It should never happen but changes to
 				 * locking could inadvertently allow a per-cpu
@@ -2048,9 +2058,9 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
 				 * while unreserving so be safe and watch for
 				 * underflows.
 				 */
-				zone->nr_reserved_highatomic -= min(
-						pageblock_nr_pages,
-						zone->nr_reserved_highatomic);
+				size = max(pageblock_nr_pages, 1UL << order);
+				size = min(size, zone->nr_reserved_highatomic);
+				zone->nr_reserved_highatomic -= size;
 			}
 
 			/*
@@ -2062,11 +2072,19 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
 			 * of pageblocks that cannot be completely freed
 			 * may increase.
 			 */
-			ret = move_freepages_block(zone, page, mt,
-						   ac->migratetype);
+			if (order < pageblock_order)
+				ret = move_freepages_block(zone, page, mt,
+							   ac->migratetype);
+			else {
+				move_to_free_list(page, zone, order, mt,
+						  ac->migratetype);
+				change_pageblock_range(page, order,
+						       ac->migratetype);
+				ret = 1;
+			}
 			/*
-			 * Reserving this block already succeeded, so this should
-			 * not fail on zone boundaries.
+			 * Reserving the block(s) already succeeded,
+			 * so this should not fail on zone boundaries.
 			 */
 			WARN_ON_ONCE(ret == -1);
 			if (ret > 0) {
@@ -3406,7 +3424,7 @@ try_this_zone:
 			 * if the pageblock should be reserved for the future
 			 */
 			if (unlikely(alloc_flags & ALLOC_HIGHATOMIC))
-				reserve_highatomic_pageblock(page, zone);
+				reserve_highatomic_pageblock(page, order, zone);
 
 			return page;
 		} else {
-- 
cgit v1.2.3


From 0105eaabb27f31d9b8d340aca6fb6a3420cab30f Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Tue, 28 May 2024 09:08:38 -0700
Subject: vmalloc: check CONFIG_EXECMEM in is_vmalloc_or_module_addr()

After commit 2c9e5d4a0082 ("bpf: remove CONFIG_BPF_JIT dependency on
CONFIG_MODULES of") CONFIG_BPF_JIT does not depend on CONFIG_MODULES any
more and bpf jit also uses the [MODULES_VADDR, MODULES_END] memory region.
But is_vmalloc_or_module_addr() still checks CONFIG_MODULES, which then
returns false for a bpf jit memory region when CONFIG_MODULES is not
defined.  It leads to the following kernel BUG:

[    1.567023] ------------[ cut here ]------------
[    1.567883] kernel BUG at mm/vmalloc.c:745!
[    1.568477] Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI
[    1.569367] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.9.0+ #448
[    1.570247] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014
[    1.570786] RIP: 0010:vmalloc_to_page+0x48/0x1ec
[    1.570786] Code: 0f 00 00 e8 eb 1a 05 00 b8 37 00 00 00 48 ba fe ff ff ff ff 1f 00 00 4c 03 25 76 49 c6 02 48 c1 e0 28 48 01 e8 48 39 d0 76 02 <0f> 0b 4c 89 e7 e8 bf 1a 05 00 49 8b 04 24 48 a9 9f ff ff ff 0f 84
[    1.570786] RSP: 0018:ffff888007787960 EFLAGS: 00010212
[    1.570786] RAX: 000036ffa0000000 RBX: 0000000000000640 RCX: ffffffff8147e93c
[    1.570786] RDX: 00001ffffffffffe RSI: dffffc0000000000 RDI: ffffffff840e32c8
[    1.570786] RBP: ffffffffa0000000 R08: 0000000000000000 R09: 0000000000000000
[    1.570786] R10: ffff888007787a88 R11: ffffffff8475d8e7 R12: ffffffff83e80ff8
[    1.570786] R13: 0000000000000640 R14: 0000000000000640 R15: 0000000000000640
[    1.570786] FS:  0000000000000000(0000) GS:ffff88806cc00000(0000) knlGS:0000000000000000
[    1.570786] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    1.570786] CR2: ffff888006a01000 CR3: 0000000003e80000 CR4: 0000000000350ef0
[    1.570786] Call Trace:
[    1.570786]  <TASK>
[    1.570786]  ? __die_body+0x1b/0x58
[    1.570786]  ? die+0x31/0x4b
[    1.570786]  ? do_trap+0x9d/0x138
[    1.570786]  ? vmalloc_to_page+0x48/0x1ec
[    1.570786]  ? do_error_trap+0xcd/0x102
[    1.570786]  ? vmalloc_to_page+0x48/0x1ec
[    1.570786]  ? vmalloc_to_page+0x48/0x1ec
[    1.570786]  ? handle_invalid_op+0x2f/0x38
[    1.570786]  ? vmalloc_to_page+0x48/0x1ec
[    1.570786]  ? exc_invalid_op+0x2b/0x41
[    1.570786]  ? asm_exc_invalid_op+0x16/0x20
[    1.570786]  ? vmalloc_to_page+0x26/0x1ec
[    1.570786]  ? vmalloc_to_page+0x48/0x1ec
[    1.570786]  __text_poke+0xb6/0x458
[    1.570786]  ? __pfx_text_poke_memcpy+0x10/0x10
[    1.570786]  ? __pfx___mutex_lock+0x10/0x10
[    1.570786]  ? __pfx___text_poke+0x10/0x10
[    1.570786]  ? __pfx_get_random_u32+0x10/0x10
[    1.570786]  ? srso_return_thunk+0x5/0x5f
[    1.570786]  text_poke_copy_locked+0x70/0x84
[    1.570786]  text_poke_copy+0x32/0x4f
[    1.570786]  bpf_arch_text_copy+0xf/0x27
[    1.570786]  bpf_jit_binary_pack_finalize+0x26/0x5a
[    1.570786]  bpf_int_jit_compile+0x576/0x8ad
[    1.570786]  ? __pfx_bpf_int_jit_compile+0x10/0x10
[    1.570786]  ? srso_return_thunk+0x5/0x5f
[    1.570786]  ? __kmalloc_node_track_caller+0x2b5/0x2e0
[    1.570786]  bpf_prog_select_runtime+0x7c/0x199
[    1.570786]  bpf_prepare_filter+0x1e9/0x25b
[    1.570786]  ? __pfx_bpf_prepare_filter+0x10/0x10
[    1.570786]  ? srso_return_thunk+0x5/0x5f
[    1.570786]  ? _find_next_bit+0x29/0x7e
[    1.570786]  bpf_prog_create+0xb8/0xe0
[    1.570786]  ptp_classifier_init+0x75/0xa1
[    1.570786]  ? __pfx_ptp_classifier_init+0x10/0x10
[    1.570786]  ? srso_return_thunk+0x5/0x5f
[    1.570786]  ? register_pernet_subsys+0x36/0x42
[    1.570786]  ? srso_return_thunk+0x5/0x5f
[    1.570786]  sock_init+0x99/0xa3
[    1.570786]  ? __pfx_sock_init+0x10/0x10
[    1.570786]  do_one_initcall+0x104/0x2c4
[    1.570786]  ? __pfx_do_one_initcall+0x10/0x10
[    1.570786]  ? parameq+0x25/0x2d
[    1.570786]  ? rcu_is_watching+0x1c/0x3c
[    1.570786]  ? trace_kmalloc+0x81/0xb2
[    1.570786]  ? srso_return_thunk+0x5/0x5f
[    1.570786]  ? __kmalloc+0x29c/0x2c7
[    1.570786]  ? srso_return_thunk+0x5/0x5f
[    1.570786]  do_initcalls+0xf9/0x123
[    1.570786]  kernel_init_freeable+0x24f/0x289
[    1.570786]  ? __pfx_kernel_init+0x10/0x10
[    1.570786]  kernel_init+0x19/0x13a
[    1.570786]  ret_from_fork+0x24/0x41
[    1.570786]  ? __pfx_kernel_init+0x10/0x10
[    1.570786]  ret_from_fork_asm+0x1a/0x30
[    1.570786]  </TASK>
[    1.570819] ---[ end trace 0000000000000000 ]---
[    1.571463] RIP: 0010:vmalloc_to_page+0x48/0x1ec
[    1.572111] Code: 0f 00 00 e8 eb 1a 05 00 b8 37 00 00 00 48 ba fe ff ff ff ff 1f 00 00 4c 03 25 76 49 c6 02 48 c1 e0 28 48 01 e8 48 39 d0 76 02 <0f> 0b 4c 89 e7 e8 bf 1a 05 00 49 8b 04 24 48 a9 9f ff ff ff 0f 84
[    1.574632] RSP: 0018:ffff888007787960 EFLAGS: 00010212
[    1.575129] RAX: 000036ffa0000000 RBX: 0000000000000640 RCX: ffffffff8147e93c
[    1.576097] RDX: 00001ffffffffffe RSI: dffffc0000000000 RDI: ffffffff840e32c8
[    1.577084] RBP: ffffffffa0000000 R08: 0000000000000000 R09: 0000000000000000
[    1.578077] R10: ffff888007787a88 R11: ffffffff8475d8e7 R12: ffffffff83e80ff8
[    1.578810] R13: 0000000000000640 R14: 0000000000000640 R15: 0000000000000640
[    1.579823] FS:  0000000000000000(0000) GS:ffff88806cc00000(0000) knlGS:0000000000000000
[    1.580992] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    1.581869] CR2: ffff888006a01000 CR3: 0000000003e80000 CR4: 0000000000350ef0
[    1.582800] Kernel panic - not syncing: Fatal exception
[    1.583765] ---[ end Kernel panic - not syncing: Fatal exception ]---

Fix this by checking CONFIG_EXECMEM instead.

Link: https://lkml.kernel.org/r/20240528160838.102223-1-xiyou.wangcong@gmail.com
Fixes: 2c9e5d4a0082 ("bpf: remove CONFIG_BPF_JIT dependency on CONFIG_MODULES of")
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Acked-by: Mike Rapoport (IBM) <rppt@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vmalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 5d3aa2dc88a8..45e1506d58c3 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -722,7 +722,7 @@ int is_vmalloc_or_module_addr(const void *x)
 	 * and fall back on vmalloc() if that fails. Others
 	 * just put it in the vmalloc space.
 	 */
-#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
+#if defined(CONFIG_EXECMEM) && defined(MODULES_VADDR)
 	unsigned long addr = (unsigned long)kasan_reset_tag(x);
 	if (addr >= MODULES_VADDR && addr < MODULES_END)
 		return 1;
-- 
cgit v1.2.3


From 2ef3cec44c60ae171b287db7fc2aa341586d65ba Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Tue, 28 May 2024 12:48:06 +0200
Subject: kmsan: do not wipe out origin when doing partial unpoisoning

As noticed by Brian, KMSAN should not be zeroing the origin when
unpoisoning parts of a four-byte uninitialized value, e.g.:

    char a[4];
    kmsan_unpoison_memory(a, 1);

This led to false negatives, as certain poisoned values could receive zero
origins, preventing those values from being reported.

To fix the problem, check that kmsan_internal_set_shadow_origin() writes
zero origins only to slots which have zero shadow.

Link: https://lkml.kernel.org/r/20240528104807.738758-1-glider@google.com
Fixes: f80be4571b19 ("kmsan: add KMSAN runtime core")
Signed-off-by: Alexander Potapenko <glider@google.com>
Reported-by: Brian Johannesmeyer <bjohannesmeyer@gmail.com>
  Link: https://lore.kernel.org/lkml/20240524232804.1984355-1-bjohannesmeyer@gmail.com/T/
Reviewed-by: Marco Elver <elver@google.com>
Tested-by: Brian Johannesmeyer <bjohannesmeyer@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/kmsan/core.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/mm/kmsan/core.c b/mm/kmsan/core.c
index cf2d70e9c9a5..95f859e38c53 100644
--- a/mm/kmsan/core.c
+++ b/mm/kmsan/core.c
@@ -196,8 +196,7 @@ void kmsan_internal_set_shadow_origin(void *addr, size_t size, int b,
 				      u32 origin, bool checked)
 {
 	u64 address = (u64)addr;
-	void *shadow_start;
-	u32 *origin_start;
+	u32 *shadow_start, *origin_start;
 	size_t pad = 0;
 
 	KMSAN_WARN_ON(!kmsan_metadata_is_contiguous(addr, size));
@@ -225,8 +224,16 @@ void kmsan_internal_set_shadow_origin(void *addr, size_t size, int b,
 	origin_start =
 		(u32 *)kmsan_get_metadata((void *)address, KMSAN_META_ORIGIN);
 
-	for (int i = 0; i < size / KMSAN_ORIGIN_SIZE; i++)
-		origin_start[i] = origin;
+	/*
+	 * If the new origin is non-zero, assume that the shadow byte is also non-zero,
+	 * and unconditionally overwrite the old origin slot.
+	 * If the new origin is zero, overwrite the old origin slot iff the
+	 * corresponding shadow slot is zero.
+	 */
+	for (int i = 0; i < size / KMSAN_ORIGIN_SIZE; i++) {
+		if (origin || !shadow_start[i])
+			origin_start[i] = origin;
+	}
 }
 
 struct page *kmsan_vmalloc_to_page_or_null(void *vaddr)
-- 
cgit v1.2.3


From 730cdc2c72c6905a2eda2fccbbf67dcef1206590 Mon Sep 17 00:00:00 2001
From: Chengming Zhou <chengming.zhou@linux.dev>
Date: Tue, 28 May 2024 13:15:21 +0800
Subject: mm/ksm: fix ksm_pages_scanned accounting

Patch series "mm/ksm: fix some accounting problems", v3.

We encountered some abnormal ksm_pages_scanned and ksm_zero_pages during
some random tests.

1. ksm_pages_scanned unchanged even ksmd scanning has progress.
2. ksm_zero_pages maybe -1 in some rare cases.


This patch (of 2):

During testing, I found ksm_pages_scanned is unchanged although the
scan_get_next_rmap_item() did return valid rmap_item that is not NULL.

The reason is the scan_get_next_rmap_item() will return NULL after a full
scan, so ksm_do_scan() just return without accounting of the
ksm_pages_scanned.

Fix it by just putting ksm_pages_scanned accounting in that loop, and it
will be accounted more timely if that loop would last for a long time.

Link: https://lkml.kernel.org/r/20240528-b4-ksm-counters-v3-0-34bb358fdc13@linux.dev
Link: https://lkml.kernel.org/r/20240528-b4-ksm-counters-v3-1-34bb358fdc13@linux.dev
Fixes: b348b5fe2b5f ("mm/ksm: add pages scanned metric")
Signed-off-by: Chengming Zhou <chengming.zhou@linux.dev>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: xu xin <xu.xin16@zte.com.cn>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Cc: Stefan Roesch <shr@devkernel.io>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/ksm.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/mm/ksm.c b/mm/ksm.c
index 452ac8346e6e..9e99cb12d330 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2754,18 +2754,16 @@ static void ksm_do_scan(unsigned int scan_npages)
 {
 	struct ksm_rmap_item *rmap_item;
 	struct page *page;
-	unsigned int npages = scan_npages;
 
-	while (npages-- && likely(!freezing(current))) {
+	while (scan_npages-- && likely(!freezing(current))) {
 		cond_resched();
 		rmap_item = scan_get_next_rmap_item(&page);
 		if (!rmap_item)
 			return;
 		cmp_and_merge_page(page, rmap_item);
 		put_page(page);
+		ksm_pages_scanned++;
 	}
-
-	ksm_pages_scanned += scan_npages - npages;
 }
 
 static int ksmd_should_run(void)
-- 
cgit v1.2.3


From c2dc78b86e0821ecf9a9d0c35dba2618279a5bb6 Mon Sep 17 00:00:00 2001
From: Chengming Zhou <chengming.zhou@linux.dev>
Date: Tue, 28 May 2024 13:15:22 +0800
Subject: mm/ksm: fix ksm_zero_pages accounting

We normally ksm_zero_pages++ in ksmd when page is merged with zero page,
but ksm_zero_pages-- is done from page tables side, where there is no any
accessing protection of ksm_zero_pages.

So we can read very exceptional value of ksm_zero_pages in rare cases,
such as -1, which is very confusing to users.

Fix it by changing to use atomic_long_t, and the same case with the
mm->ksm_zero_pages.

Link: https://lkml.kernel.org/r/20240528-b4-ksm-counters-v3-2-34bb358fdc13@linux.dev
Fixes: e2942062e01d ("ksm: count all zero pages placed by KSM")
Fixes: 6080d19f0704 ("ksm: add ksm zero pages for each process")
Signed-off-by: Chengming Zhou <chengming.zhou@linux.dev>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Cc: Stefan Roesch <shr@devkernel.io>
Cc: xu xin <xu.xin16@zte.com.cn>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/proc/base.c           |  2 +-
 include/linux/ksm.h      | 17 ++++++++++++++---
 include/linux/mm_types.h |  2 +-
 mm/ksm.c                 | 11 +++++------
 4 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 18550c071d71..72a1acd03675 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3214,7 +3214,7 @@ static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns,
 	mm = get_task_mm(task);
 	if (mm) {
 		seq_printf(m, "ksm_rmap_items %lu\n", mm->ksm_rmap_items);
-		seq_printf(m, "ksm_zero_pages %lu\n", mm->ksm_zero_pages);
+		seq_printf(m, "ksm_zero_pages %ld\n", mm_ksm_zero_pages(mm));
 		seq_printf(m, "ksm_merging_pages %lu\n", mm->ksm_merging_pages);
 		seq_printf(m, "ksm_process_profit %ld\n", ksm_process_profit(mm));
 		mmput(mm);
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 52c63a9c5a9c..11690dacd986 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -33,16 +33,27 @@ void __ksm_exit(struct mm_struct *mm);
  */
 #define is_ksm_zero_pte(pte)	(is_zero_pfn(pte_pfn(pte)) && pte_dirty(pte))
 
-extern unsigned long ksm_zero_pages;
+extern atomic_long_t ksm_zero_pages;
+
+static inline void ksm_map_zero_page(struct mm_struct *mm)
+{
+	atomic_long_inc(&ksm_zero_pages);
+	atomic_long_inc(&mm->ksm_zero_pages);
+}
 
 static inline void ksm_might_unmap_zero_page(struct mm_struct *mm, pte_t pte)
 {
 	if (is_ksm_zero_pte(pte)) {
-		ksm_zero_pages--;
-		mm->ksm_zero_pages--;
+		atomic_long_dec(&ksm_zero_pages);
+		atomic_long_dec(&mm->ksm_zero_pages);
 	}
 }
 
+static inline long mm_ksm_zero_pages(struct mm_struct *mm)
+{
+	return atomic_long_read(&mm->ksm_zero_pages);
+}
+
 static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
 	if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 24323c7d0bd4..af3a0256fa93 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -985,7 +985,7 @@ struct mm_struct {
 		 * Represent how many empty pages are merged with kernel zero
 		 * pages when enabling KSM use_zero_pages.
 		 */
-		unsigned long ksm_zero_pages;
+		atomic_long_t ksm_zero_pages;
 #endif /* CONFIG_KSM */
 #ifdef CONFIG_LRU_GEN_WALKS_MMU
 		struct {
diff --git a/mm/ksm.c b/mm/ksm.c
index 9e99cb12d330..34c4820e0d3d 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -296,7 +296,7 @@ static bool ksm_use_zero_pages __read_mostly;
 static bool ksm_smart_scan = true;
 
 /* The number of zero pages which is placed by KSM */
-unsigned long ksm_zero_pages;
+atomic_long_t ksm_zero_pages = ATOMIC_LONG_INIT(0);
 
 /* The number of pages that have been skipped due to "smart scanning" */
 static unsigned long ksm_pages_skipped;
@@ -1429,8 +1429,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 		 * the dirty bit in zero page's PTE is set.
 		 */
 		newpte = pte_mkdirty(pte_mkspecial(pfn_pte(page_to_pfn(kpage), vma->vm_page_prot)));
-		ksm_zero_pages++;
-		mm->ksm_zero_pages++;
+		ksm_map_zero_page(mm);
 		/*
 		 * We're replacing an anonymous page with a zero page, which is
 		 * not anonymous. We need to do proper accounting otherwise we
@@ -3374,7 +3373,7 @@ static void wait_while_offlining(void)
 #ifdef CONFIG_PROC_FS
 long ksm_process_profit(struct mm_struct *mm)
 {
-	return (long)(mm->ksm_merging_pages + mm->ksm_zero_pages) * PAGE_SIZE -
+	return (long)(mm->ksm_merging_pages + mm_ksm_zero_pages(mm)) * PAGE_SIZE -
 		mm->ksm_rmap_items * sizeof(struct ksm_rmap_item);
 }
 #endif /* CONFIG_PROC_FS */
@@ -3663,7 +3662,7 @@ KSM_ATTR_RO(pages_skipped);
 static ssize_t ksm_zero_pages_show(struct kobject *kobj,
 				struct kobj_attribute *attr, char *buf)
 {
-	return sysfs_emit(buf, "%ld\n", ksm_zero_pages);
+	return sysfs_emit(buf, "%ld\n", atomic_long_read(&ksm_zero_pages));
 }
 KSM_ATTR_RO(ksm_zero_pages);
 
@@ -3672,7 +3671,7 @@ static ssize_t general_profit_show(struct kobject *kobj,
 {
 	long general_profit;
 
-	general_profit = (ksm_pages_sharing + ksm_zero_pages) * PAGE_SIZE -
+	general_profit = (ksm_pages_sharing + atomic_long_read(&ksm_zero_pages)) * PAGE_SIZE -
 				ksm_rmap_items * sizeof(struct ksm_rmap_item);
 
 	return sysfs_emit(buf, "%ld\n", general_profit);
-- 
cgit v1.2.3


From 8daf9c702ee7f825f0de8600abff764acfedea13 Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Tue, 28 May 2024 22:53:23 +0200
Subject: mm/hugetlb: do not call vma_add_reservation upon ENOMEM

sysbot reported a splat [1] on __unmap_hugepage_range().  This is because
vma_needs_reservation() can return -ENOMEM if
allocate_file_region_entries() fails to allocate the file_region struct
for the reservation.

Check for that and do not call vma_add_reservation() if that is the case,
otherwise region_abort() and region_del() will see that we do not have any
file_regions.

If we detect that vma_needs_reservation() returned -ENOMEM, we clear the
hugetlb_restore_reserve flag as if this reservation was still consumed, so
free_huge_folio() will not increment the resv count.

[1] https://lore.kernel.org/linux-mm/0000000000004096100617c58d54@google.com/T/#ma5983bc1ab18a54910da83416b3f89f3c7ee43aa

Link: https://lkml.kernel.org/r/20240528205323.20439-1-osalvador@suse.de
Fixes: df7a6d1f6405 ("mm/hugetlb: restore the reservation if needed")
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reported-and-tested-by: syzbot+d3fe2dc5ffe9380b714b@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-mm/0000000000004096100617c58d54@google.com/
Cc: Breno Leitao <leitao@debian.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6be78e7d4f6e..f35abff8be60 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5768,8 +5768,20 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 * do_exit() will not see it, and will keep the reservation
 		 * forever.
 		 */
-		if (adjust_reservation && vma_needs_reservation(h, vma, address))
-			vma_add_reservation(h, vma, address);
+		if (adjust_reservation) {
+			int rc = vma_needs_reservation(h, vma, address);
+
+			if (rc < 0)
+				/* Pressumably allocate_file_region_entries failed
+				 * to allocate a file_region struct. Clear
+				 * hugetlb_restore_reserve so that global reserve
+				 * count will not be incremented by free_huge_folio.
+				 * Act as if we consumed the reservation.
+				 */
+				folio_clear_hugetlb_restore_reserve(page_folio(page));
+			else if (rc)
+				vma_add_reservation(h, vma, address);
+		}
 
 		tlb_remove_page_size(tlb, page, huge_page_size(h));
 		/*
-- 
cgit v1.2.3


From 3f0c44c8c21cfa3bb6b756b939491b7a60932cd1 Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
Date: Mon, 27 May 2024 15:30:06 -0300
Subject: codetag: avoid race at alloc_slab_obj_exts

When CONFIG_MEM_ALLOC_PROFILING_DEBUG is enabled, the following warning
may be noticed:

[   48.299584] ------------[ cut here ]------------
[   48.300092] alloc_tag was not set
[   48.300528] WARNING: CPU: 2 PID: 1361 at include/linux/alloc_tag.h:130 alloc_tagging_slab_free_hook+0x84/0xc7
[   48.301305] Modules linked in:
[   48.301553] CPU: 2 PID: 1361 Comm: systemd-udevd Not tainted 6.10.0-rc1-00003-gac8755535862 #176
[   48.302196] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
[   48.302752] RIP: 0010:alloc_tagging_slab_free_hook+0x84/0xc7
[   48.303169] Code: 8d 1c c4 48 85 db 74 4d 48 83 3b 00 75 1e 80 3d 65 02 86 04 00 75 15 48 c7 c7 11 48 1d 85 c6 05 55 02 86 04 01 e8 64 44 a5 ff <0f> 0b 48 8b 03 48 85 c0 74 21 48 83 f8 01 74 14 48 8b 50 20 48 f7
[   48.304411] RSP: 0018:ffff8880111b7d40 EFLAGS: 00010282
[   48.304916] RAX: 0000000000000000 RBX: ffff88800fcc9008 RCX: 0000000000000000
[   48.305455] RDX: 0000000080000000 RSI: ffff888014060000 RDI: ffffed1002236f97
[   48.305979] RBP: 0000000000001100 R08: fffffbfff0aa73a1 R09: 0000000000000000
[   48.306473] R10: ffffffff814515e5 R11: 0000000000000003 R12: ffff88800fcc9000
[   48.306943] R13: ffff88800b2e5cc0 R14: ffff8880111b7d90 R15: 0000000000000000
[   48.307529] FS:  00007faf5d1908c0(0000) GS:ffff88806cf00000(0000) knlGS:0000000000000000
[   48.308223] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   48.308710] CR2: 000058fb220c9118 CR3: 00000000110cc000 CR4: 0000000000750ef0
[   48.309274] PKRU: 55555554
[   48.309804] Call Trace:
[   48.310029]  <TASK>
[   48.310290]  ? show_regs+0x84/0x8d
[   48.310722]  ? alloc_tagging_slab_free_hook+0x84/0xc7
[   48.311298]  ? __warn+0x13b/0x2ff
[   48.311580]  ? alloc_tagging_slab_free_hook+0x84/0xc7
[   48.311987]  ? report_bug+0x2ce/0x3ab
[   48.312292]  ? handle_bug+0x8c/0x107
[   48.312563]  ? exc_invalid_op+0x34/0x6f
[   48.312842]  ? asm_exc_invalid_op+0x1a/0x20
[   48.313173]  ? this_cpu_in_panic+0x1c/0x72
[   48.313503]  ? alloc_tagging_slab_free_hook+0x84/0xc7
[   48.313880]  ? putname+0x143/0x14e
[   48.314152]  kmem_cache_free+0xe9/0x214
[   48.314454]  putname+0x143/0x14e
[   48.314712]  do_unlinkat+0x413/0x45e
[   48.315001]  ? __pfx_do_unlinkat+0x10/0x10
[   48.315388]  ? __check_object_size+0x4d7/0x525
[   48.315744]  ? __sanitizer_cov_trace_pc+0x20/0x4a
[   48.316167]  ? __sanitizer_cov_trace_pc+0x20/0x4a
[   48.316757]  ? getname_flags+0x4ed/0x500
[   48.317261]  __x64_sys_unlink+0x42/0x4a
[   48.317741]  do_syscall_64+0xe2/0x149
[   48.318171]  entry_SYSCALL_64_after_hwframe+0x76/0x7e
[   48.318602] RIP: 0033:0x7faf5d8850ab
[   48.318891] Code: fd ff ff e8 27 dd 01 00 0f 1f 80 00 00 00 00 f3 0f 1e fa b8 5f 00 00 00 0f 05 c3 0f 1f 40 00 f3 0f 1e fa b8 57 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 05 c3 0f 1f 40 00 48 8b 15 41 2d 0e 00 f7 d8
[   48.320649] RSP: 002b:00007ffc44982b38 EFLAGS: 00000246 ORIG_RAX: 0000000000000057
[   48.321182] RAX: ffffffffffffffda RBX: 00005ba344a44680 RCX: 00007faf5d8850ab
[   48.321667] RDX: 0000000000000000 RSI: 00005ba344a44430 RDI: 00007ffc44982b40
[   48.322139] RBP: 00007ffc44982c00 R08: 0000000000000000 R09: 0000000000000007
[   48.322598] R10: 00005ba344a44430 R11: 0000000000000246 R12: 0000000000000000
[   48.323071] R13: 00007ffc44982b40 R14: 0000000000000000 R15: 0000000000000000
[   48.323596]  </TASK>

This is due to a race when two objects are allocated from the same slab,
which did not have an obj_exts allocated for.

In such a case, the two threads will notice the NULL obj_exts and after
one assigns slab->obj_exts, the second one will happily do the exchange if
it reads this new assigned value.

In order to avoid that, verify that the read obj_exts does not point to an
allocated obj_exts before doing the exchange.

Link: https://lkml.kernel.org/r/20240527183007.1595037-1-cascardo@igalia.com
Fixes: 09c46563ff6d ("codetag: debug: introduce OBJEXTS_ALLOC_FAIL to mark failed slab_ext allocations")
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Thadeu Lima de Souza Cascardo <cascardo@igalia.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/slub.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 0809760cf789..1373ac365a46 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1952,7 +1952,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
 #ifdef CONFIG_MEMCG
 	new_exts |= MEMCG_DATA_OBJEXTS;
 #endif
-	old_exts = slab->obj_exts;
+	old_exts = READ_ONCE(slab->obj_exts);
 	handle_failed_objexts_alloc(old_exts, vec, objects);
 	if (new_slab) {
 		/*
@@ -1961,7 +1961,8 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
 		 * be simply assigned.
 		 */
 		slab->obj_exts = new_exts;
-	} else if (cmpxchg(&slab->obj_exts, old_exts, new_exts) != old_exts) {
+	} else if ((old_exts & ~OBJEXTS_FLAGS_MASK) ||
+		   cmpxchg(&slab->obj_exts, old_exts, new_exts) != old_exts) {
 		/*
 		 * If the slab is already in use, somebody can allocate and
 		 * assign slabobj_exts in parallel. In this case the existing
-- 
cgit v1.2.3


From 9415983599413f847ec9f081e9f9e5ed6cdeb342 Mon Sep 17 00:00:00 2001
From: Suren Baghdasaryan <surenb@google.com>
Date: Fri, 31 May 2024 13:53:50 -0700
Subject: mm: fix xyz_noprof functions calling profiled functions

Grepping /proc/allocinfo for "noprof" reveals several xyz_noprof
functions, which means internally they are calling profiled functions.
This should never happen as such calls move allocation charge from a
higher level location where it should be accounted for into these lower
level helpers.  Fix this by replacing profiled function calls with noprof
ones.

Link: https://lkml.kernel.org/r/20240531205350.3973009-1-surenb@google.com
Fixes: b951aaff5035 ("mm: enable page allocation tagging")
Fixes: e26d8769da6d ("mempool: hook up to memory allocation profiling")
Fixes: 88ae5fb755b0 ("mm: vmalloc: enable memory allocation profiling")
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Reviewed-by: Kees Cook <kees@kernel.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/filemap.c |  2 +-
 mm/mempool.c |  2 +-
 mm/util.c    | 10 +++++-----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 382c3d06bfb1..876cc64aadd7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1000,7 +1000,7 @@ struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
 		do {
 			cpuset_mems_cookie = read_mems_allowed_begin();
 			n = cpuset_mem_spread_node();
-			folio = __folio_alloc_node(gfp, order, n);
+			folio = __folio_alloc_node_noprof(gfp, order, n);
 		} while (!folio && read_mems_allowed_retry(cpuset_mems_cookie));
 
 		return folio;
diff --git a/mm/mempool.c b/mm/mempool.c
index 6ece63a00acf..3223337135d0 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -273,7 +273,7 @@ mempool_t *mempool_create_node_noprof(int min_nr, mempool_alloc_t *alloc_fn,
 {
 	mempool_t *pool;
 
-	pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id);
+	pool = kmalloc_node_noprof(sizeof(*pool), gfp_mask | __GFP_ZERO, node_id);
 	if (!pool)
 		return NULL;
 
diff --git a/mm/util.c b/mm/util.c
index c9e519e6811f..6c3e6710e4de 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -705,7 +705,7 @@ void *kvrealloc_noprof(const void *p, size_t oldsize, size_t newsize, gfp_t flag
 
 	if (oldsize >= newsize)
 		return (void *)p;
-	newp = kvmalloc(newsize, flags);
+	newp = kvmalloc_noprof(newsize, flags);
 	if (!newp)
 		return NULL;
 	memcpy(newp, p, oldsize);
@@ -726,7 +726,7 @@ void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags)
 
 	if (unlikely(check_mul_overflow(n, size, &bytes)))
 		return NULL;
-	return __vmalloc(bytes, flags);
+	return __vmalloc_noprof(bytes, flags);
 }
 EXPORT_SYMBOL(__vmalloc_array_noprof);
 
@@ -737,7 +737,7 @@ EXPORT_SYMBOL(__vmalloc_array_noprof);
  */
 void *vmalloc_array_noprof(size_t n, size_t size)
 {
-	return __vmalloc_array(n, size, GFP_KERNEL);
+	return __vmalloc_array_noprof(n, size, GFP_KERNEL);
 }
 EXPORT_SYMBOL(vmalloc_array_noprof);
 
@@ -749,7 +749,7 @@ EXPORT_SYMBOL(vmalloc_array_noprof);
  */
 void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags)
 {
-	return __vmalloc_array(n, size, flags | __GFP_ZERO);
+	return __vmalloc_array_noprof(n, size, flags | __GFP_ZERO);
 }
 EXPORT_SYMBOL(__vcalloc_noprof);
 
@@ -760,7 +760,7 @@ EXPORT_SYMBOL(__vcalloc_noprof);
  */
 void *vcalloc_noprof(size_t n, size_t size)
 {
-	return __vmalloc_array(n, size, GFP_KERNEL | __GFP_ZERO);
+	return __vmalloc_array_noprof(n, size, GFP_KERNEL | __GFP_ZERO);
 }
 EXPORT_SYMBOL(vcalloc_noprof);
 
-- 
cgit v1.2.3


From 7373a51e7998b508af7136530f3a997b286ce81c Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Date: Tue, 4 Jun 2024 22:42:55 +0900
Subject: nilfs2: fix nilfs_empty_dir() misjudgment and long loop on I/O errors

The error handling in nilfs_empty_dir() when a directory folio/page read
fails is incorrect, as in the old ext2 implementation, and if the
folio/page cannot be read or nilfs_check_folio() fails, it will falsely
determine the directory as empty and corrupt the file system.

In addition, since nilfs_empty_dir() does not immediately return on a
failed folio/page read, but continues to loop, this can cause a long loop
with I/O if i_size of the directory's inode is also corrupted, causing the
log writer thread to wait and hang, as reported by syzbot.

Fix these issues by making nilfs_empty_dir() immediately return a false
value (0) if it fails to get a directory folio/page.

Link: https://lkml.kernel.org/r/20240604134255.7165-1-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Reported-by: syzbot+c8166c541d3971bf6c87@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=c8166c541d3971bf6c87
Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations")
Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/nilfs2/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index a002a44ff161..52e50b1b7f22 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -607,7 +607,7 @@ int nilfs_empty_dir(struct inode *inode)
 
 		kaddr = nilfs_get_folio(inode, i, &folio);
 		if (IS_ERR(kaddr))
-			continue;
+			return 0;
 
 		de = (struct nilfs_dir_entry *)kaddr;
 		kaddr += nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1);
-- 
cgit v1.2.3


From 03e4a092be8ce3de7c1baa7ae14e68b64e3ea644 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 3 Jun 2024 14:42:30 -0700
Subject: ice: fix iteration of TLVs in Preserved Fields Area

The ice_get_pfa_module_tlv() function iterates over the Type-Length-Value
structures in the Preserved Fields Area (PFA) of the NVM. This is used by
the driver to access data such as the Part Board Assembly identifier.

The function uses simple logic to iterate over the PFA. First, the pointer
to the PFA in the NVM is read. Then the total length of the PFA is read
from the first word.

A pointer to the first TLV is initialized, and a simple loop iterates over
each TLV. The pointer is moved forward through the NVM until it exceeds the
PFA area.

The logic seems sound, but it is missing a key detail. The Preserved
Fields Area length includes one additional final word. This is documented
in the device data sheet as a dummy word which contains 0xFFFF. All NVMs
have this extra word.

If the driver tries to scan for a TLV that is not in the PFA, it will read
past the size of the PFA. It reads and interprets the last dummy word of
the PFA as a TLV with type 0xFFFF. It then reads the word following the PFA
as a length.

The PFA resides within the Shadow RAM portion of the NVM, which is
relatively small. All of its offsets are within a 16-bit size. The PFA
pointer and TLV pointer are stored by the driver as 16-bit values.

In almost all cases, the word following the PFA will be such that
interpreting it as a length will result in 16-bit arithmetic overflow. Once
overflowed, the new next_tlv value is now below the maximum offset of the
PFA. Thus, the driver will continue to iterate the data as TLVs. In the
worst case, the driver hits on a sequence of reads which loop back to
reading the same offsets in an endless loop.

To fix this, we need to correct the loop iteration check to account for
this extra word at the end of the PFA. This alone is sufficient to resolve
the known cases of this issue in the field. However, it is plausible that
an NVM could be misconfigured or have corrupt data which results in the
same kind of overflow. Protect against this by using check_add_overflow
when calculating both the maximum offset of the TLVs, and when calculating
the next_tlv offset at the end of each loop iteration. This ensures that
the driver will not get stuck in an infinite loop when scanning the PFA.

Fixes: e961b679fb0b ("ice: add board identifier info to devlink .info_get")
Co-developed-by: Paul Greenwalt <paul.greenwalt@intel.com>
Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20240603-net-2024-05-30-intel-net-fixes-v2-1-e3563aa89b0c@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/ice/ice_nvm.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index 84eab92dc03c..ea7cbdf8492d 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -440,8 +440,7 @@ int
 ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
 		       u16 module_type)
 {
-	u16 pfa_len, pfa_ptr;
-	u16 next_tlv;
+	u16 pfa_len, pfa_ptr, next_tlv, max_tlv;
 	int status;
 
 	status = ice_read_sr_word(hw, ICE_SR_PFA_PTR, &pfa_ptr);
@@ -454,11 +453,23 @@ ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
 		ice_debug(hw, ICE_DBG_INIT, "Failed to read PFA length.\n");
 		return status;
 	}
+
+	/* The Preserved Fields Area contains a sequence of Type-Length-Value
+	 * structures which define its contents. The PFA length includes all
+	 * of the TLVs, plus the initial length word itself, *and* one final
+	 * word at the end after all of the TLVs.
+	 */
+	if (check_add_overflow(pfa_ptr, pfa_len - 1, &max_tlv)) {
+		dev_warn(ice_hw_to_dev(hw), "PFA starts at offset %u. PFA length of %u caused 16-bit arithmetic overflow.\n",
+			 pfa_ptr, pfa_len);
+		return -EINVAL;
+	}
+
 	/* Starting with first TLV after PFA length, iterate through the list
 	 * of TLVs to find the requested one.
 	 */
 	next_tlv = pfa_ptr + 1;
-	while (next_tlv < pfa_ptr + pfa_len) {
+	while (next_tlv < max_tlv) {
 		u16 tlv_sub_module_type;
 		u16 tlv_len;
 
@@ -482,10 +493,13 @@ ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
 			}
 			return -EINVAL;
 		}
-		/* Check next TLV, i.e. current TLV pointer + length + 2 words
-		 * (for current TLV's type and length)
-		 */
-		next_tlv = next_tlv + tlv_len + 2;
+
+		if (check_add_overflow(next_tlv, 2, &next_tlv) ||
+		    check_add_overflow(next_tlv, tlv_len, &next_tlv)) {
+			dev_warn(ice_hw_to_dev(hw), "TLV of type %u and length 0x%04x caused 16-bit arithmetic overflow. The PFA starts at 0x%04x and has length of 0x%04x\n",
+				 tlv_sub_module_type, tlv_len, pfa_ptr, pfa_len);
+			return -EINVAL;
+		}
 	}
 	/* Module does not exist */
 	return -ENOENT;
-- 
cgit v1.2.3


From cfa747a66e5da34793ac08c26b814e7709613fab Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 3 Jun 2024 14:42:31 -0700
Subject: ice: fix reads from NVM Shadow RAM on E830 and E825-C devices

The ice driver reads data from the Shadow RAM portion of the NVM during
initialization, including data used to identify the NVM image and device,
such as the ETRACK ID used to populate devlink dev info fw.bundle.

Currently it is using a fixed offset defined by ICE_CSS_HEADER_LENGTH to
compute the appropriate offset. This worked fine for E810 and E822 devices
which both have CSS header length of 330 words.

Other devices, including both E825-C and E830 devices have different sizes
for their CSS header. The use of a hard coded value results in the driver
reading from the wrong block in the NVM when attempting to access the
Shadow RAM copy. This results in the driver reporting the fw.bundle as 0x0
in both the devlink dev info and ethtool -i output.

The first E830 support was introduced by commit ba20ecb1d1bb ("ice: Hook up
4 E830 devices by adding their IDs") and the first E825-C support was
introducted by commit f64e18944233 ("ice: introduce new E825C devices
family")

The NVM actually contains the CSS header length embedded in it. Remove the
hard coded value and replace it with logic to read the length from the NVM
directly. This is more resilient against all existing and future hardware,
vs looking up the expected values from a table. It ensures the driver will
read from the appropriate place when determining the ETRACK ID value used
for populating the fw.bundle_id and for reporting in ethtool -i.

The CSS header length for both the active and inactive flash bank is stored
in the ice_bank_info structure to avoid unnecessary duplicate work when
accessing multiple words of the Shadow RAM. Both banks are read in the
unlikely event that the header length is different for the NVM in the
inactive bank, rather than being different only by the overall device
family.

Fixes: ba20ecb1d1bb ("ice: Hook up 4 E830 devices by adding their IDs")
Co-developed-by: Paul Greenwalt <paul.greenwalt@intel.com>
Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20240603-net-2024-05-30-intel-net-fixes-v2-2-e3563aa89b0c@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/ice/ice_nvm.c  | 88 ++++++++++++++++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_type.h | 14 +++--
 2 files changed, 93 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index ea7cbdf8492d..59e8879ac059 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -374,11 +374,25 @@ ice_read_nvm_module(struct ice_hw *hw, enum ice_bank_select bank, u32 offset, u1
  *
  * Read the specified word from the copy of the Shadow RAM found in the
  * specified NVM module.
+ *
+ * Note that the Shadow RAM copy is always located after the CSS header, and
+ * is aligned to 64-byte (32-word) offsets.
  */
 static int
 ice_read_nvm_sr_copy(struct ice_hw *hw, enum ice_bank_select bank, u32 offset, u16 *data)
 {
-	return ice_read_nvm_module(hw, bank, ICE_NVM_SR_COPY_WORD_OFFSET + offset, data);
+	u32 sr_copy;
+
+	switch (bank) {
+	case ICE_ACTIVE_FLASH_BANK:
+		sr_copy = roundup(hw->flash.banks.active_css_hdr_len, 32);
+		break;
+	case ICE_INACTIVE_FLASH_BANK:
+		sr_copy = roundup(hw->flash.banks.inactive_css_hdr_len, 32);
+		break;
+	}
+
+	return ice_read_nvm_module(hw, bank, sr_copy + offset, data);
 }
 
 /**
@@ -1023,6 +1037,72 @@ static int ice_determine_active_flash_banks(struct ice_hw *hw)
 	return 0;
 }
 
+/**
+ * ice_get_nvm_css_hdr_len - Read the CSS header length from the NVM CSS header
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash bank
+ * @hdr_len: storage for header length in words
+ *
+ * Read the CSS header length from the NVM CSS header and add the Authentication
+ * header size, and then convert to words.
+ *
+ * Return: zero on success, or a negative error code on failure.
+ */
+static int
+ice_get_nvm_css_hdr_len(struct ice_hw *hw, enum ice_bank_select bank,
+			u32 *hdr_len)
+{
+	u16 hdr_len_l, hdr_len_h;
+	u32 hdr_len_dword;
+	int status;
+
+	status = ice_read_nvm_module(hw, bank, ICE_NVM_CSS_HDR_LEN_L,
+				     &hdr_len_l);
+	if (status)
+		return status;
+
+	status = ice_read_nvm_module(hw, bank, ICE_NVM_CSS_HDR_LEN_H,
+				     &hdr_len_h);
+	if (status)
+		return status;
+
+	/* CSS header length is in DWORD, so convert to words and add
+	 * authentication header size
+	 */
+	hdr_len_dword = hdr_len_h << 16 | hdr_len_l;
+	*hdr_len = (hdr_len_dword * 2) + ICE_NVM_AUTH_HEADER_LEN;
+
+	return 0;
+}
+
+/**
+ * ice_determine_css_hdr_len - Discover CSS header length for the device
+ * @hw: pointer to the HW struct
+ *
+ * Determine the size of the CSS header at the start of the NVM module. This
+ * is useful for locating the Shadow RAM copy in the NVM, as the Shadow RAM is
+ * always located just after the CSS header.
+ *
+ * Return: zero on success, or a negative error code on failure.
+ */
+static int ice_determine_css_hdr_len(struct ice_hw *hw)
+{
+	struct ice_bank_info *banks = &hw->flash.banks;
+	int status;
+
+	status = ice_get_nvm_css_hdr_len(hw, ICE_ACTIVE_FLASH_BANK,
+					 &banks->active_css_hdr_len);
+	if (status)
+		return status;
+
+	status = ice_get_nvm_css_hdr_len(hw, ICE_INACTIVE_FLASH_BANK,
+					 &banks->inactive_css_hdr_len);
+	if (status)
+		return status;
+
+	return 0;
+}
+
 /**
  * ice_init_nvm - initializes NVM setting
  * @hw: pointer to the HW struct
@@ -1069,6 +1149,12 @@ int ice_init_nvm(struct ice_hw *hw)
 		return status;
 	}
 
+	status = ice_determine_css_hdr_len(hw);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM, "Failed to determine Shadow RAM copy offsets.\n");
+		return status;
+	}
+
 	status = ice_get_nvm_ver_info(hw, ICE_ACTIVE_FLASH_BANK, &flash->nvm);
 	if (status) {
 		ice_debug(hw, ICE_DBG_INIT, "Failed to read NVM info.\n");
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index f0796a93f428..eef397e5baa0 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -482,6 +482,8 @@ struct ice_bank_info {
 	u32 orom_size;				/* Size of OROM bank */
 	u32 netlist_ptr;			/* Pointer to 1st Netlist bank */
 	u32 netlist_size;			/* Size of Netlist bank */
+	u32 active_css_hdr_len;			/* Active CSS header length */
+	u32 inactive_css_hdr_len;		/* Inactive CSS header length */
 	enum ice_flash_bank nvm_bank;		/* Active NVM bank */
 	enum ice_flash_bank orom_bank;		/* Active OROM bank */
 	enum ice_flash_bank netlist_bank;	/* Active Netlist bank */
@@ -1087,17 +1089,13 @@ struct ice_aq_get_set_rss_lut_params {
 #define ICE_SR_SECTOR_SIZE_IN_WORDS	0x800
 
 /* CSS Header words */
+#define ICE_NVM_CSS_HDR_LEN_L			0x02
+#define ICE_NVM_CSS_HDR_LEN_H			0x03
 #define ICE_NVM_CSS_SREV_L			0x14
 #define ICE_NVM_CSS_SREV_H			0x15
 
-/* Length of CSS header section in words */
-#define ICE_CSS_HEADER_LENGTH			330
-
-/* Offset of Shadow RAM copy in the NVM bank area. */
-#define ICE_NVM_SR_COPY_WORD_OFFSET		roundup(ICE_CSS_HEADER_LENGTH, 32)
-
-/* Size in bytes of Option ROM trailer */
-#define ICE_NVM_OROM_TRAILER_LENGTH		(2 * ICE_CSS_HEADER_LENGTH)
+/* Length of Authentication header section in words */
+#define ICE_NVM_AUTH_HEADER_LEN			0x08
 
 /* The Link Topology Netlist section is stored as a series of words. It is
  * stored in the NVM as a TLV, with the first two words containing the type
-- 
cgit v1.2.3


From adbf5a42341f6ea038d3626cd4437d9f0ad0b2dd Mon Sep 17 00:00:00 2001
From: Larysa Zaremba <larysa.zaremba@intel.com>
Date: Mon, 3 Jun 2024 14:42:32 -0700
Subject: ice: remove af_xdp_zc_qps bitmap

Referenced commit has introduced a bitmap to distinguish between ZC and
copy-mode AF_XDP queues, because xsk_get_pool_from_qid() does not do this
for us.

The bitmap would be especially useful when restoring previous state after
rebuild, if only it was not reallocated in the process. This leads to e.g.
xdpsock dying after changing number of queues.

Instead of preserving the bitmap during the rebuild, remove it completely
and distinguish between ZC and copy-mode queues based on the presence of
a device associated with the pool.

Fixes: e102db780e1c ("ice: track AF_XDP ZC enabled queues in bitmap")
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20240603-net-2024-05-30-intel-net-fixes-v2-3-e3563aa89b0c@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/ice/ice.h     | 32 +++++++++++++++++++++-----------
 drivers/net/ethernet/intel/ice/ice_lib.c |  8 --------
 drivers/net/ethernet/intel/ice/ice_xsk.c | 13 ++++++-------
 3 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 6ad8002b22e1..5e51cb84b600 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -409,7 +409,6 @@ struct ice_vsi {
 	struct ice_tc_cfg tc_cfg;
 	struct bpf_prog *xdp_prog;
 	struct ice_tx_ring **xdp_rings;	 /* XDP ring array */
-	unsigned long *af_xdp_zc_qps;	 /* tracks AF_XDP ZC enabled qps */
 	u16 num_xdp_txq;		 /* Used XDP queues */
 	u8 xdp_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
 
@@ -746,6 +745,25 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring)
 	ring->flags |= ICE_TX_FLAGS_RING_XDP;
 }
 
+/**
+ * ice_get_xp_from_qid - get ZC XSK buffer pool bound to a queue ID
+ * @vsi: pointer to VSI
+ * @qid: index of a queue to look at XSK buff pool presence
+ *
+ * Return: A pointer to xsk_buff_pool structure if there is a buffer pool
+ * attached and configured as zero-copy, NULL otherwise.
+ */
+static inline struct xsk_buff_pool *ice_get_xp_from_qid(struct ice_vsi *vsi,
+							u16 qid)
+{
+	struct xsk_buff_pool *pool = xsk_get_pool_from_qid(vsi->netdev, qid);
+
+	if (!ice_is_xdp_ena_vsi(vsi))
+		return NULL;
+
+	return (pool && pool->dev) ? pool : NULL;
+}
+
 /**
  * ice_xsk_pool - get XSK buffer pool bound to a ring
  * @ring: Rx ring to use
@@ -758,10 +776,7 @@ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring)
 	struct ice_vsi *vsi = ring->vsi;
 	u16 qid = ring->q_index;
 
-	if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps))
-		return NULL;
-
-	return xsk_get_pool_from_qid(vsi->netdev, qid);
+	return ice_get_xp_from_qid(vsi, qid);
 }
 
 /**
@@ -786,12 +801,7 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid)
 	if (!ring)
 		return;
 
-	if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) {
-		ring->xsk_pool = NULL;
-		return;
-	}
-
-	ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid);
+	ring->xsk_pool = ice_get_xp_from_qid(vsi, qid);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 5371e91f6bbb..c0a7ff6c7e87 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -114,14 +114,8 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
 	if (!vsi->q_vectors)
 		goto err_vectors;
 
-	vsi->af_xdp_zc_qps = bitmap_zalloc(max_t(int, vsi->alloc_txq, vsi->alloc_rxq), GFP_KERNEL);
-	if (!vsi->af_xdp_zc_qps)
-		goto err_zc_qps;
-
 	return 0;
 
-err_zc_qps:
-	devm_kfree(dev, vsi->q_vectors);
 err_vectors:
 	devm_kfree(dev, vsi->rxq_map);
 err_rxq_map:
@@ -309,8 +303,6 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi)
 
 	dev = ice_pf_to_dev(pf);
 
-	bitmap_free(vsi->af_xdp_zc_qps);
-	vsi->af_xdp_zc_qps = NULL;
 	/* free the ring and vector containers */
 	devm_kfree(dev, vsi->q_vectors);
 	vsi->q_vectors = NULL;
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 7541f223bf4f..a65955eb23c0 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -269,7 +269,6 @@ static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid)
 	if (!pool)
 		return -EINVAL;
 
-	clear_bit(qid, vsi->af_xdp_zc_qps);
 	xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR);
 
 	return 0;
@@ -300,8 +299,6 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
 	if (err)
 		return err;
 
-	set_bit(qid, vsi->af_xdp_zc_qps);
-
 	return 0;
 }
 
@@ -349,11 +346,13 @@ ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present)
 int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc)
 {
 	struct ice_rx_ring *rx_ring;
-	unsigned long q;
+	uint i;
+
+	ice_for_each_rxq(vsi, i) {
+		rx_ring = vsi->rx_rings[i];
+		if (!rx_ring->xsk_pool)
+			continue;
 
-	for_each_set_bit(q, vsi->af_xdp_zc_qps,
-			 max_t(int, vsi->alloc_txq, vsi->alloc_rxq)) {
-		rx_ring = vsi->rx_rings[q];
 		if (ice_realloc_rx_xdp_bufs(rx_ring, zc))
 			return -ENOMEM;
 	}
-- 
cgit v1.2.3


From 744d197162c2070a6045a71e2666ed93a57cc65d Mon Sep 17 00:00:00 2001
From: Larysa Zaremba <larysa.zaremba@intel.com>
Date: Mon, 3 Jun 2024 14:42:33 -0700
Subject: ice: add flag to distinguish reset from .ndo_bpf in XDP rings config

Commit 6624e780a577 ("ice: split ice_vsi_setup into smaller functions")
has placed ice_vsi_free_q_vectors() after ice_destroy_xdp_rings() in
the rebuild process. The behaviour of the XDP rings config functions is
context-dependent, so the change of order has led to
ice_destroy_xdp_rings() doing additional work and removing XDP prog, when
it was supposed to be preserved.

Also, dependency on the PF state reset flags creates an additional,
fortunately less common problem:

* PFR is requested e.g. by tx_timeout handler
* .ndo_bpf() is asked to delete the program, calls ice_destroy_xdp_rings(),
  but reset flag is set, so rings are destroyed without deleting the
  program
* ice_vsi_rebuild tries to delete non-existent XDP rings, because the
  program is still on the VSI
* system crashes

With a similar race, when requested to attach a program,
ice_prepare_xdp_rings() can actually skip setting the program in the VSI
and nevertheless report success.

Instead of reverting to the old order of function calls, add an enum
argument to both ice_prepare_xdp_rings() and ice_destroy_xdp_rings() in
order to distinguish between calls from rebuild and .ndo_bpf().

Fixes: efc2214b6047 ("ice: Add support for XDP")
Reviewed-by: Igor Bagnucki <igor.bagnucki@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20240603-net-2024-05-30-intel-net-fixes-v2-4-e3563aa89b0c@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/ice/ice.h      | 11 +++++++++--
 drivers/net/ethernet/intel/ice/ice_lib.c  |  5 +++--
 drivers/net/ethernet/intel/ice/ice_main.c | 22 ++++++++++++----------
 3 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 5e51cb84b600..a5de6ef9c07e 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -930,9 +930,16 @@ int ice_down(struct ice_vsi *vsi);
 int ice_down_up(struct ice_vsi *vsi);
 int ice_vsi_cfg_lan(struct ice_vsi *vsi);
 struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
+
+enum ice_xdp_cfg {
+	ICE_XDP_CFG_FULL,	/* Fully apply new config in .ndo_bpf() */
+	ICE_XDP_CFG_PART,	/* Save/use part of config in VSI rebuild */
+};
+
 int ice_vsi_determine_xdp_res(struct ice_vsi *vsi);
-int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog);
-int ice_destroy_xdp_rings(struct ice_vsi *vsi);
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
+			  enum ice_xdp_cfg cfg_type);
+int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type);
 int
 ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 	     u32 flags);
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index c0a7ff6c7e87..dd8b374823ee 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2285,7 +2285,8 @@ static int ice_vsi_cfg_def(struct ice_vsi *vsi)
 			ret = ice_vsi_determine_xdp_res(vsi);
 			if (ret)
 				goto unroll_vector_base;
-			ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog);
+			ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog,
+						    ICE_XDP_CFG_PART);
 			if (ret)
 				goto unroll_vector_base;
 		}
@@ -2429,7 +2430,7 @@ void ice_vsi_decfg(struct ice_vsi *vsi)
 		/* return value check can be skipped here, it always returns
 		 * 0 if reset is in progress
 		 */
-		ice_destroy_xdp_rings(vsi);
+		ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_PART);
 
 	ice_vsi_clear_rings(vsi);
 	ice_vsi_free_q_vectors(vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index f60c022f7960..2a270aacd24a 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2711,10 +2711,12 @@ static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
  * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
  * @vsi: VSI to bring up Tx rings used by XDP
  * @prog: bpf program that will be assigned to VSI
+ * @cfg_type: create from scratch or restore the existing configuration
  *
  * Return 0 on success and negative value on error
  */
-int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
+			  enum ice_xdp_cfg cfg_type)
 {
 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
 	int xdp_rings_rem = vsi->num_xdp_txq;
@@ -2790,7 +2792,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
 	 * taken into account at the end of ice_vsi_rebuild, where
 	 * ice_cfg_vsi_lan is being called
 	 */
-	if (ice_is_reset_in_progress(pf->state))
+	if (cfg_type == ICE_XDP_CFG_PART)
 		return 0;
 
 	/* tell the Tx scheduler that right now we have
@@ -2842,22 +2844,21 @@ err_map_xdp:
 /**
  * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
  * @vsi: VSI to remove XDP rings
+ * @cfg_type: disable XDP permanently or allow it to be restored later
  *
  * Detach XDP rings from irq vectors, clean up the PF bitmap and free
  * resources
  */
-int ice_destroy_xdp_rings(struct ice_vsi *vsi)
+int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type)
 {
 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
 	struct ice_pf *pf = vsi->back;
 	int i, v_idx;
 
 	/* q_vectors are freed in reset path so there's no point in detaching
-	 * rings; in case of rebuild being triggered not from reset bits
-	 * in pf->state won't be set, so additionally check first q_vector
-	 * against NULL
+	 * rings
 	 */
-	if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+	if (cfg_type == ICE_XDP_CFG_PART)
 		goto free_qmap;
 
 	ice_for_each_q_vector(vsi, v_idx) {
@@ -2898,7 +2899,7 @@ free_qmap:
 	if (static_key_enabled(&ice_xdp_locking_key))
 		static_branch_dec(&ice_xdp_locking_key);
 
-	if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+	if (cfg_type == ICE_XDP_CFG_PART)
 		return 0;
 
 	ice_vsi_assign_bpf_prog(vsi, NULL);
@@ -3009,7 +3010,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
 		if (xdp_ring_err) {
 			NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP");
 		} else {
-			xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
+			xdp_ring_err = ice_prepare_xdp_rings(vsi, prog,
+							     ICE_XDP_CFG_FULL);
 			if (xdp_ring_err)
 				NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
 		}
@@ -3020,7 +3022,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
 			NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed");
 	} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
 		xdp_features_clear_redirect_target(vsi->netdev);
-		xdp_ring_err = ice_destroy_xdp_rings(vsi);
+		xdp_ring_err = ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_FULL);
 		if (xdp_ring_err)
 			NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
 		/* reallocate Rx queues that were used for zero-copy */
-- 
cgit v1.2.3


From f3df4044254c98128890b512bf19cc05588f1fe5 Mon Sep 17 00:00:00 2001
From: Larysa Zaremba <larysa.zaremba@intel.com>
Date: Mon, 3 Jun 2024 14:42:34 -0700
Subject: ice: map XDP queues to vectors in ice_vsi_map_rings_to_vectors()

ice_pf_dcb_recfg() re-maps queues to vectors with
ice_vsi_map_rings_to_vectors(), which does not restore the previous
state for XDP queues. This leads to no AF_XDP traffic after rebuild.

Map XDP queues to vectors in ice_vsi_map_rings_to_vectors().
Also, move the code around, so XDP queues are mapped independently only
through .ndo_bpf().

Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions")
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20240603-net-2024-05-30-intel-net-fixes-v2-5-e3563aa89b0c@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/ice/ice.h      |  1 +
 drivers/net/ethernet/intel/ice/ice_base.c |  3 +
 drivers/net/ethernet/intel/ice/ice_lib.c  | 14 ++---
 drivers/net/ethernet/intel/ice/ice_main.c | 96 ++++++++++++++++++-------------
 4 files changed, 68 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index a5de6ef9c07e..99a75a59078e 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -940,6 +940,7 @@ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi);
 int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
 			  enum ice_xdp_cfg cfg_type);
 int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type);
+void ice_map_xdp_rings(struct ice_vsi *vsi);
 int
 ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 	     u32 flags);
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 687f6cb2b917..5d396c1a7731 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -842,6 +842,9 @@ void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
 		}
 		rx_rings_rem -= rx_rings_per_v;
 	}
+
+	if (ice_is_xdp_ena_vsi(vsi))
+		ice_map_xdp_rings(vsi);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index dd8b374823ee..7629b0190578 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2274,13 +2274,6 @@ static int ice_vsi_cfg_def(struct ice_vsi *vsi)
 		if (ret)
 			goto unroll_vector_base;
 
-		ice_vsi_map_rings_to_vectors(vsi);
-
-		/* Associate q_vector rings to napi */
-		ice_vsi_set_napi_queues(vsi);
-
-		vsi->stat_offsets_loaded = false;
-
 		if (ice_is_xdp_ena_vsi(vsi)) {
 			ret = ice_vsi_determine_xdp_res(vsi);
 			if (ret)
@@ -2291,6 +2284,13 @@ static int ice_vsi_cfg_def(struct ice_vsi *vsi)
 				goto unroll_vector_base;
 		}
 
+		ice_vsi_map_rings_to_vectors(vsi);
+
+		/* Associate q_vector rings to napi */
+		ice_vsi_set_napi_queues(vsi);
+
+		vsi->stat_offsets_loaded = false;
+
 		/* ICE_VSI_CTRL does not need RSS so skip RSS processing */
 		if (vsi->type != ICE_VSI_CTRL)
 			/* Do not exit if configuring RSS had an issue, at
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 2a270aacd24a..1b61ca3a6eb6 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2707,6 +2707,60 @@ static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
 		bpf_prog_put(old_prog);
 }
 
+static struct ice_tx_ring *ice_xdp_ring_from_qid(struct ice_vsi *vsi, int qid)
+{
+	struct ice_q_vector *q_vector;
+	struct ice_tx_ring *ring;
+
+	if (static_key_enabled(&ice_xdp_locking_key))
+		return vsi->xdp_rings[qid % vsi->num_xdp_txq];
+
+	q_vector = vsi->rx_rings[qid]->q_vector;
+	ice_for_each_tx_ring(ring, q_vector->tx)
+		if (ice_ring_is_xdp(ring))
+			return ring;
+
+	return NULL;
+}
+
+/**
+ * ice_map_xdp_rings - Map XDP rings to interrupt vectors
+ * @vsi: the VSI with XDP rings being configured
+ *
+ * Map XDP rings to interrupt vectors and perform the configuration steps
+ * dependent on the mapping.
+ */
+void ice_map_xdp_rings(struct ice_vsi *vsi)
+{
+	int xdp_rings_rem = vsi->num_xdp_txq;
+	int v_idx, q_idx;
+
+	/* follow the logic from ice_vsi_map_rings_to_vectors */
+	ice_for_each_q_vector(vsi, v_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+		int xdp_rings_per_v, q_id, q_base;
+
+		xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
+					       vsi->num_q_vectors - v_idx);
+		q_base = vsi->num_xdp_txq - xdp_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
+			struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id];
+
+			xdp_ring->q_vector = q_vector;
+			xdp_ring->next = q_vector->tx.tx_ring;
+			q_vector->tx.tx_ring = xdp_ring;
+		}
+		xdp_rings_rem -= xdp_rings_per_v;
+	}
+
+	ice_for_each_rxq(vsi, q_idx) {
+		vsi->rx_rings[q_idx]->xdp_ring = ice_xdp_ring_from_qid(vsi,
+								       q_idx);
+		ice_tx_xsk_pool(vsi, q_idx);
+	}
+}
+
 /**
  * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
  * @vsi: VSI to bring up Tx rings used by XDP
@@ -2719,7 +2773,6 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
 			  enum ice_xdp_cfg cfg_type)
 {
 	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
-	int xdp_rings_rem = vsi->num_xdp_txq;
 	struct ice_pf *pf = vsi->back;
 	struct ice_qs_cfg xdp_qs_cfg = {
 		.qs_mutex = &pf->avail_q_mutex,
@@ -2732,8 +2785,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
 		.mapping_mode = ICE_VSI_MAP_CONTIG
 	};
 	struct device *dev;
-	int i, v_idx;
-	int status;
+	int status, i;
 
 	dev = ice_pf_to_dev(pf);
 	vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq,
@@ -2752,42 +2804,6 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
 	if (ice_xdp_alloc_setup_rings(vsi))
 		goto clear_xdp_rings;
 
-	/* follow the logic from ice_vsi_map_rings_to_vectors */
-	ice_for_each_q_vector(vsi, v_idx) {
-		struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
-		int xdp_rings_per_v, q_id, q_base;
-
-		xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
-					       vsi->num_q_vectors - v_idx);
-		q_base = vsi->num_xdp_txq - xdp_rings_rem;
-
-		for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
-			struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id];
-
-			xdp_ring->q_vector = q_vector;
-			xdp_ring->next = q_vector->tx.tx_ring;
-			q_vector->tx.tx_ring = xdp_ring;
-		}
-		xdp_rings_rem -= xdp_rings_per_v;
-	}
-
-	ice_for_each_rxq(vsi, i) {
-		if (static_key_enabled(&ice_xdp_locking_key)) {
-			vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
-		} else {
-			struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector;
-			struct ice_tx_ring *ring;
-
-			ice_for_each_tx_ring(ring, q_vector->tx) {
-				if (ice_ring_is_xdp(ring)) {
-					vsi->rx_rings[i]->xdp_ring = ring;
-					break;
-				}
-			}
-		}
-		ice_tx_xsk_pool(vsi, i);
-	}
-
 	/* omit the scheduler update if in reset path; XDP queues will be
 	 * taken into account at the end of ice_vsi_rebuild, where
 	 * ice_cfg_vsi_lan is being called
@@ -2795,6 +2811,8 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
 	if (cfg_type == ICE_XDP_CFG_PART)
 		return 0;
 
+	ice_map_xdp_rings(vsi);
+
 	/* tell the Tx scheduler that right now we have
 	 * additional queues
 	 */
-- 
cgit v1.2.3


From 7d67d11fbe194f71298263f48e33ae2afa38197e Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Mon, 3 Jun 2024 14:42:35 -0700
Subject: igc: Fix Energy Efficient Ethernet support declaration

The commit 01cf893bf0f4 ("net: intel: i40e/igc: Remove setting Autoneg in
EEE capabilities") removed SUPPORTED_Autoneg field but left inappropriate
ethtool_keee structure initialization. When "ethtool --show <device>"
(get_eee) invoke, the 'ethtool_keee' structure was accidentally overridden.
Remove the 'ethtool_keee' overriding and add EEE declaration as per IEEE
specification that allows reporting Energy Efficient Ethernet capabilities.

Examples:
Before fix:
ethtool --show-eee enp174s0
EEE settings for enp174s0:
	EEE status: not supported

After fix:
EEE settings for enp174s0:
	EEE status: disabled
	Tx LPI: disabled
	Supported EEE link modes:  100baseT/Full
	                           1000baseT/Full
	                           2500baseT/Full

Fixes: 01cf893bf0f4 ("net: intel: i40e/igc: Remove setting Autoneg in EEE capabilities")
Suggested-by: Dima Ruinskiy <dima.ruinskiy@intel.com>
Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Naama Meir <naamax.meir@linux.intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20240603-net-2024-05-30-intel-net-fixes-v2-6-e3563aa89b0c@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 9 +++++++--
 drivers/net/ethernet/intel/igc/igc_main.c    | 4 ++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index f2c4f1966bb0..0cd2bd695db1 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1629,12 +1629,17 @@ static int igc_ethtool_get_eee(struct net_device *netdev,
 	struct igc_hw *hw = &adapter->hw;
 	u32 eeer;
 
+	linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+			 edata->supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+			 edata->supported);
+	linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+			 edata->supported);
+
 	if (hw->dev_spec._base.eee_enable)
 		mii_eee_cap1_mod_linkmode_t(edata->advertised,
 					    adapter->eee_advert);
 
-	*edata = adapter->eee;
-
 	eeer = rd32(IGC_EEER);
 
 	/* EEE status on negotiated link */
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 12f004f46082..305e05294a26 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -12,6 +12,7 @@
 #include <linux/bpf_trace.h>
 #include <net/xdp_sock_drv.h>
 #include <linux/pci.h>
+#include <linux/mdio.h>
 
 #include <net/ipv6.h>
 
@@ -4975,6 +4976,9 @@ void igc_up(struct igc_adapter *adapter)
 	/* start the watchdog. */
 	hw->mac.get_link_status = true;
 	schedule_work(&adapter->watchdog_task);
+
+	adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T |
+			      MDIO_EEE_2_5GT;
 }
 
 /**
-- 
cgit v1.2.3


From b0c9a26435413b81799047a7be53255640432547 Mon Sep 17 00:00:00 2001
From: Aleksandr Mishin <amishin@t-argos.ru>
Date: Tue, 4 Jun 2024 11:25:00 +0300
Subject: net: wwan: iosm: Fix tainted pointer delete is case of region
 creation fail

In case of region creation fail in ipc_devlink_create_region(), previously
created regions delete process starts from tainted pointer which actually
holds error code value.
Fix this bug by decreasing region index before delete.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Fixes: 4dcd183fbd67 ("net: wwan: iosm: devlink registration")
Signed-off-by: Aleksandr Mishin <amishin@t-argos.ru>
Acked-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20240604082500.20769-1-amishin@t-argos.ru
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/wwan/iosm/iosm_ipc_devlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wwan/iosm/iosm_ipc_devlink.c b/drivers/net/wwan/iosm/iosm_ipc_devlink.c
index bef6819986e9..33d6342124bc 100644
--- a/drivers/net/wwan/iosm/iosm_ipc_devlink.c
+++ b/drivers/net/wwan/iosm/iosm_ipc_devlink.c
@@ -211,7 +211,7 @@ static int ipc_devlink_create_region(struct iosm_devlink *devlink)
 			rc = PTR_ERR(devlink->cd_regions[i]);
 			dev_err(devlink->dev, "Devlink region fail,err %d", rc);
 			/* Delete previously created regions */
-			for ( ; i >= 0; i--)
+			for (i--; i >= 0; i--)
 				devlink_region_destroy(devlink->cd_regions[i]);
 			goto region_create_fail;
 		}
-- 
cgit v1.2.3


From 26bfb8b57063f52b867f9b6c8d1742fcb5bd656c Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 4 Jun 2024 09:52:27 -0700
Subject: af_unix: Set sk->sk_state under unix_state_lock() for truly
 disconencted peer.

When a SOCK_DGRAM socket connect()s to another socket, the both sockets'
sk->sk_state are changed to TCP_ESTABLISHED so that we can register them
to BPF SOCKMAP.

When the socket disconnects from the peer by connect(AF_UNSPEC), the state
is set back to TCP_CLOSE.

Then, the peer's state is also set to TCP_CLOSE, but the update is done
locklessly and unconditionally.

Let's say socket A connect()ed to B, B connect()ed to C, and A disconnects
from B.

After the first two connect()s, all three sockets' sk->sk_state are
TCP_ESTABLISHED:

  $ ss -xa
  Netid State  Recv-Q Send-Q  Local Address:Port  Peer Address:PortProcess
  u_dgr ESTAB  0      0       @A 641              * 642
  u_dgr ESTAB  0      0       @B 642              * 643
  u_dgr ESTAB  0      0       @C 643              * 0

And after the disconnect, B's state is TCP_CLOSE even though it's still
connected to C and C's state is TCP_ESTABLISHED.

  $ ss -xa
  Netid State  Recv-Q Send-Q  Local Address:Port  Peer Address:PortProcess
  u_dgr UNCONN 0      0       @A 641              * 0
  u_dgr UNCONN 0      0       @B 642              * 643
  u_dgr ESTAB  0      0       @C 643              * 0

In this case, we cannot register B to SOCKMAP.

So, when a socket disconnects from the peer, we should not set TCP_CLOSE to
the peer if the peer is connected to yet another socket, and this must be
done under unix_state_lock().

Note that we use WRITE_ONCE() for sk->sk_state as there are many lockless
readers.  These data-races will be fixed in the following patches.

Fixes: 83301b5367a9 ("af_unix: Set TCP_ESTABLISHED for datagram sockets too")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/af_unix.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 25b49efc0926..b162164b7a42 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -570,7 +570,6 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 			sk_error_report(other);
 		}
 	}
-	other->sk_state = TCP_CLOSE;
 }
 
 static void unix_sock_destructor(struct sock *sk)
@@ -1424,8 +1423,15 @@ restart:
 
 		unix_state_double_unlock(sk, other);
 
-		if (other != old_peer)
+		if (other != old_peer) {
 			unix_dgram_disconnected(sk, old_peer);
+
+			unix_state_lock(old_peer);
+			if (!unix_peer(old_peer))
+				WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
+			unix_state_unlock(old_peer);
+		}
+
 		sock_put(old_peer);
 	} else {
 		unix_peer(sk) = other;
-- 
cgit v1.2.3


From 942238f9735a4a4ebf8274b218d9a910158941d1 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 4 Jun 2024 09:52:28 -0700
Subject: af_unix: Annodate data-races around sk->sk_state for writers.

sk->sk_state is changed under unix_state_lock(), but it's read locklessly
in many places.

This patch adds WRITE_ONCE() on the writer side.

We will add READ_ONCE() to the lockless readers in the following patches.

Fixes: 83301b5367a9 ("af_unix: Set TCP_ESTABLISHED for datagram sockets too")
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/af_unix.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index b162164b7a42..424d021a4d7d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -616,7 +616,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
 	u->path.dentry = NULL;
 	u->path.mnt = NULL;
 	state = sk->sk_state;
-	sk->sk_state = TCP_CLOSE;
+	WRITE_ONCE(sk->sk_state, TCP_CLOSE);
 
 	skpair = unix_peer(sk);
 	unix_peer(sk) = NULL;
@@ -738,7 +738,8 @@ static int unix_listen(struct socket *sock, int backlog)
 	if (backlog > sk->sk_max_ack_backlog)
 		wake_up_interruptible_all(&u->peer_wait);
 	sk->sk_max_ack_backlog	= backlog;
-	sk->sk_state		= TCP_LISTEN;
+	WRITE_ONCE(sk->sk_state, TCP_LISTEN);
+
 	/* set credentials so connect can copy them */
 	init_peercred(sk);
 	err = 0;
@@ -1401,7 +1402,8 @@ restart:
 		if (err)
 			goto out_unlock;
 
-		sk->sk_state = other->sk_state = TCP_ESTABLISHED;
+		WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
+		WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
 	} else {
 		/*
 		 *	1003.1g breaking connected state with AF_UNSPEC
@@ -1418,7 +1420,7 @@ restart:
 
 		unix_peer(sk) = other;
 		if (!other)
-			sk->sk_state = TCP_CLOSE;
+			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
 
 		unix_state_double_unlock(sk, other);
@@ -1639,7 +1641,7 @@ restart:
 	copy_peercred(sk, other);
 
 	sock->state	= SS_CONNECTED;
-	sk->sk_state	= TCP_ESTABLISHED;
+	WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
 	sock_hold(newsk);
 
 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
@@ -2050,7 +2052,7 @@ restart_locked:
 			unix_peer(sk) = NULL;
 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
 
-			sk->sk_state = TCP_CLOSE;
+			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
 			unix_state_unlock(sk);
 
 			unix_dgram_disconnected(sk, other);
-- 
cgit v1.2.3


From 3a0f38eb285c8c2eead4b3230c7ac2983707599d Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 4 Jun 2024 09:52:29 -0700
Subject: af_unix: Annotate data-race of sk->sk_state in unix_inq_len().

ioctl(SIOCINQ) calls unix_inq_len() that checks sk->sk_state first
and returns -EINVAL if it's TCP_LISTEN.

Then, for SOCK_STREAM sockets, unix_inq_len() returns the number of
bytes in recvq.

However, unix_inq_len() does not hold unix_state_lock(), and the
concurrent listen() might change the state after checking sk->sk_state.

If the race occurs, 0 is returned for the listener, instead of -EINVAL,
because the length of skb with embryo is 0.

We could hold unix_state_lock() in unix_inq_len(), but it's overkill
given the result is true for pre-listen() TCP_CLOSE state.

So, let's use READ_ONCE() for sk->sk_state in unix_inq_len().

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/af_unix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 424d021a4d7d..b37b53767b29 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -3017,7 +3017,7 @@ long unix_inq_len(struct sock *sk)
 	struct sk_buff *skb;
 	long amount = 0;
 
-	if (sk->sk_state == TCP_LISTEN)
+	if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
 		return -EINVAL;
 
 	spin_lock(&sk->sk_receive_queue.lock);
-- 
cgit v1.2.3


From eb0718fb3e97ad0d6f4529b810103451c90adf94 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 4 Jun 2024 09:52:30 -0700
Subject: af_unix: Annotate data-races around sk->sk_state in
 unix_write_space() and poll().

unix_poll() and unix_dgram_poll() read sk->sk_state locklessly and
calls unix_writable() which also reads sk->sk_state without holding
unix_state_lock().

Let's use READ_ONCE() in unix_poll() and unix_dgram_poll() and pass
it to unix_writable().

While at it, we remove TCP_SYN_SENT check in unix_dgram_poll() as
that state does not exist for AF_UNIX socket since the code was added.

Fixes: 1586a5877db9 ("af_unix: do not report POLLOUT on listeners")
Fixes: 3c73419c09a5 ("af_unix: fix 'poll for write'/ connected DGRAM sockets")
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/af_unix.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index b37b53767b29..3bacf47cb94d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -530,9 +530,9 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 	return 0;
 }
 
-static int unix_writable(const struct sock *sk)
+static int unix_writable(const struct sock *sk, unsigned char state)
 {
-	return sk->sk_state != TCP_LISTEN &&
+	return state != TCP_LISTEN &&
 	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 }
 
@@ -541,7 +541,7 @@ static void unix_write_space(struct sock *sk)
 	struct socket_wq *wq;
 
 	rcu_read_lock();
-	if (unix_writable(sk)) {
+	if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
 		wq = rcu_dereference(sk->sk_wq);
 		if (skwq_has_sleeper(wq))
 			wake_up_interruptible_sync_poll(&wq->wait,
@@ -3129,12 +3129,14 @@ static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
 	struct sock *sk = sock->sk;
+	unsigned char state;
 	__poll_t mask;
 	u8 shutdown;
 
 	sock_poll_wait(file, sock, wait);
 	mask = 0;
 	shutdown = READ_ONCE(sk->sk_shutdown);
+	state = READ_ONCE(sk->sk_state);
 
 	/* exceptional events? */
 	if (READ_ONCE(sk->sk_err))
@@ -3156,14 +3158,14 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
 
 	/* Connection-based need to check for termination and startup */
 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
-	    sk->sk_state == TCP_CLOSE)
+	    state == TCP_CLOSE)
 		mask |= EPOLLHUP;
 
 	/*
 	 * we set writable also when the other side has shut down the
 	 * connection. This prevents stuck sockets.
 	 */
-	if (unix_writable(sk))
+	if (unix_writable(sk, state))
 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
 
 	return mask;
@@ -3174,12 +3176,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
 {
 	struct sock *sk = sock->sk, *other;
 	unsigned int writable;
+	unsigned char state;
 	__poll_t mask;
 	u8 shutdown;
 
 	sock_poll_wait(file, sock, wait);
 	mask = 0;
 	shutdown = READ_ONCE(sk->sk_shutdown);
+	state = READ_ONCE(sk->sk_state);
 
 	/* exceptional events? */
 	if (READ_ONCE(sk->sk_err) ||
@@ -3199,19 +3203,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/* Connection-based need to check for termination and startup */
-	if (sk->sk_type == SOCK_SEQPACKET) {
-		if (sk->sk_state == TCP_CLOSE)
-			mask |= EPOLLHUP;
-		/* connection hasn't started yet? */
-		if (sk->sk_state == TCP_SYN_SENT)
-			return mask;
-	}
+	if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
+		mask |= EPOLLHUP;
 
 	/* No write status requested, avoid expensive OUT tests. */
 	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
 		return mask;
 
-	writable = unix_writable(sk);
+	writable = unix_writable(sk, state);
 	if (writable) {
 		unix_state_lock(sk);
 
-- 
cgit v1.2.3


From a9bf9c7dc6a5899c01cb8f6e773a66315a5cd4b7 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 4 Jun 2024 09:52:31 -0700
Subject: af_unix: Annotate data-race of sk->sk_state in unix_stream_connect().

As small optimisation, unix_stream_connect() prefetches the client's
sk->sk_state without unix_state_lock() and checks if it's TCP_CLOSE.

Later, sk->sk_state is checked again under unix_state_lock().

Let's use READ_ONCE() for the first check and TCP_CLOSE directly for
the second check.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/af_unix.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3bacf47cb94d..84552826530d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1481,7 +1481,6 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	struct sk_buff *skb = NULL;
 	long timeo;
 	int err;
-	int st;
 
 	err = unix_validate_addr(sunaddr, addr_len);
 	if (err)
@@ -1571,9 +1570,7 @@ restart:
 
 	   Well, and we have to recheck the state after socket locked.
 	 */
-	st = sk->sk_state;
-
-	switch (st) {
+	switch (READ_ONCE(sk->sk_state)) {
 	case TCP_CLOSE:
 		/* This is ok... continue with connect */
 		break;
@@ -1588,7 +1585,7 @@ restart:
 
 	unix_state_lock_nested(sk, U_LOCK_SECOND);
 
-	if (sk->sk_state != st) {
+	if (sk->sk_state != TCP_CLOSE) {
 		unix_state_unlock(sk);
 		unix_state_unlock(other);
 		sock_put(other);
-- 
cgit v1.2.3


From 1b536948e805aab61a48c5aa5db10c9afee880bd Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 4 Jun 2024 09:52:32 -0700
Subject: af_unix: Annotate data-race of sk->sk_state in unix_accept().

Once sk->sk_state is changed to TCP_LISTEN, it never changes.

unix_accept() takes the advantage and reads sk->sk_state without
holding unix_state_lock().

Let's use READ_ONCE() there.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/af_unix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 84552826530d..4763c26ae480 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1710,7 +1710,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock,
 		goto out;
 
 	arg->err = -EINVAL;
-	if (sk->sk_state != TCP_LISTEN)
+	if (READ_ONCE(sk->sk_state) != TCP_LISTEN)
 		goto out;
 
 	/* If socket state is TCP_LISTEN it cannot change (for now...),
-- 
cgit v1.2.3


From 8a34d4e8d9742a24f74998f45a6a98edd923319b Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 4 Jun 2024 09:52:33 -0700
Subject: af_unix: Annotate data-races around sk->sk_state in sendmsg() and
 recvmsg().

The following functions read sk->sk_state locklessly and proceed only if
the state is TCP_ESTABLISHED.

  * unix_stream_sendmsg
  * unix_stream_read_generic
  * unix_seqpacket_sendmsg
  * unix_seqpacket_recvmsg

Let's use READ_ONCE() there.

Fixes: a05d2ad1c1f3 ("af_unix: Only allow recv on connected seqpacket sockets.")
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/af_unix.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 4763c26ae480..4ef9c21783a5 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2226,7 +2226,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 	}
 
 	if (msg->msg_namelen) {
-		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
+		err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
 		goto out_err;
 	} else {
 		err = -ENOTCONN;
@@ -2340,7 +2340,7 @@ static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
 	if (err)
 		return err;
 
-	if (sk->sk_state != TCP_ESTABLISHED)
+	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
 		return -ENOTCONN;
 
 	if (msg->msg_namelen)
@@ -2354,7 +2354,7 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
 {
 	struct sock *sk = sock->sk;
 
-	if (sk->sk_state != TCP_ESTABLISHED)
+	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
 		return -ENOTCONN;
 
 	return unix_dgram_recvmsg(sock, msg, size, flags);
@@ -2683,7 +2683,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
 	size_t size = state->size;
 	unsigned int last_len;
 
-	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
+	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
 		err = -EINVAL;
 		goto out;
 	}
-- 
cgit v1.2.3


From af4c733b6b1aded4dc808fafece7dfe6e9d2ebb3 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 4 Jun 2024 09:52:34 -0700
Subject: af_unix: Annotate data-race of sk->sk_state in
 unix_stream_read_skb().

unix_stream_read_skb() is called from sk->sk_data_ready() context
where unix_state_lock() is not held.

Let's use READ_ONCE() there.

Fixes: 77462de14a43 ("af_unix: Add read_sock for stream socket types")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/af_unix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 4ef9c21783a5..e7b74207aa3b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2659,7 +2659,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
 
 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 {
-	if (unlikely(sk->sk_state != TCP_ESTABLISHED))
+	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
 		return -ENOTCONN;
 
 	return unix_read_skb(sk, recv_actor);
-- 
cgit v1.2.3


From 0aa3be7b3e1f8f997312cc4705f8165e02806f8f Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 4 Jun 2024 09:52:35 -0700
Subject: af_unix: Annotate data-races around sk->sk_state in UNIX_DIAG.

While dumping AF_UNIX sockets via UNIX_DIAG, sk->sk_state is read
locklessly.

Let's use READ_ONCE() there.

Note that the result could be inconsistent if the socket is dumped
during the state change.  This is common for other SOCK_DIAG and
similar interfaces.

Fixes: c9da99e6475f ("unix_diag: Fixup RQLEN extension report")
Fixes: 2aac7a2cb0d9 ("unix_diag: Pending connections IDs NLA")
Fixes: 45a96b9be6ec ("unix_diag: Dumping all sockets core")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/diag.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/unix/diag.c b/net/unix/diag.c
index ae39538c5042..116cf508aea4 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -65,7 +65,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
 	u32 *buf;
 	int i;
 
-	if (sk->sk_state == TCP_LISTEN) {
+	if (READ_ONCE(sk->sk_state) == TCP_LISTEN) {
 		spin_lock(&sk->sk_receive_queue.lock);
 
 		attr = nla_reserve(nlskb, UNIX_DIAG_ICONS,
@@ -103,7 +103,7 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
 {
 	struct unix_diag_rqlen rql;
 
-	if (sk->sk_state == TCP_LISTEN) {
+	if (READ_ONCE(sk->sk_state) == TCP_LISTEN) {
 		rql.udiag_rqueue = sk->sk_receive_queue.qlen;
 		rql.udiag_wqueue = sk->sk_max_ack_backlog;
 	} else {
@@ -136,7 +136,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 	rep = nlmsg_data(nlh);
 	rep->udiag_family = AF_UNIX;
 	rep->udiag_type = sk->sk_type;
-	rep->udiag_state = sk->sk_state;
+	rep->udiag_state = READ_ONCE(sk->sk_state);
 	rep->pad = 0;
 	rep->udiag_ino = sk_ino;
 	sock_diag_save_cookie(sk, rep->udiag_cookie);
@@ -215,7 +215,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		sk_for_each(sk, &net->unx.table.buckets[slot]) {
 			if (num < s_num)
 				goto next;
-			if (!(req->udiag_states & (1 << sk->sk_state)))
+			if (!(req->udiag_states & (1 << READ_ONCE(sk->sk_state))))
 				goto next;
 			if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk),
 					 NETLINK_CB(cb->skb).portid,
-- 
cgit v1.2.3


From b0632e53e0da8054e36bc973f0eec69d30f1b7c6 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 4 Jun 2024 09:52:36 -0700
Subject: af_unix: Annotate data-races around sk->sk_sndbuf.

sk_setsockopt() changes sk->sk_sndbuf under lock_sock(), but it's
not used in af_unix.c.

Let's use READ_ONCE() to read sk->sk_sndbuf in unix_writable(),
unix_dgram_sendmsg(), and unix_stream_sendmsg().

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/af_unix.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e7b74207aa3b..de2a5e334a88 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -533,7 +533,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 static int unix_writable(const struct sock *sk, unsigned char state)
 {
 	return state != TCP_LISTEN &&
-	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
+		(refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
 }
 
 static void unix_write_space(struct sock *sk)
@@ -1967,7 +1967,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 	}
 
 	err = -EMSGSIZE;
-	if (len > sk->sk_sndbuf - 32)
+	if (len > READ_ONCE(sk->sk_sndbuf) - 32)
 		goto out;
 
 	if (len > SKB_MAX_ALLOC) {
@@ -2247,7 +2247,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 						   &err, 0);
 		} else {
 			/* Keep two messages in the pipe so it schedules better */
-			size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
+			size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
 
 			/* allow fallback to order-0 allocations */
 			size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
-- 
cgit v1.2.3


From bd9f2d05731f6a112d0c7391a0d537bfc588dbe6 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 4 Jun 2024 09:52:37 -0700
Subject: af_unix: Annotate data-race of net->unx.sysctl_max_dgram_qlen.

net->unx.sysctl_max_dgram_qlen is exposed as a sysctl knob and can be
changed concurrently.

Let's use READ_ONCE() in unix_create1().

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/af_unix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index de2a5e334a88..623ee39657e2 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -976,7 +976,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
 	sk->sk_hash		= unix_unbound_hash(sk);
 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
 	sk->sk_write_space	= unix_write_space;
-	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
+	sk->sk_max_ack_backlog	= READ_ONCE(net->unx.sysctl_max_dgram_qlen);
 	sk->sk_destruct		= unix_sock_destructor;
 	u = unix_sk(sk);
 	u->listener = NULL;
-- 
cgit v1.2.3


From 45d872f0e65593176d880ec148f41ad7c02e40a7 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 4 Jun 2024 09:52:38 -0700
Subject: af_unix: Use unix_recvq_full_lockless() in unix_stream_connect().

Once sk->sk_state is changed to TCP_LISTEN, it never changes.

unix_accept() takes advantage of this characteristics; it does not
hold the listener's unix_state_lock() and only acquires recvq lock
to pop one skb.

It means unix_state_lock() does not prevent the queue length from
changing in unix_stream_connect().

Thus, we need to use unix_recvq_full_lockless() to avoid data-race.

Now we remove unix_recvq_full() as no one uses it.

Note that we can remove READ_ONCE() for sk->sk_max_ack_backlog in
unix_recvq_full_lockless() because of the following reasons:

  (1) For SOCK_DGRAM, it is a written-once field in unix_create1()

  (2) For SOCK_STREAM and SOCK_SEQPACKET, it is changed under the
      listener's unix_state_lock() in unix_listen(), and we hold
      the lock in unix_stream_connect()

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/af_unix.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 623ee39657e2..eb3ba3448ed3 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -221,15 +221,9 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk)
 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 }
 
-static inline int unix_recvq_full(const struct sock *sk)
-{
-	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
-}
-
 static inline int unix_recvq_full_lockless(const struct sock *sk)
 {
-	return skb_queue_len_lockless(&sk->sk_receive_queue) >
-		READ_ONCE(sk->sk_max_ack_backlog);
+	return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 }
 
 struct sock *unix_peer_get(struct sock *s)
@@ -1545,7 +1539,7 @@ restart:
 	if (other->sk_shutdown & RCV_SHUTDOWN)
 		goto out_unlock;
 
-	if (unix_recvq_full(other)) {
+	if (unix_recvq_full_lockless(other)) {
 		err = -EAGAIN;
 		if (!timeo)
 			goto out_unlock;
-- 
cgit v1.2.3


From 83690b82d228b3570565ebd0b41873933238b97f Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 4 Jun 2024 09:52:39 -0700
Subject: af_unix: Use skb_queue_empty_lockless() in unix_release_sock().

If the socket type is SOCK_STREAM or SOCK_SEQPACKET, unix_release_sock()
checks the length of the peer socket's recvq under unix_state_lock().

However, unix_stream_read_generic() calls skb_unlink() after releasing
the lock.  Also, for SOCK_SEQPACKET, __skb_try_recv_datagram() unlinks
skb without unix_state_lock().

Thues, unix_state_lock() does not protect qlen.

Let's use skb_queue_empty_lockless() in unix_release_sock().

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/af_unix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index eb3ba3448ed3..80846279de9f 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -631,7 +631,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
 			unix_state_lock(skpair);
 			/* No more writes */
 			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
-			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
+			if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion)
 				WRITE_ONCE(skpair->sk_err, ECONNRESET);
 			unix_state_unlock(skpair);
 			skpair->sk_state_change(skpair);
-- 
cgit v1.2.3


From 5d915e584d8408211d4567c22685aae8820bfc55 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 4 Jun 2024 09:52:40 -0700
Subject: af_unix: Use skb_queue_len_lockless() in sk_diag_show_rqlen().

We can dump the socket queue length via UNIX_DIAG by specifying
UDIAG_SHOW_RQLEN.

If sk->sk_state is TCP_LISTEN, we return the recv queue length,
but here we do not hold recvq lock.

Let's use skb_queue_len_lockless() in sk_diag_show_rqlen().

Fixes: c9da99e6475f ("unix_diag: Fixup RQLEN extension report")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/diag.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/unix/diag.c b/net/unix/diag.c
index 116cf508aea4..321336f91a0a 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -104,7 +104,7 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
 	struct unix_diag_rqlen rql;
 
 	if (READ_ONCE(sk->sk_state) == TCP_LISTEN) {
-		rql.udiag_rqueue = sk->sk_receive_queue.qlen;
+		rql.udiag_rqueue = skb_queue_len_lockless(&sk->sk_receive_queue);
 		rql.udiag_wqueue = sk->sk_max_ack_backlog;
 	} else {
 		rql.udiag_rqueue = (u32) unix_inq_len(sk);
-- 
cgit v1.2.3


From efaf24e30ec39ebbea9112227485805a48b0ceb1 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Tue, 4 Jun 2024 09:52:41 -0700
Subject: af_unix: Annotate data-race of sk->sk_shutdown in sk_diag_fill().

While dumping sockets via UNIX_DIAG, we do not hold unix_state_lock().

Let's use READ_ONCE() to read sk->sk_shutdown.

Fixes: e4e541a84863 ("sock-diag: Report shutdown for inet and unix sockets (v2)")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/diag.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/unix/diag.c b/net/unix/diag.c
index 321336f91a0a..937edf4afed4 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -165,7 +165,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 	    sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO))
 		goto out_nlmsg_trim;
 
-	if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown))
+	if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, READ_ONCE(sk->sk_shutdown)))
 		goto out_nlmsg_trim;
 
 	if ((req->udiag_show & UDIAG_SHOW_UID) &&
-- 
cgit v1.2.3


From c181689bc479d3b2300f91fc4d53e089d7631898 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 2 Jun 2024 03:20:38 +0900
Subject: kconfig: remove unneeded code for user-supplied values being out of
 range

This is a leftover from commit ce1fc9345a59 ("kconfig: do not clear
SYMBOL_DEF_USER when the value is out of range").

This code is now redundant because if a user-supplied value is out
of range, the value adjusted by sym_validate_range() differs, and
conf_unsaved has already been incremented a few lines above.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/kconfig/confdata.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 387503daf0f7..85b53069ba7a 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -533,19 +533,6 @@ int conf_read(const char *name)
 			 */
 			if (sym->visible == no && !conf_unsaved)
 				sym->flags &= ~SYMBOL_DEF_USER;
-			switch (sym->type) {
-			case S_STRING:
-			case S_INT:
-			case S_HEX:
-				/* Reset a string value if it's out of range */
-				if (sym_string_within_range(sym, sym->def[S_DEF_USER].val))
-					break;
-				sym->flags &= ~SYMBOL_VALID;
-				conf_unsaved++;
-				break;
-			default:
-				break;
-			}
 		}
 	}
 
-- 
cgit v1.2.3


From 46edf4372e336ef3a61c3126e49518099d2e2e6d Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 2 Jun 2024 03:20:40 +0900
Subject: kconfig: gconf: give a proper initial state to the Save button

Currently, the initial state of the "Save" button is always active.

If none of the CONFIG options are changed while loading the .config
file, the "Save" button should be greyed out.

This can be fixed by calling conf_read() after widget initialization.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/kconfig/gconf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c
index cc400ffe6615..e04dbafd3add 100644
--- a/scripts/kconfig/gconf.c
+++ b/scripts/kconfig/gconf.c
@@ -1422,7 +1422,6 @@ int main(int ac, char *av[])
 
 	conf_parse(name);
 	fixup_rootmenu(&rootmenu);
-	conf_read(NULL);
 
 	/* Load the interface and connect signals */
 	init_main_window(glade_file);
@@ -1430,6 +1429,8 @@ int main(int ac, char *av[])
 	init_left_tree();
 	init_right_tree();
 
+	conf_read(NULL);
+
 	switch (view_mode) {
 	case SINGLE_VIEW:
 		display_tree_part();
-- 
cgit v1.2.3


From bf83266a1eef8251e2f126dba635039de069104a Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 4 Jun 2024 01:19:02 +0900
Subject: kconfig: doc: fix a typo in the note about 'imply'

This sentence does not make sense due to a typo. Fix it.

Fixes: def2fbffe62c ("kconfig: allow symbols implied by y to become m")
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
---
 Documentation/kbuild/kconfig-language.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/kbuild/kconfig-language.rst b/Documentation/kbuild/kconfig-language.rst
index 555c2f839969..86be5b857cc4 100644
--- a/Documentation/kbuild/kconfig-language.rst
+++ b/Documentation/kbuild/kconfig-language.rst
@@ -184,7 +184,7 @@ applicable everywhere (see syntax).
   ability to hook into a secondary subsystem while allowing the user to
   configure that subsystem out without also having to unset these drivers.
 
-  Note: If the combination of FOO=y and BAR=m causes a link error,
+  Note: If the combination of FOO=y and BAZ=m causes a link error,
   you can guard the function call with IS_REACHABLE()::
 
 	foo_init()
-- 
cgit v1.2.3


From 45c7f555bf5e716d9c6ffb737e97d4cc9b4c21ef Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 4 Jun 2024 01:19:03 +0900
Subject: kconfig: doc: document behavior of 'select' and 'imply' followed by
 'if'

Documentation/kbuild/kconfig-language.rst explains the behavior of
'select' as follows:

  reverse dependencies can be used to force a lower limit of
  another symbol. The value of the current menu symbol is used as the
  minimal value <symbol> can be set to.

This is not true when the 'select' property is followed by 'if'.

[Test Code]

    config MODULES
            def_bool y
            modules

    config A
            def_tristate y
            select C if B

    config B
            def_tristate m

    config C
            tristate

[Result]

    CONFIG_MODULES=y
    CONFIG_A=y
    CONFIG_B=m
    CONFIG_C=m

If "the value of A is used as the minimal value C can be set to",
C must be 'y'.

The actual behavior is "C is selected by (A && B)". The lower limit of
C is downgraded due to B being 'm'.

This behavior is kind of weird, and this has arisen several times in
the mailing list.

I do not know whether it is a bug or intended behavior. Anyway, it is
not feasible to change it now because many Kconfig files are written
based on this behavior. The same applies to 'imply'.

Document this (but reserve the possibility for a future change).

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
---
 Documentation/kbuild/kconfig-language.rst | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/kbuild/kconfig-language.rst b/Documentation/kbuild/kconfig-language.rst
index 86be5b857cc4..1fb3f5e6193c 100644
--- a/Documentation/kbuild/kconfig-language.rst
+++ b/Documentation/kbuild/kconfig-language.rst
@@ -150,6 +150,12 @@ applicable everywhere (see syntax).
 	That will limit the usefulness but on the other hand avoid
 	the illegal configurations all over.
 
+	If "select" <symbol> is followed by "if" <expr>, <symbol> will be
+	selected by the logical AND of the value of the current menu symbol
+	and <expr>. This means, the lower limit can be downgraded due to the
+	presence of "if" <expr>. This behavior may seem weird, but we rely on
+	it. (The future of this behavior is undecided.)
+
 - weak reverse dependencies: "imply" <symbol> ["if" <expr>]
 
   This is similar to "select" as it enforces a lower limit on another
@@ -202,6 +208,10 @@ applicable everywhere (see syntax).
 	imply BAR
 	imply BAZ
 
+  Note: If "imply" <symbol> is followed by "if" <expr>, the default of <symbol>
+  will be the logical AND of the value of the current menu symbol and <expr>.
+  (The future of this behavior is undecided.)
+
 - limiting menu display: "visible if" <expr>
 
   This attribute is only applicable to menu blocks, if the condition is
-- 
cgit v1.2.3


From b01e1c030770ff3b4fe37fc7cc6bca03f594133f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 4 Jun 2024 19:35:49 +0000
Subject: ipv6: fix possible race in __fib6_drop_pcpu_from()

syzbot found a race in __fib6_drop_pcpu_from() [1]

If compiler reads more than once (*ppcpu_rt),
second read could read NULL, if another cpu clears
the value in rt6_get_pcpu_route().

Add a READ_ONCE() to prevent this race.

Also add rcu_read_lock()/rcu_read_unlock() because
we rely on RCU protection while dereferencing pcpu_rt.

[1]

Oops: general protection fault, probably for non-canonical address 0xdffffc0000000012: 0000 [#1] PREEMPT SMP KASAN PTI
KASAN: null-ptr-deref in range [0x0000000000000090-0x0000000000000097]
CPU: 0 PID: 7543 Comm: kworker/u8:17 Not tainted 6.10.0-rc1-syzkaller-00013-g2bfcfd584ff5 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 04/02/2024
Workqueue: netns cleanup_net
 RIP: 0010:__fib6_drop_pcpu_from.part.0+0x10a/0x370 net/ipv6/ip6_fib.c:984
Code: f8 48 c1 e8 03 80 3c 28 00 0f 85 16 02 00 00 4d 8b 3f 4d 85 ff 74 31 e8 74 a7 fa f7 49 8d bf 90 00 00 00 48 89 f8 48 c1 e8 03 <80> 3c 28 00 0f 85 1e 02 00 00 49 8b 87 90 00 00 00 48 8b 0c 24 48
RSP: 0018:ffffc900040df070 EFLAGS: 00010206
RAX: 0000000000000012 RBX: 0000000000000001 RCX: ffffffff89932e16
RDX: ffff888049dd1e00 RSI: ffffffff89932d7c RDI: 0000000000000091
RBP: dffffc0000000000 R08: 0000000000000005 R09: 0000000000000007
R10: 0000000000000001 R11: 0000000000000006 R12: ffff88807fa080b8
R13: fffffbfff1a9a07d R14: ffffed100ff41022 R15: 0000000000000001
FS:  0000000000000000(0000) GS:ffff8880b9200000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000001b32c26000 CR3: 000000005d56e000 CR4: 00000000003526f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 <TASK>
  __fib6_drop_pcpu_from net/ipv6/ip6_fib.c:966 [inline]
  fib6_drop_pcpu_from net/ipv6/ip6_fib.c:1027 [inline]
  fib6_purge_rt+0x7f2/0x9f0 net/ipv6/ip6_fib.c:1038
  fib6_del_route net/ipv6/ip6_fib.c:1998 [inline]
  fib6_del+0xa70/0x17b0 net/ipv6/ip6_fib.c:2043
  fib6_clean_node+0x426/0x5b0 net/ipv6/ip6_fib.c:2205
  fib6_walk_continue+0x44f/0x8d0 net/ipv6/ip6_fib.c:2127
  fib6_walk+0x182/0x370 net/ipv6/ip6_fib.c:2175
  fib6_clean_tree+0xd7/0x120 net/ipv6/ip6_fib.c:2255
  __fib6_clean_all+0x100/0x2d0 net/ipv6/ip6_fib.c:2271
  rt6_sync_down_dev net/ipv6/route.c:4906 [inline]
  rt6_disable_ip+0x7ed/0xa00 net/ipv6/route.c:4911
  addrconf_ifdown.isra.0+0x117/0x1b40 net/ipv6/addrconf.c:3855
  addrconf_notify+0x223/0x19e0 net/ipv6/addrconf.c:3778
  notifier_call_chain+0xb9/0x410 kernel/notifier.c:93
  call_netdevice_notifiers_info+0xbe/0x140 net/core/dev.c:1992
  call_netdevice_notifiers_extack net/core/dev.c:2030 [inline]
  call_netdevice_notifiers net/core/dev.c:2044 [inline]
  dev_close_many+0x333/0x6a0 net/core/dev.c:1585
  unregister_netdevice_many_notify+0x46d/0x19f0 net/core/dev.c:11193
  unregister_netdevice_many net/core/dev.c:11276 [inline]
  default_device_exit_batch+0x85b/0xae0 net/core/dev.c:11759
  ops_exit_list+0x128/0x180 net/core/net_namespace.c:178
  cleanup_net+0x5b7/0xbf0 net/core/net_namespace.c:640
  process_one_work+0x9fb/0x1b60 kernel/workqueue.c:3231
  process_scheduled_works kernel/workqueue.c:3312 [inline]
  worker_thread+0x6c8/0xf70 kernel/workqueue.c:3393
  kthread+0x2c1/0x3a0 kernel/kthread.c:389
  ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
  ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244

Fixes: d52d3997f843 ("ipv6: Create percpu rt6_info")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/r/20240604193549.981839-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/ipv6/ip6_fib.c | 6 +++++-
 net/ipv6/route.c   | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 31d77885bcae..6e57c03e3255 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -966,6 +966,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
 	if (!fib6_nh->rt6i_pcpu)
 		return;
 
+	rcu_read_lock();
 	/* release the reference to this fib entry from
 	 * all of its cached pcpu routes
 	 */
@@ -974,7 +975,9 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
 		struct rt6_info *pcpu_rt;
 
 		ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
-		pcpu_rt = *ppcpu_rt;
+
+		/* Paired with xchg() in rt6_get_pcpu_route() */
+		pcpu_rt = READ_ONCE(*ppcpu_rt);
 
 		/* only dropping the 'from' reference if the cached route
 		 * is using 'match'. The cached pcpu_rt->from only changes
@@ -988,6 +991,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
 			fib6_info_release(from);
 		}
 	}
+	rcu_read_unlock();
 }
 
 struct fib6_nh_pcpu_arg {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a504b88ec06b..f083d9faba6b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1409,6 +1409,7 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
 		struct rt6_info *prev, **p;
 
 		p = this_cpu_ptr(res->nh->rt6i_pcpu);
+		/* Paired with READ_ONCE() in __fib6_drop_pcpu_from() */
 		prev = xchg(p, NULL);
 		if (prev) {
 			dst_dev_put(&prev->dst);
-- 
cgit v1.2.3


From 77a92660d8fe8d29503fae768d9f5eb529c88b36 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 4 Jun 2024 01:19:04 +0900
Subject: kconfig: remove wrong expr_trans_bool()

expr_trans_bool() performs an incorrect transformation.

[Test Code]

    config MODULES
            def_bool y
            modules

    config A
            def_bool y
            select C if B != n

    config B
            def_tristate m

    config C
            tristate

[Result]

    CONFIG_MODULES=y
    CONFIG_A=y
    CONFIG_B=m
    CONFIG_C=m

This output is incorrect because CONFIG_C=y is expected.

Documentation/kbuild/kconfig-language.rst clearly explains the function
of the '!=' operator:

    If the values of both symbols are equal, it returns 'n',
    otherwise 'y'.

Therefore, the statement:

    select C if B != n

should be equivalent to:

    select C if y

Or, more simply:

    select C

Hence, the symbol C should be selected by the value of A, which is 'y'.

However, expr_trans_bool() wrongly transforms it to:

    select C if B

Therefore, the symbol C is selected by (A && B), which is 'm'.

The comment block of expr_trans_bool() correctly explains its intention:

  * bool FOO!=n => FOO
    ^^^^

If FOO is bool, FOO!=n can be simplified into FOO. This is correct.

However, the actual code performs this transformation when FOO is
tristate:

    if (e->left.sym->type == S_TRISTATE) {
                             ^^^^^^^^^^

While it can be fixed to S_BOOLEAN, there is no point in doing so
because expr_tranform() already transforms FOO!=n to FOO when FOO is
bool. (see the "case E_UNEQUAL" part)

expr_trans_bool() is wrong and unnecessary.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
---
 scripts/kconfig/expr.c | 29 -----------------------------
 scripts/kconfig/expr.h |  1 -
 scripts/kconfig/menu.c |  2 --
 3 files changed, 32 deletions(-)

diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c
index 4d95fce5f9a7..fcc190b67b6f 100644
--- a/scripts/kconfig/expr.c
+++ b/scripts/kconfig/expr.c
@@ -396,35 +396,6 @@ static struct expr *expr_eliminate_yn(struct expr *e)
 	return e;
 }
 
-/*
- * bool FOO!=n => FOO
- */
-struct expr *expr_trans_bool(struct expr *e)
-{
-	if (!e)
-		return NULL;
-	switch (e->type) {
-	case E_AND:
-	case E_OR:
-	case E_NOT:
-		e->left.expr = expr_trans_bool(e->left.expr);
-		e->right.expr = expr_trans_bool(e->right.expr);
-		break;
-	case E_UNEQUAL:
-		// FOO!=n -> FOO
-		if (e->left.sym->type == S_TRISTATE) {
-			if (e->right.sym == &symbol_no) {
-				e->type = E_SYMBOL;
-				e->right.sym = NULL;
-			}
-		}
-		break;
-	default:
-		;
-	}
-	return e;
-}
-
 /*
  * e1 || e2 -> ?
  */
diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h
index fa50fc45622e..7c0c242318bc 100644
--- a/scripts/kconfig/expr.h
+++ b/scripts/kconfig/expr.h
@@ -284,7 +284,6 @@ void expr_free(struct expr *e);
 void expr_eliminate_eq(struct expr **ep1, struct expr **ep2);
 int expr_eq(struct expr *e1, struct expr *e2);
 tristate expr_calc_value(struct expr *e);
-struct expr *expr_trans_bool(struct expr *e);
 struct expr *expr_eliminate_dups(struct expr *e);
 struct expr *expr_transform(struct expr *e);
 int expr_contains_symbol(struct expr *dep, struct symbol *sym);
diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index 53151c5a6028..eef9b63cdf11 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c
@@ -398,8 +398,6 @@ static void _menu_finalize(struct menu *parent, bool inside_choice)
 				dep = expr_transform(dep);
 				dep = expr_alloc_and(expr_copy(basedep), dep);
 				dep = expr_eliminate_dups(dep);
-				if (menu->sym && menu->sym->type != S_TRISTATE)
-					dep = expr_trans_bool(dep);
 				prop->visible.expr = dep;
 
 				/*
-- 
cgit v1.2.3


From 0dcc53abf58d572d34c5313de85f607cd33fc691 Mon Sep 17 00:00:00 2001
From: Su Hui <suhui@nfschina.com>
Date: Wed, 5 Jun 2024 11:47:43 +0800
Subject: net: ethtool: fix the error condition in
 ethtool_get_phy_stats_ethtool()

Clang static checker (scan-build) warning:
net/ethtool/ioctl.c:line 2233, column 2
Called function pointer is null (null dereference).

Return '-EOPNOTSUPP' when 'ops->get_ethtool_phy_stats' is NULL to fix
this typo error.

Fixes: 201ed315f967 ("net/ethtool/ioctl: split ethtool_get_phy_stats into multiple helpers")
Signed-off-by: Su Hui <suhui@nfschina.com>
Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
Reviewed-by: Hariprasad Kelam <hkelam@marvell.com>
Link: https://lore.kernel.org/r/20240605034742.921751-1-suhui@nfschina.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/ethtool/ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 5a55270aa86e..e645d751a5e8 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -2220,7 +2220,7 @@ static int ethtool_get_phy_stats_ethtool(struct net_device *dev,
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 	int n_stats, ret;
 
-	if (!ops || !ops->get_sset_count || ops->get_ethtool_phy_stats)
+	if (!ops || !ops->get_sset_count || !ops->get_ethtool_phy_stats)
 		return -EOPNOTSUPP;
 
 	n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
-- 
cgit v1.2.3


From 09fe2bfa6b83f865126ce3964744863f69a4a030 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 6 Jun 2024 21:14:45 +1000
Subject: ata: pata_macio: Fix max_segment_size with PAGE_SIZE == 64K

The pata_macio driver advertises a max_segment_size of 0xff00, because
the hardware doesn't cope with requests >= 64K.

However the SCSI core requires max_segment_size to be at least
PAGE_SIZE, which is a problem for pata_macio when the kernel is built
with 64K pages.

In older kernels the SCSI core would just increase the segment size to
be equal to PAGE_SIZE, however since the commit tagged below it causes a
warning and the device fails to probe:

  WARNING: CPU: 0 PID: 26 at block/blk-settings.c:202 .blk_validate_limits+0x2f8/0x35c
  CPU: 0 PID: 26 Comm: kworker/u4:1 Not tainted 6.10.0-rc1 #1
  Hardware name: PowerMac7,2 PPC970 0x390202 PowerMac
  ...
  NIP .blk_validate_limits+0x2f8/0x35c
  LR  .blk_alloc_queue+0xc0/0x2f8
  Call Trace:
    .blk_alloc_queue+0xc0/0x2f8
    .blk_mq_alloc_queue+0x60/0xf8
    .scsi_alloc_sdev+0x208/0x3c0
    .scsi_probe_and_add_lun+0x314/0x52c
    .__scsi_add_device+0x170/0x1a4
    .ata_scsi_scan_host+0x2bc/0x3e4
    .async_port_probe+0x6c/0xa0
    .async_run_entry_fn+0x60/0x1bc
    .process_one_work+0x228/0x510
    .worker_thread+0x360/0x530
    .kthread+0x134/0x13c
    .start_kernel_thread+0x10/0x14
  ...
  scsi_alloc_sdev: Allocation failure during SCSI scanning, some SCSI devices might not be configured

Although the hardware can't cope with a 64K segment, the driver
already deals with that internally by splitting large requests in
pata_macio_qc_prep(). That is how the driver has managed to function
until now on 64K kernels.

So fix the driver to advertise a max_segment_size of 64K, which avoids
the warning and keeps the SCSI core happy.

Fixes: afd53a3d8528 ("scsi: core: Initialize scsi midlayer limits before allocating the queue")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Closes: https://lore.kernel.org/all/ce2bf6af-4382-4fe1-b392-cc6829f5ceb2@roeck-us.net/
Reported-by: Doru Iorgulescu <doru.iorgulescu1@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218858
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Signed-off-by: Niklas Cassel <cassel@kernel.org>
---
 drivers/ata/pata_macio.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index 817838e2f70e..3cb455a32d92 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c
@@ -915,10 +915,13 @@ static const struct scsi_host_template pata_macio_sht = {
 	.sg_tablesize		= MAX_DCMDS,
 	/* We may not need that strict one */
 	.dma_boundary		= ATA_DMA_BOUNDARY,
-	/* Not sure what the real max is but we know it's less than 64K, let's
-	 * use 64K minus 256
+	/*
+	 * The SCSI core requires the segment size to cover at least a page, so
+	 * for 64K page size kernels this must be at least 64K. However the
+	 * hardware can't handle 64K, so pata_macio_qc_prep() will split large
+	 * requests.
 	 */
-	.max_segment_size	= MAX_DBDMA_SEG,
+	.max_segment_size	= SZ_64K,
 	.device_configure	= pata_macio_device_configure,
 	.sdev_groups		= ata_common_sdev_groups,
 	.can_queue		= ATA_DEF_QUEUE,
-- 
cgit v1.2.3


From 41b02ea4c0adfcc6761fbfed42c3ce6b6412d881 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
Date: Wed, 5 Jun 2024 11:21:16 +0200
Subject: selftests: net: lib: support errexit with busywait

If errexit is enabled ('set -e'), loopy_wait -- or busywait and others
using it -- will stop after the first failure.

Note that if the returned status of loopy_wait is checked, and even if
errexit is enabled, Bash will not stop at the first error.

Fixes: 25ae948b4478 ("selftests/net: add lib.sh")
Cc: stable@vger.kernel.org
Acked-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://lore.kernel.org/r/20240605-upstream-net-20240605-selftests-net-lib-fixes-v1-1-b3afadd368c9@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/lib.sh | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index edc030e81a46..a422e10d3d3a 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -67,9 +67,7 @@ loopy_wait()
 	while true
 	do
 		local out
-		out=$("$@")
-		local ret=$?
-		if ((!ret)); then
+		if out=$("$@"); then
 			echo -n "$out"
 			return 0
 		fi
-- 
cgit v1.2.3


From 79322174bcc780b99795cb89d237b26006a8b94b Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
Date: Wed, 5 Jun 2024 11:21:17 +0200
Subject: selftests: net: lib: avoid error removing empty netns name

If there is an error to create the first netns with 'setup_ns()',
'cleanup_ns()' will be called with an empty string as first parameter.

The consequences is that 'cleanup_ns()' will try to delete an invalid
netns, and wait 20 seconds if the netns list is empty.

Instead of just checking if the name is not empty, convert the string
separated by spaces to an array. Manipulating the array is cleaner, and
calling 'cleanup_ns()' with an empty array will be a no-op.

Fixes: 25ae948b4478 ("selftests/net: add lib.sh")
Cc: stable@vger.kernel.org
Acked-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://lore.kernel.org/r/20240605-upstream-net-20240605-selftests-net-lib-fixes-v1-2-b3afadd368c9@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/lib.sh | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index a422e10d3d3a..e2f51102d7e1 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -15,7 +15,7 @@ ksft_xfail=2
 ksft_skip=4
 
 # namespace list created by setup_ns
-NS_LIST=""
+NS_LIST=()
 
 ##############################################################################
 # Helpers
@@ -137,6 +137,7 @@ cleanup_ns()
 	fi
 
 	for ns in "$@"; do
+		[ -z "${ns}" ] && continue
 		ip netns delete "${ns}" &> /dev/null
 		if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then
 			echo "Warn: Failed to remove namespace $ns"
@@ -150,7 +151,7 @@ cleanup_ns()
 
 cleanup_all_ns()
 {
-	cleanup_ns $NS_LIST
+	cleanup_ns "${NS_LIST[@]}"
 }
 
 # setup netns with given names as prefix. e.g
@@ -159,7 +160,7 @@ setup_ns()
 {
 	local ns=""
 	local ns_name=""
-	local ns_list=""
+	local ns_list=()
 	local ns_exist=
 	for ns_name in "$@"; do
 		# Some test may setup/remove same netns multi times
@@ -175,13 +176,13 @@ setup_ns()
 
 		if ! ip netns add "$ns"; then
 			echo "Failed to create namespace $ns_name"
-			cleanup_ns "$ns_list"
+			cleanup_ns "${ns_list[@]}"
 			return $ksft_skip
 		fi
 		ip -n "$ns" link set lo up
-		! $ns_exist && ns_list="$ns_list $ns"
+		! $ns_exist && ns_list+=("$ns")
 	done
-	NS_LIST="$NS_LIST $ns_list"
+	NS_LIST+=("${ns_list[@]}")
 }
 
 tc_rule_stats_get()
-- 
cgit v1.2.3


From 84a8bc3ec225b28067b168e9410e452c83d706da Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe@kernel.org>
Date: Wed, 5 Jun 2024 11:21:18 +0200
Subject: selftests: net: lib: set 'i' as local

Without this, the 'i' variable declared before could be overridden by
accident, e.g.

  for i in "${@}"; do
      __ksft_status_merge "${i}"  ## 'i' has been modified
      foo "${i}"                  ## using 'i' with an unexpected value
  done

After a quick look, it looks like 'i' is currently not used after having
been modified in __ksft_status_merge(), but still, better be safe than
sorry. I saw this while modifying the same file, not because I suspected
an issue somewhere.

Fixes: 596c8819cb78 ("selftests: forwarding: Have RET track kselftest framework constants")
Acked-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
Link: https://lore.kernel.org/r/20240605-upstream-net-20240605-selftests-net-lib-fixes-v1-3-b3afadd368c9@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/lib.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index e2f51102d7e1..9155c914c064 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -27,6 +27,7 @@ __ksft_status_merge()
 	local -A weights
 	local weight=0
 
+	local i
 	for i in "$@"; do
 		weights[$i]=$((weight++))
 	done
-- 
cgit v1.2.3


From f3a5367c679d31473d3fbb391675055b4792c309 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Thu, 6 Jun 2024 11:01:51 +0930
Subject: btrfs: protect folio::private when attaching extent buffer folios
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[BUG]
Since v6.8 there are rare kernel crashes reported by various people,
the common factor is bad page status error messages like this:

  BUG: Bad page state in process kswapd0  pfn:d6e840
  page: refcount:0 mapcount:0 mapping:000000007512f4f2 index:0x2796c2c7c
  pfn:0xd6e840
  aops:btree_aops ino:1
  flags: 0x17ffffe0000008(uptodate|node=0|zone=2|lastcpupid=0x3fffff)
  page_type: 0xffffffff()
  raw: 0017ffffe0000008 dead000000000100 dead000000000122 ffff88826d0be4c0
  raw: 00000002796c2c7c 0000000000000000 00000000ffffffff 0000000000000000
  page dumped because: non-NULL mapping

[CAUSE]
Commit 09e6cef19c9f ("btrfs: refactor alloc_extent_buffer() to
allocate-then-attach method") changes the sequence when allocating a new
extent buffer.

Previously we always called grab_extent_buffer() under
mapping->i_private_lock, to ensure the safety on modification on
folio::private (which is a pointer to extent buffer for regular
sectorsize).

This can lead to the following race:

Thread A is trying to allocate an extent buffer at bytenr X, with 4
4K pages, meanwhile thread B is trying to release the page at X + 4K
(the second page of the extent buffer at X).

           Thread A                |                 Thread B
-----------------------------------+-------------------------------------
                                   | btree_release_folio()
				   | | This is for the page at X + 4K,
				   | | Not page X.
				   | |
alloc_extent_buffer()              | |- release_extent_buffer()
|- filemap_add_folio() for the     | |  |- atomic_dec_and_test(eb->refs)
|  page at bytenr X (the first     | |  |
|  page).                          | |  |
|  Which returned -EEXIST.         | |  |
|                                  | |  |
|- filemap_lock_folio()            | |  |
|  Returned the first page locked. | |  |
|                                  | |  |
|- grab_extent_buffer()            | |  |
|  |- atomic_inc_not_zero()        | |  |
|  |  Returned false               | |  |
|  |- folio_detach_private()       | |  |- folio_detach_private() for X
|     |- folio_test_private()      | |     |- folio_test_private()
      |  Returned true             | |     |  Returned true
      |- folio_put()               |       |- folio_put()

Now there are two puts on the same folio at folio X, leading to refcount
underflow of the folio X, and eventually causing the BUG_ON() on the
page->mapping.

The condition is not that easy to hit:

- The release must be triggered for the middle page of an eb
  If the release is on the same first page of an eb, page lock would kick
  in and prevent the race.

- folio_detach_private() has a very small race window
  It's only between folio_test_private() and folio_clear_private().

That's exactly when mapping->i_private_lock is used to prevent such race,
and commit 09e6cef19c9f ("btrfs: refactor alloc_extent_buffer() to
allocate-then-attach method") screwed that up.

At that time, I thought the page lock would kick in as
filemap_release_folio() also requires the page to be locked, but forgot
the filemap_release_folio() only locks one page, not all pages of an
extent buffer.

[FIX]
Move all the code requiring i_private_lock into
attach_eb_folio_to_filemap(), so that everything is done with proper
lock protection.

Furthermore to prevent future problems, add an extra
lockdep_assert_locked() to ensure we're holding the proper lock.

To reproducer that is able to hit the race (takes a few minutes with
instrumented code inserting delays to alloc_extent_buffer()):

  #!/bin/sh
  drop_caches () {
	  while(true); do
		  echo 3 > /proc/sys/vm/drop_caches
		  echo 1 > /proc/sys/vm/compact_memory
	  done
  }

  run_tar () {
	  while(true); do
		  for x in `seq 1 80` ; do
			  tar cf /dev/zero /mnt > /dev/null &
		  done
		  wait
	  done
  }

  mkfs.btrfs -f -d single -m single /dev/vda
  mount -o noatime /dev/vda /mnt
  # create 200,000 files, 1K each
  ./simoop -n 200000 -E -f 1k /mnt
  drop_caches &
  (run_tar)

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/linux-btrfs/CAHk-=wgt362nGfScVOOii8cgKn2LVVHeOvOA7OBwg1OwbuJQcw@mail.gmail.com/
Reported-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Link: https://lore.kernel.org/lkml/CABXGCsPktcHQOvKTbPaTwegMExije=Gpgci5NW=hqORo-s7diA@mail.gmail.com/
Reported-by: Toralf Förster <toralf.foerster@gmx.de>
Link: https://lore.kernel.org/linux-btrfs/e8b3311c-9a75-4903-907f-fc0f7a3fe423@gmx.de/
Reported-by: syzbot+f80b066392366b4af85e@syzkaller.appspotmail.com
Fixes: 09e6cef19c9f ("btrfs: refactor alloc_extent_buffer() to allocate-then-attach method")
CC: stable@vger.kernel.org # 6.8+
CC: Chris Mason <clm@fb.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 60 +++++++++++++++++++++++++++-------------------------
 1 file changed, 31 insertions(+), 29 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 597387e9f040..f688fab55251 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3689,6 +3689,8 @@ static struct extent_buffer *grab_extent_buffer(
 	struct folio *folio = page_folio(page);
 	struct extent_buffer *exists;
 
+	lockdep_assert_held(&page->mapping->i_private_lock);
+
 	/*
 	 * For subpage case, we completely rely on radix tree to ensure we
 	 * don't try to insert two ebs for the same bytenr.  So here we always
@@ -3756,13 +3758,14 @@ static int check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
  * The caller needs to free the existing folios and retry using the same order.
  */
 static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
+				      struct btrfs_subpage *prealloc,
 				      struct extent_buffer **found_eb_ret)
 {
 
 	struct btrfs_fs_info *fs_info = eb->fs_info;
 	struct address_space *mapping = fs_info->btree_inode->i_mapping;
 	const unsigned long index = eb->start >> PAGE_SHIFT;
-	struct folio *existing_folio;
+	struct folio *existing_folio = NULL;
 	int ret;
 
 	ASSERT(found_eb_ret);
@@ -3774,12 +3777,14 @@ retry:
 	ret = filemap_add_folio(mapping, eb->folios[i], index + i,
 				GFP_NOFS | __GFP_NOFAIL);
 	if (!ret)
-		return 0;
+		goto finish;
 
 	existing_folio = filemap_lock_folio(mapping, index + i);
 	/* The page cache only exists for a very short time, just retry. */
-	if (IS_ERR(existing_folio))
+	if (IS_ERR(existing_folio)) {
+		existing_folio = NULL;
 		goto retry;
+	}
 
 	/* For now, we should only have single-page folios for btree inode. */
 	ASSERT(folio_nr_pages(existing_folio) == 1);
@@ -3790,14 +3795,13 @@ retry:
 		return -EAGAIN;
 	}
 
-	if (fs_info->nodesize < PAGE_SIZE) {
-		/*
-		 * We're going to reuse the existing page, can drop our page
-		 * and subpage structure now.
-		 */
+finish:
+	spin_lock(&mapping->i_private_lock);
+	if (existing_folio && fs_info->nodesize < PAGE_SIZE) {
+		/* We're going to reuse the existing page, can drop our folio now. */
 		__free_page(folio_page(eb->folios[i], 0));
 		eb->folios[i] = existing_folio;
-	} else {
+	} else if (existing_folio) {
 		struct extent_buffer *existing_eb;
 
 		existing_eb = grab_extent_buffer(fs_info,
@@ -3805,6 +3809,7 @@ retry:
 		if (existing_eb) {
 			/* The extent buffer still exists, we can use it directly. */
 			*found_eb_ret = existing_eb;
+			spin_unlock(&mapping->i_private_lock);
 			folio_unlock(existing_folio);
 			folio_put(existing_folio);
 			return 1;
@@ -3813,6 +3818,22 @@ retry:
 		__free_page(folio_page(eb->folios[i], 0));
 		eb->folios[i] = existing_folio;
 	}
+	eb->folio_size = folio_size(eb->folios[i]);
+	eb->folio_shift = folio_shift(eb->folios[i]);
+	/* Should not fail, as we have preallocated the memory. */
+	ret = attach_extent_buffer_folio(eb, eb->folios[i], prealloc);
+	ASSERT(!ret);
+	/*
+	 * To inform we have an extra eb under allocation, so that
+	 * detach_extent_buffer_page() won't release the folio private when the
+	 * eb hasn't been inserted into radix tree yet.
+	 *
+	 * The ref will be decreased when the eb releases the page, in
+	 * detach_extent_buffer_page().  Thus needs no special handling in the
+	 * error path.
+	 */
+	btrfs_folio_inc_eb_refs(fs_info, eb->folios[i]);
+	spin_unlock(&mapping->i_private_lock);
 	return 0;
 }
 
@@ -3824,7 +3845,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	int attached = 0;
 	struct extent_buffer *eb;
 	struct extent_buffer *existing_eb = NULL;
-	struct address_space *mapping = fs_info->btree_inode->i_mapping;
 	struct btrfs_subpage *prealloc = NULL;
 	u64 lockdep_owner = owner_root;
 	bool page_contig = true;
@@ -3890,7 +3910,7 @@ reallocate:
 	for (int i = 0; i < num_folios; i++) {
 		struct folio *folio;
 
-		ret = attach_eb_folio_to_filemap(eb, i, &existing_eb);
+		ret = attach_eb_folio_to_filemap(eb, i, prealloc, &existing_eb);
 		if (ret > 0) {
 			ASSERT(existing_eb);
 			goto out;
@@ -3927,24 +3947,6 @@ reallocate:
 		 * and free the allocated page.
 		 */
 		folio = eb->folios[i];
-		eb->folio_size = folio_size(folio);
-		eb->folio_shift = folio_shift(folio);
-		spin_lock(&mapping->i_private_lock);
-		/* Should not fail, as we have preallocated the memory */
-		ret = attach_extent_buffer_folio(eb, folio, prealloc);
-		ASSERT(!ret);
-		/*
-		 * To inform we have extra eb under allocation, so that
-		 * detach_extent_buffer_page() won't release the folio private
-		 * when the eb hasn't yet been inserted into radix tree.
-		 *
-		 * The ref will be decreased when the eb released the page, in
-		 * detach_extent_buffer_page().
-		 * Thus needs no special handling in error path.
-		 */
-		btrfs_folio_inc_eb_refs(fs_info, folio);
-		spin_unlock(&mapping->i_private_lock);
-
 		WARN_ON(btrfs_folio_test_dirty(fs_info, folio, eb->start, eb->len));
 
 		/*
-- 
cgit v1.2.3


From bb93148ca831c30248270b4d81d3ccca23faa1bf Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Wed, 15 May 2024 19:22:22 +0300
Subject: MAINTAINERS: update Xe driver maintainers

Because I left Intel, I'm removing myself from the list
of Xe driver maintainers.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Acked-by: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240515162222.12958-3-ogabbay@kernel.org
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
(cherry picked from commit 8de6625dafd202da733188f632701e9109188035)
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index d6c90161c7bf..69f553485b9c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11035,7 +11035,6 @@ F:	include/uapi/drm/i915_drm.h
 
 INTEL DRM XE DRIVER (Lunar Lake and newer)
 M:	Lucas De Marchi <lucas.demarchi@intel.com>
-M:	Oded Gabbay <ogabbay@kernel.org>
 M:	Thomas Hellström <thomas.hellstrom@linux.intel.com>
 L:	intel-xe@lists.freedesktop.org
 S:	Supported
-- 
cgit v1.2.3


From a9f9b30e1748252d158f78a0c0affdc949671dd1 Mon Sep 17 00:00:00 2001
From: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Date: Sun, 2 Jun 2024 21:09:59 +0200
Subject: MAINTAINERS: Update Xe driver maintainers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add Rodrigo Vivi as an Xe driver maintainer.

v2:
- Cc also Lucas De Marchi (Rodrigo vivi)
- Remove a blank line in commit the commit message (Lucas De Marchi)

Cc: David Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: dri-devel@lists.freedesktop.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Acked-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240602190959.2981-1-thomas.hellstrom@linux.intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 69f553485b9c..c517af00df84 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11036,6 +11036,7 @@ F:	include/uapi/drm/i915_drm.h
 INTEL DRM XE DRIVER (Lunar Lake and newer)
 M:	Lucas De Marchi <lucas.demarchi@intel.com>
 M:	Thomas Hellström <thomas.hellstrom@linux.intel.com>
+M:	Rodrigo Vivi <rodrigo.vivi@intel.com>
 L:	intel-xe@lists.freedesktop.org
 S:	Supported
 W:	https://drm.pages.freedesktop.org/intel-docs/
-- 
cgit v1.2.3


From 96c965667b9dbbd713acdffa95ebab8c225f8595 Mon Sep 17 00:00:00 2001
From: Richard Acayan <mailingradian@gmail.com>
Date: Wed, 5 Jun 2024 13:58:09 -0400
Subject: kbuild: explicitly run mksysmap as sed script from link-vmlinux.sh

In commit b18b047002b7 ("kbuild: change scripts/mksysmap into sed
script"), the mksysmap script was transformed into a sed script,
made directly executable with "#!/bin/sed -f". Apparently, the path to
sed is different on NixOS.

The shebang can't use the env command, otherwise the "sed -f" command
would be treated as a single argument. This can be solved with the -S
flag, but that is a GNU extension. Explicitly use sed instead of relying
on the executable shebang to fix NixOS builds without breaking build
environments using Busybox.

Fixes: b18b047002b7 ("kbuild: change scripts/mksysmap into sed script")
Reported-by: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Richard Acayan <mailingradian@gmail.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Tested-by: Dmitry Safonov <0x7f454c46@gmail.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/link-vmlinux.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 46ce5d04dbeb..518c70b8db50 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -193,7 +193,7 @@ kallsyms_step()
 mksysmap()
 {
 	info NM ${2}
-	${NM} -n "${1}" | "${srctree}/scripts/mksysmap" > "${2}"
+	${NM} -n "${1}" | sed -f "${srctree}/scripts/mksysmap" > "${2}"
 }
 
 sorttable()
-- 
cgit v1.2.3


From 9185afeac2a3dcce8300a5684291a43c2838cfd6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Fri, 7 Jun 2024 03:36:12 +0900
Subject: modpost: do not warn about missing MODULE_DESCRIPTION() for vmlinux.o

Building with W=1 incorrectly emits the following warning:

  WARNING: modpost: missing MODULE_DESCRIPTION() in vmlinux.o

This check should apply only to modules.

Fixes: 1fffe7a34c89 ("script: modpost: emit a warning when the description is missing")
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Vincenzo Palazzo <vincenzopalazzodev@gmail.com>
---
 scripts/mod/modpost.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 937294ff164f..f48d72d22dc2 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1647,10 +1647,11 @@ static void read_symbols(const char *modname)
 			namespace = get_next_modinfo(&info, "import_ns",
 						     namespace);
 		}
+
+		if (extra_warn && !get_modinfo(&info, "description"))
+			warn("missing MODULE_DESCRIPTION() in %s\n", modname);
 	}
 
-	if (extra_warn && !get_modinfo(&info, "description"))
-		warn("missing MODULE_DESCRIPTION() in %s\n", modname);
 	for (sym = info.symtab_start; sym < info.symtab_stop; sym++) {
 		symname = remove_dot(info.strtab + sym->st_name);
 
-- 
cgit v1.2.3


From 02c418774f76a0a36a6195c9dbf8971eb4130a15 Mon Sep 17 00:00:00 2001
From: Enzo Matsumiya <ematsumiya@suse.de>
Date: Thu, 6 Jun 2024 13:13:13 -0300
Subject: smb: client: fix deadlock in smb2_find_smb_tcon()

Unlock cifs_tcp_ses_lock before calling cifs_put_smb_ses() to avoid such
deadlock.

Cc: stable@vger.kernel.org
Signed-off-by: Enzo Matsumiya <ematsumiya@suse.de>
Reviewed-by: Shyam Prasad N <sprasad@microsoft.com>
Reviewed-by: Paulo Alcantara (Red Hat) <pc@manguebit.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/smb2transport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c
index 02135a605305..1476c445cadc 100644
--- a/fs/smb/client/smb2transport.c
+++ b/fs/smb/client/smb2transport.c
@@ -216,8 +216,8 @@ smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32  tid)
 	}
 	tcon = smb2_find_smb_sess_tcon_unlocked(ses, tid);
 	if (!tcon) {
-		cifs_put_smb_ses(ses);
 		spin_unlock(&cifs_tcp_ses_lock);
+		cifs_put_smb_ses(ses);
 		return NULL;
 	}
 	spin_unlock(&cifs_tcp_ses_lock);
-- 
cgit v1.2.3


From a88d60903696c01de577558080ec4fc738a70475 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 31 May 2024 15:53:42 +0100
Subject: cifs: Don't advance the I/O iterator before terminating subrequest

There's now no need to make sure subreq->io_iter is advanced to match
subreq->transferred before calling one of the netfs subrequest termination
functions as the check has been removed netfslib and the iterator is reset
prior to retrying a subreq.

Fixes: 3ee1a1fc3981 ("cifs: Cut over to using netfslib")
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Paulo Alcantara <pc@manguebit.com>
cc: Shyam Prasad N <nspmangalore@gmail.com>
cc: Rohith Surabattula <rohiths.msft@gmail.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cifs@vger.kernel.org
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/smb2pdu.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 993ac36c3d58..38a06e8a0f90 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -4577,8 +4577,6 @@ smb2_readv_callback(struct mid_q_entry *mid)
 		if (rdata->subreq.start < rdata->subreq.rreq->i_size)
 			rdata->result = 0;
 	}
-	if (rdata->result == 0 || rdata->result == -EAGAIN)
-		iov_iter_advance(&rdata->subreq.io_iter, rdata->got_bytes);
 	rdata->credits.value = 0;
 	netfs_subreq_terminated(&rdata->subreq,
 				(rdata->result == 0 || rdata->result == -EAGAIN) ?
@@ -4789,7 +4787,6 @@ smb2_writev_callback(struct mid_q_entry *mid)
 			wdata->result = -ENOSPC;
 		else
 			wdata->subreq.len = written;
-		iov_iter_advance(&wdata->subreq.io_iter, written);
 		break;
 	case MID_REQUEST_SUBMITTED:
 	case MID_RETRY_NEEDED:
-- 
cgit v1.2.3


From 64054eb716db52e4246527dc9414377c5bc5b01d Mon Sep 17 00:00:00 2001
From: Jeff Johnson <quic_jjohnson@quicinc.com>
Date: Thu, 6 Jun 2024 20:23:50 -0700
Subject: gpio: add missing MODULE_DESCRIPTION() macros

On x86, make allmodconfig && make W=1 C=1 reports:
WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpio/gpio-gw-pld.o
WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpio/gpio-mc33880.o
WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/gpio/gpio-pcf857x.o

Add the missing invocations of the MODULE_DESCRIPTION() macro,
including the one missing in gpio-pl061.c, which is not built for x86.

Signed-off-by: Jeff Johnson <quic_jjohnson@quicinc.com>
Link: https://lore.kernel.org/r/20240606-md-drivers-gpio-v1-1-cb42d240ca5c@quicinc.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-gw-pld.c  | 1 +
 drivers/gpio/gpio-mc33880.c | 1 +
 drivers/gpio/gpio-pcf857x.c | 1 +
 drivers/gpio/gpio-pl061.c   | 1 +
 4 files changed, 4 insertions(+)

diff --git a/drivers/gpio/gpio-gw-pld.c b/drivers/gpio/gpio-gw-pld.c
index 899335da93c7..7e29a2d8de1a 100644
--- a/drivers/gpio/gpio-gw-pld.c
+++ b/drivers/gpio/gpio-gw-pld.c
@@ -130,5 +130,6 @@ static struct i2c_driver gw_pld_driver = {
 };
 module_i2c_driver(gw_pld_driver);
 
+MODULE_DESCRIPTION("Gateworks I2C PLD GPIO expander");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
diff --git a/drivers/gpio/gpio-mc33880.c b/drivers/gpio/gpio-mc33880.c
index cd9b16dbe1a9..94f6fefc011b 100644
--- a/drivers/gpio/gpio-mc33880.c
+++ b/drivers/gpio/gpio-mc33880.c
@@ -168,5 +168,6 @@ static void __exit mc33880_exit(void)
 module_exit(mc33880_exit);
 
 MODULE_AUTHOR("Mocean Laboratories <info@mocean-labs.com>");
+MODULE_DESCRIPTION("MC33880 high-side/low-side switch GPIO driver");
 MODULE_LICENSE("GPL v2");
 
diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c
index 53b69abe6787..7c57eaeb0afe 100644
--- a/drivers/gpio/gpio-pcf857x.c
+++ b/drivers/gpio/gpio-pcf857x.c
@@ -438,5 +438,6 @@ static void __exit pcf857x_exit(void)
 }
 module_exit(pcf857x_exit);
 
+MODULE_DESCRIPTION("Driver for pcf857x, pca857x, and pca967x I2C GPIO expanders");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("David Brownell");
diff --git a/drivers/gpio/gpio-pl061.c b/drivers/gpio/gpio-pl061.c
index 9fc1f3dd4190..a211a02d4b4a 100644
--- a/drivers/gpio/gpio-pl061.c
+++ b/drivers/gpio/gpio-pl061.c
@@ -438,4 +438,5 @@ static struct amba_driver pl061_gpio_driver = {
 };
 module_amba_driver(pl061_gpio_driver);
 
+MODULE_DESCRIPTION("Driver for the ARM PrimeCell(tm) General Purpose Input/Output (PL061)");
 MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From 89e1ee118d6f0ee6bd6e80d8fe08839875daa241 Mon Sep 17 00:00:00 2001
From: Andrew Ballance <andrewjballance@gmail.com>
Date: Sun, 2 Jun 2024 03:50:23 -0500
Subject: hid: asus: asus_report_fixup: fix potential read out of bounds

syzbot reported a potential read out of bounds in asus_report_fixup.

this patch adds checks so that a read out of bounds will not occur

Signed-off-by: Andrew Ballance <andrewjballance@gmail.com>
Reported-by:  <syzbot+07762f019fd03d01f04c@syzkaller.appspotmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=07762f019fd03d01f04c
Fixes: 59d2f5b7392e ("HID: asus: fix more n-key report descriptors if n-key quirked")
Link: https://lore.kernel.org/r/20240602085023.1720492-1-andrewjballance@gmail.com
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 drivers/hid/hid-asus.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index 02de2bf4f790..37e6d25593c2 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -1204,8 +1204,8 @@ static __u8 *asus_report_fixup(struct hid_device *hdev, __u8 *rdesc,
 	}
 
 	/* match many more n-key devices */
-	if (drvdata->quirks & QUIRK_ROG_NKEY_KEYBOARD) {
-		for (int i = 0; i < *rsize + 1; i++) {
+	if (drvdata->quirks & QUIRK_ROG_NKEY_KEYBOARD && *rsize > 15) {
+		for (int i = 0; i < *rsize - 15; i++) {
 			/* offset to the count from 0x5a report part always 14 */
 			if (rdesc[i] == 0x85 && rdesc[i + 1] == 0x5a &&
 			    rdesc[i + 14] == 0x95 && rdesc[i + 15] == 0x05) {
-- 
cgit v1.2.3


From 0c7dd00de018ff70b3452c424901816e26366a8a Mon Sep 17 00:00:00 2001
From: Aseda Aboagye <aaboagye@chromium.org>
Date: Tue, 4 Jun 2024 23:10:47 +0000
Subject: input: Add event code for accessibility key

HUTRR116 added support for a new usage titled "System Accessibility
Binding" which toggles a system-wide bound accessibility UI or command.
This commit simply adds a new event code for the usage.

Signed-off-by: Aseda Aboagye <aaboagye@chromium.org>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Link: https://lore.kernel.org/r/Zl-e97O9nvudco5z@google.com
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 drivers/hid/hid-debug.c                | 1 +
 drivers/hid/hid-input.c                | 1 +
 include/uapi/linux/input-event-codes.h | 1 +
 3 files changed, 3 insertions(+)

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 87a961cae775..322ed63a98ab 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -3366,6 +3366,7 @@ static const char *keys[KEY_MAX + 1] = {
 	[KEY_CAMERA_ACCESS_ENABLE] = "CameraAccessEnable",
 	[KEY_CAMERA_ACCESS_DISABLE] = "CameraAccessDisable",
 	[KEY_CAMERA_ACCESS_TOGGLE] = "CameraAccessToggle",
+	[KEY_ACCESSIBILITY] = "Accessibility",
 	[KEY_DICTATE] = "Dictate",
 	[KEY_MICMUTE] = "MicrophoneMute",
 	[KEY_BRIGHTNESS_MIN] = "BrightnessMin",
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index e03d300d2bac..d5a6e89c3086 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -836,6 +836,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 		if ((usage->hid & 0xf0) == 0xa0) {	/* SystemControl */
 			switch (usage->hid & 0xf) {
 			case 0x9: map_key_clear(KEY_MICMUTE); break;
+			case 0xa: map_key_clear(KEY_ACCESSIBILITY); break;
 			default: goto ignore;
 			}
 			break;
diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index 03edf2ccdf6c..39f11ec676fa 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -618,6 +618,7 @@
 #define KEY_CAMERA_ACCESS_ENABLE	0x24b	/* Enables programmatic access to camera devices. (HUTRR72) */
 #define KEY_CAMERA_ACCESS_DISABLE	0x24c	/* Disables programmatic access to camera devices. (HUTRR72) */
 #define KEY_CAMERA_ACCESS_TOGGLE	0x24d	/* Toggles the current state of the camera access control. (HUTRR72) */
+#define KEY_ACCESSIBILITY		0x24e	/* Toggles the system bound accessibility UI/command (HUTRR116) */
 
 #define KEY_BRIGHTNESS_MIN		0x250	/* Set Brightness to Minimum */
 #define KEY_BRIGHTNESS_MAX		0x251	/* Set Brightness to Maximum */
-- 
cgit v1.2.3


From 22d6d060ac77955291deb43efc2f3f4f9632c6cb Mon Sep 17 00:00:00 2001
From: Aseda Aboagye <aaboagye@chromium.org>
Date: Tue, 4 Jun 2024 23:16:32 +0000
Subject: input: Add support for "Do Not Disturb"

HUTRR94 added support for a new usage titled "System Do Not Disturb"
which toggles a system-wide Do Not Disturb setting. This commit simply
adds a new event code for the usage.

Signed-off-by: Aseda Aboagye <aaboagye@chromium.org>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Link: https://lore.kernel.org/r/Zl-gUHE70s7wCAoB@google.com
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 drivers/hid/hid-debug.c                | 1 +
 drivers/hid/hid-input.c                | 8 ++++++++
 include/uapi/linux/input-event-codes.h | 1 +
 3 files changed, 10 insertions(+)

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 322ed63a98ab..d5abfe652fb5 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -3367,6 +3367,7 @@ static const char *keys[KEY_MAX + 1] = {
 	[KEY_CAMERA_ACCESS_DISABLE] = "CameraAccessDisable",
 	[KEY_CAMERA_ACCESS_TOGGLE] = "CameraAccessToggle",
 	[KEY_ACCESSIBILITY] = "Accessibility",
+	[KEY_DO_NOT_DISTURB] = "DoNotDisturb",
 	[KEY_DICTATE] = "Dictate",
 	[KEY_MICMUTE] = "MicrophoneMute",
 	[KEY_BRIGHTNESS_MIN] = "BrightnessMin",
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index d5a6e89c3086..8bb16e9b94aa 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -833,6 +833,14 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 			break;
 		}
 
+		if ((usage->hid & 0xf0) == 0x90) { /* SystemControl*/
+			switch (usage->hid & 0xf) {
+			case 0xb: map_key_clear(KEY_DO_NOT_DISTURB); break;
+			default: goto ignore;
+			}
+			break;
+		}
+
 		if ((usage->hid & 0xf0) == 0xa0) {	/* SystemControl */
 			switch (usage->hid & 0xf) {
 			case 0x9: map_key_clear(KEY_MICMUTE); break;
diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index 39f11ec676fa..a4206723f503 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -619,6 +619,7 @@
 #define KEY_CAMERA_ACCESS_DISABLE	0x24c	/* Disables programmatic access to camera devices. (HUTRR72) */
 #define KEY_CAMERA_ACCESS_TOGGLE	0x24d	/* Toggles the current state of the camera access control. (HUTRR72) */
 #define KEY_ACCESSIBILITY		0x24e	/* Toggles the system bound accessibility UI/command (HUTRR116) */
+#define KEY_DO_NOT_DISTURB		0x24f	/* Toggles the system-wide "Do Not Disturb" control (HUTRR94)*/
 
 #define KEY_BRIGHTNESS_MIN		0x250	/* Set Brightness to Minimum */
 #define KEY_BRIGHTNESS_MAX		0x251	/* Set Brightness to Maximum */
-- 
cgit v1.2.3


From 8d3ae46c64336b538898af76b4f504dfbe89b886 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Tue, 7 May 2024 16:48:15 +0200
Subject: dt-bindings: HID: i2c-hid: add dedicated Ilitek ILI2901 schema

The Ilitek ILI2901 touch screen controller was apparently incorrectly
added to the Elan eKTH6915 schema simply because it also has a reset
gpio and is currently managed by the Elan driver in Linux.

The two controllers are not related even if an unfortunate wording in
the commit message adding the Ilitek compatible made it sound like they
were.

Add a dedicated schema for the ILI2901 which does not specify the I2C
address (which is likely 0x41 rather than 0x10 as for other Ilitek touch
controllers) to avoid cluttering the Elan schema with unrelated devices
and to make it easier to find the correct schema when adding further
Ilitek controllers.

Fixes: d74ac6f60a7e ("dt-bindings: HID: i2c-hid: elan: Introduce Ilitek ili2901")
Cc: Zhengqiao Xia <xiazhengqiao@huaqin.corp-partner.google.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20240507144821.12275-2-johan+linaro@kernel.org
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 .../devicetree/bindings/input/elan,ekth6915.yaml   |  1 -
 .../devicetree/bindings/input/ilitek,ili2901.yaml  | 66 ++++++++++++++++++++++
 2 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/devicetree/bindings/input/ilitek,ili2901.yaml

diff --git a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml
index dc4ac41f2441..c1fcf8c90c24 100644
--- a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml
+++ b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml
@@ -20,7 +20,6 @@ properties:
   compatible:
     enum:
       - elan,ekth6915
-      - ilitek,ili2901
 
   reg:
     const: 0x10
diff --git a/Documentation/devicetree/bindings/input/ilitek,ili2901.yaml b/Documentation/devicetree/bindings/input/ilitek,ili2901.yaml
new file mode 100644
index 000000000000..1abeec768d79
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/ilitek,ili2901.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/ilitek,ili2901.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ilitek ILI2901 touchscreen controller
+
+maintainers:
+  - Jiri Kosina <jkosina@suse.com>
+
+description:
+  Supports the Ilitek ILI2901 touchscreen controller.
+  This touchscreen controller uses the i2c-hid protocol with a reset GPIO.
+
+allOf:
+  - $ref: /schemas/input/touchscreen/touchscreen.yaml#
+
+properties:
+  compatible:
+    enum:
+      - ilitek,ili2901
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  panel: true
+
+  reset-gpios:
+    maxItems: 1
+
+  vcc33-supply: true
+
+  vccio-supply: true
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - vcc33-supply
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      touchscreen@41 {
+        compatible = "ilitek,ili2901";
+        reg = <0x41>;
+
+        interrupt-parent = <&tlmm>;
+        interrupts = <9 IRQ_TYPE_LEVEL_LOW>;
+
+        reset-gpios = <&tlmm 8 GPIO_ACTIVE_LOW>;
+        vcc33-supply = <&pp3300_ts>;
+      };
+    };
-- 
cgit v1.2.3


From 07fc16fa55522520f5e4b2e35c72650f11079c35 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Tue, 7 May 2024 16:48:16 +0200
Subject: dt-bindings: HID: i2c-hid: elan: add Elan eKTH5015M

Add a compatible string for the Elan eKTH5015M touch controller.

Judging from the current binding and commit bd3cba00dcc6 ("HID: i2c-hid:
elan: Add support for Elan eKTH6915 i2c-hid touchscreens"), eKTH5015M
appears to be compatible with eKTH6915. Notably the power-on sequence is
the same.

While at it, drop a redundant label from the example.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20240507144821.12275-3-johan+linaro@kernel.org
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 Documentation/devicetree/bindings/input/elan,ekth6915.yaml | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml
index c1fcf8c90c24..be84f7ed0abc 100644
--- a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml
+++ b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml
@@ -18,8 +18,12 @@ allOf:
 
 properties:
   compatible:
-    enum:
-      - elan,ekth6915
+    oneOf:
+      - items:
+          - enum:
+              - elan,ekth5015m
+          - const: elan,ekth6915
+      - const: elan,ekth6915
 
   reg:
     const: 0x10
@@ -57,8 +61,8 @@ examples:
       #address-cells = <1>;
       #size-cells = <0>;
 
-      ap_ts: touchscreen@10 {
-        compatible = "elan,ekth6915";
+      touchscreen@10 {
+        compatible = "elan,ekth5015m", "elan,ekth6915";
         reg = <0x10>;
 
         interrupt-parent = <&tlmm>;
-- 
cgit v1.2.3


From e538d4b85b8f1e3534dfbb42c2273f18bbb59d6e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Tue, 7 May 2024 16:48:17 +0200
Subject: dt-bindings: HID: i2c-hid: elan: add 'no-reset-on-power-off' property

When the power supply is shared with other peripherals the reset line
can be wired in such a way that it can remain deasserted regardless of
whether the supply is on or not.

This is important as it can be used to avoid holding the controller in
reset for extended periods of time when it remains powered, something
which can lead to increased power consumption. Leaving reset deasserted
also avoids leaking current through the reset circuitry pull-up
resistors.

Add a new 'no-reset-on-power-off' devicetree property which can be used
by the OS to determine when reset needs to be asserted on power down.

Note that this property can also be used when the supply cannot be
turned off by the OS at all.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20240507144821.12275-4-johan+linaro@kernel.org
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 Documentation/devicetree/bindings/input/elan,ekth6915.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml
index be84f7ed0abc..a62916d07a08 100644
--- a/Documentation/devicetree/bindings/input/elan,ekth6915.yaml
+++ b/Documentation/devicetree/bindings/input/elan,ekth6915.yaml
@@ -36,6 +36,12 @@ properties:
   reset-gpios:
     description: Reset GPIO; not all touchscreens using eKTH6915 hook this up.
 
+  no-reset-on-power-off:
+    type: boolean
+    description:
+      Reset line is wired so that it can (and should) be left deasserted when
+      the power supply is off.
+
   vcc33-supply:
     description: The 3.3V supply to the touchscreen.
 
-- 
cgit v1.2.3


From 0eafc58f2194dbd01d4be40f99a697681171995b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Tue, 7 May 2024 16:48:18 +0200
Subject: HID: i2c-hid: elan: fix reset suspend current leakage

The Elan eKTH5015M touch controller found on the Lenovo ThinkPad X13s
shares the VCC33 supply with other peripherals that may remain powered
during suspend (e.g. when enabled as wakeup sources).

The reset line is also wired so that it can be left deasserted when the
supply is off.

This is important as it avoids holding the controller in reset for
extended periods of time when it remains powered, which can lead to
increased power consumption, and also avoids leaking current through the
X13s reset circuitry during suspend (and after driver unbind).

Use the new 'no-reset-on-power-off' devicetree property to determine
when reset needs to be asserted on power down.

Notably this also avoids wasting power on machine variants without a
touchscreen for which the driver would otherwise exit probe with reset
asserted.

Fixes: bd3cba00dcc6 ("HID: i2c-hid: elan: Add support for Elan eKTH6915 i2c-hid touchscreens")
Cc: <stable@vger.kernel.org>	# 6.0
Cc: Douglas Anderson <dianders@chromium.org>
Tested-by: Steev Klimaszewski <steev@kali.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/20240507144821.12275-5-johan+linaro@kernel.org
Signed-off-by: Benjamin Tissoires <bentiss@kernel.org>
---
 drivers/hid/i2c-hid/i2c-hid-of-elan.c | 59 ++++++++++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 12 deletions(-)

diff --git a/drivers/hid/i2c-hid/i2c-hid-of-elan.c b/drivers/hid/i2c-hid/i2c-hid-of-elan.c
index 5b91fb106cfc..091e37933225 100644
--- a/drivers/hid/i2c-hid/i2c-hid-of-elan.c
+++ b/drivers/hid/i2c-hid/i2c-hid-of-elan.c
@@ -31,6 +31,7 @@ struct i2c_hid_of_elan {
 	struct regulator *vcc33;
 	struct regulator *vccio;
 	struct gpio_desc *reset_gpio;
+	bool no_reset_on_power_off;
 	const struct elan_i2c_hid_chip_data *chip_data;
 };
 
@@ -40,17 +41,17 @@ static int elan_i2c_hid_power_up(struct i2chid_ops *ops)
 		container_of(ops, struct i2c_hid_of_elan, ops);
 	int ret;
 
+	gpiod_set_value_cansleep(ihid_elan->reset_gpio, 1);
+
 	if (ihid_elan->vcc33) {
 		ret = regulator_enable(ihid_elan->vcc33);
 		if (ret)
-			return ret;
+			goto err_deassert_reset;
 	}
 
 	ret = regulator_enable(ihid_elan->vccio);
-	if (ret) {
-		regulator_disable(ihid_elan->vcc33);
-		return ret;
-	}
+	if (ret)
+		goto err_disable_vcc33;
 
 	if (ihid_elan->chip_data->post_power_delay_ms)
 		msleep(ihid_elan->chip_data->post_power_delay_ms);
@@ -60,6 +61,15 @@ static int elan_i2c_hid_power_up(struct i2chid_ops *ops)
 		msleep(ihid_elan->chip_data->post_gpio_reset_on_delay_ms);
 
 	return 0;
+
+err_disable_vcc33:
+	if (ihid_elan->vcc33)
+		regulator_disable(ihid_elan->vcc33);
+err_deassert_reset:
+	if (ihid_elan->no_reset_on_power_off)
+		gpiod_set_value_cansleep(ihid_elan->reset_gpio, 0);
+
+	return ret;
 }
 
 static void elan_i2c_hid_power_down(struct i2chid_ops *ops)
@@ -67,7 +77,14 @@ static void elan_i2c_hid_power_down(struct i2chid_ops *ops)
 	struct i2c_hid_of_elan *ihid_elan =
 		container_of(ops, struct i2c_hid_of_elan, ops);
 
-	gpiod_set_value_cansleep(ihid_elan->reset_gpio, 1);
+	/*
+	 * Do not assert reset when the hardware allows for it to remain
+	 * deasserted regardless of the state of the (shared) power supply to
+	 * avoid wasting power when the supply is left on.
+	 */
+	if (!ihid_elan->no_reset_on_power_off)
+		gpiod_set_value_cansleep(ihid_elan->reset_gpio, 1);
+
 	if (ihid_elan->chip_data->post_gpio_reset_off_delay_ms)
 		msleep(ihid_elan->chip_data->post_gpio_reset_off_delay_ms);
 
@@ -79,6 +96,7 @@ static void elan_i2c_hid_power_down(struct i2chid_ops *ops)
 static int i2c_hid_of_elan_probe(struct i2c_client *client)
 {
 	struct i2c_hid_of_elan *ihid_elan;
+	int ret;
 
 	ihid_elan = devm_kzalloc(&client->dev, sizeof(*ihid_elan), GFP_KERNEL);
 	if (!ihid_elan)
@@ -93,21 +111,38 @@ static int i2c_hid_of_elan_probe(struct i2c_client *client)
 	if (IS_ERR(ihid_elan->reset_gpio))
 		return PTR_ERR(ihid_elan->reset_gpio);
 
+	ihid_elan->no_reset_on_power_off = of_property_read_bool(client->dev.of_node,
+						"no-reset-on-power-off");
+
 	ihid_elan->vccio = devm_regulator_get(&client->dev, "vccio");
-	if (IS_ERR(ihid_elan->vccio))
-		return PTR_ERR(ihid_elan->vccio);
+	if (IS_ERR(ihid_elan->vccio)) {
+		ret = PTR_ERR(ihid_elan->vccio);
+		goto err_deassert_reset;
+	}
 
 	ihid_elan->chip_data = device_get_match_data(&client->dev);
 
 	if (ihid_elan->chip_data->main_supply_name) {
 		ihid_elan->vcc33 = devm_regulator_get(&client->dev,
 						      ihid_elan->chip_data->main_supply_name);
-		if (IS_ERR(ihid_elan->vcc33))
-			return PTR_ERR(ihid_elan->vcc33);
+		if (IS_ERR(ihid_elan->vcc33)) {
+			ret = PTR_ERR(ihid_elan->vcc33);
+			goto err_deassert_reset;
+		}
 	}
 
-	return i2c_hid_core_probe(client, &ihid_elan->ops,
-				  ihid_elan->chip_data->hid_descriptor_address, 0);
+	ret = i2c_hid_core_probe(client, &ihid_elan->ops,
+				 ihid_elan->chip_data->hid_descriptor_address, 0);
+	if (ret)
+		goto err_deassert_reset;
+
+	return 0;
+
+err_deassert_reset:
+	if (ihid_elan->no_reset_on_power_off)
+		gpiod_set_value_cansleep(ihid_elan->reset_gpio, 0);
+
+	return ret;
 }
 
 static const struct elan_i2c_hid_chip_data elan_ekth6915_chip_data = {
-- 
cgit v1.2.3


From a3a5a37efba11b7cf1a86abe7bccfbcdb521764e Mon Sep 17 00:00:00 2001
From: Louis Dalibard <ontake@ontake.dev>
Date: Fri, 7 Jun 2024 16:53:43 +0200
Subject: HID: Ignore battery for ELAN touchscreens 2F2C and 4116

At least ASUS Zenbook 14 (2023) and ASUS Zenbook 14 Pro (2023) are affected.

The touchscreen reports a battery status of 0% and jumps to 1% when a
stylus is used.

The device ID was added and the battery ignore quirk was enabled for it.

[jkosina@suse.com: reformatted changelog a bit]
Signed-off-by: Louis Dalibard <ontake@ontake.dev>
Signed-off-by: Jiri Kosina <jkosina@suse.com>
---
 drivers/hid/hid-ids.h   | 2 ++
 drivers/hid/hid-input.c | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 61d2a21affa2..72d56ee7ce1b 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -423,6 +423,8 @@
 #define I2C_DEVICE_ID_HP_SPECTRE_X360_13_AW0020NG  0x29DF
 #define I2C_DEVICE_ID_ASUS_TP420IA_TOUCHSCREEN 0x2BC8
 #define I2C_DEVICE_ID_ASUS_GV301RA_TOUCHSCREEN 0x2C82
+#define I2C_DEVICE_ID_ASUS_UX3402_TOUCHSCREEN 0x2F2C
+#define I2C_DEVICE_ID_ASUS_UX6404_TOUCHSCREEN 0x4116
 #define USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN	0x2544
 #define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN	0x2706
 #define I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN	0x261A
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 8bb16e9b94aa..c9094a4f281e 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -377,6 +377,10 @@ static const struct hid_device_id hid_battery_quirks[] = {
 	  HID_BATTERY_QUIRK_IGNORE },
 	{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_ASUS_GV301RA_TOUCHSCREEN),
 	  HID_BATTERY_QUIRK_IGNORE },
+	{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_ASUS_UX3402_TOUCHSCREEN),
+	  HID_BATTERY_QUIRK_IGNORE },
+	{ HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_ASUS_UX6404_TOUCHSCREEN),
+	  HID_BATTERY_QUIRK_IGNORE },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN),
 	  HID_BATTERY_QUIRK_IGNORE },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN),
-- 
cgit v1.2.3


From 83a7eefedc9b56fe7bfeff13b6c7356688ffa670 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 9 Jun 2024 14:19:43 -0700
Subject: Linux 6.10-rc3

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 7f921ae547f1..925a75b8ba7d 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 10
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc3
 NAME = Baby Opossum Posse
 
 # *DOCUMENTATION*
-- 
cgit v1.2.3


From 799d4b392417ed6889030a5b2335ccb6dcf030ab Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 25 Apr 2024 11:48:51 +0200
Subject: drm/exynos: hdmi: report safe 640x480 mode as a fallback when no EDID
 found

When reading EDID fails and driver reports no modes available, the DRM
core adds an artificial 1024x786 mode to the connector. Unfortunately
some variants of the Exynos HDMI (like the one in Exynos4 SoCs) are not
able to drive such mode, so report a safe 640x480 mode instead of nothing
in case of the EDID reading failure.

This fixes the following issue observed on Trats2 board since commit
13d5b040363c ("drm/exynos: do not return negative values from .get_modes()"):

[drm] Exynos DRM: using 11c00000.fimd device for DMA mapping operations
exynos-drm exynos-drm: bound 11c00000.fimd (ops fimd_component_ops)
exynos-drm exynos-drm: bound 12c10000.mixer (ops mixer_component_ops)
exynos-dsi 11c80000.dsi: [drm:samsung_dsim_host_attach] Attached s6e8aa0 device (lanes:4 bpp:24 mode-flags:0x10b)
exynos-drm exynos-drm: bound 11c80000.dsi (ops exynos_dsi_component_ops)
exynos-drm exynos-drm: bound 12d00000.hdmi (ops hdmi_component_ops)
[drm] Initialized exynos 1.1.0 20180330 for exynos-drm on minor 1
exynos-hdmi 12d00000.hdmi: [drm:hdmiphy_enable.part.0] *ERROR* PLL could not reach steady state
panel-samsung-s6e8aa0 11c80000.dsi.0: ID: 0xa2, 0x20, 0x8c
exynos-mixer 12c10000.mixer: timeout waiting for VSYNC
------------[ cut here ]------------
WARNING: CPU: 1 PID: 11 at drivers/gpu/drm/drm_atomic_helper.c:1682 drm_atomic_helper_wait_for_vblanks.part.0+0x2b0/0x2b8
[CRTC:70:crtc-1] vblank wait timed out
Modules linked in:
CPU: 1 PID: 11 Comm: kworker/u16:0 Not tainted 6.9.0-rc5-next-20240424 #14913
Hardware name: Samsung Exynos (Flattened Device Tree)
Workqueue: events_unbound deferred_probe_work_func
Call trace:
 unwind_backtrace from show_stack+0x10/0x14
 show_stack from dump_stack_lvl+0x68/0x88
 dump_stack_lvl from __warn+0x7c/0x1c4
 __warn from warn_slowpath_fmt+0x11c/0x1a8
 warn_slowpath_fmt from drm_atomic_helper_wait_for_vblanks.part.0+0x2b0/0x2b8
 drm_atomic_helper_wait_for_vblanks.part.0 from drm_atomic_helper_commit_tail_rpm+0x7c/0x8c
 drm_atomic_helper_commit_tail_rpm from commit_tail+0x9c/0x184
 commit_tail from drm_atomic_helper_commit+0x168/0x190
 drm_atomic_helper_commit from drm_atomic_commit+0xb4/0xe0
 drm_atomic_commit from drm_client_modeset_commit_atomic+0x23c/0x27c
 drm_client_modeset_commit_atomic from drm_client_modeset_commit_locked+0x60/0x1cc
 drm_client_modeset_commit_locked from drm_client_modeset_commit+0x24/0x40
 drm_client_modeset_commit from __drm_fb_helper_restore_fbdev_mode_unlocked+0x9c/0xc4
 __drm_fb_helper_restore_fbdev_mode_unlocked from drm_fb_helper_set_par+0x2c/0x3c
 drm_fb_helper_set_par from fbcon_init+0x3d8/0x550
 fbcon_init from visual_init+0xc0/0x108
 visual_init from do_bind_con_driver+0x1b8/0x3a4
 do_bind_con_driver from do_take_over_console+0x140/0x1ec
 do_take_over_console from do_fbcon_takeover+0x70/0xd0
 do_fbcon_takeover from fbcon_fb_registered+0x19c/0x1ac
 fbcon_fb_registered from register_framebuffer+0x190/0x21c
 register_framebuffer from __drm_fb_helper_initial_config_and_unlock+0x350/0x574
 __drm_fb_helper_initial_config_and_unlock from exynos_drm_fbdev_client_hotplug+0x6c/0xb0
 exynos_drm_fbdev_client_hotplug from drm_client_register+0x58/0x94
 drm_client_register from exynos_drm_bind+0x160/0x190
 exynos_drm_bind from try_to_bring_up_aggregate_device+0x200/0x2d8
 try_to_bring_up_aggregate_device from __component_add+0xb0/0x170
 __component_add from mixer_probe+0x74/0xcc
 mixer_probe from platform_probe+0x5c/0xb8
 platform_probe from really_probe+0xe0/0x3d8
 really_probe from __driver_probe_device+0x9c/0x1e4
 __driver_probe_device from driver_probe_device+0x30/0xc0
 driver_probe_device from __device_attach_driver+0xa8/0x120
 __device_attach_driver from bus_for_each_drv+0x80/0xcc
 bus_for_each_drv from __device_attach+0xac/0x1fc
 __device_attach from bus_probe_device+0x8c/0x90
 bus_probe_device from deferred_probe_work_func+0x98/0xe0
 deferred_probe_work_func from process_one_work+0x240/0x6d0
 process_one_work from worker_thread+0x1a0/0x3f4
 worker_thread from kthread+0x104/0x138
 kthread from ret_from_fork+0x14/0x28
Exception stack(0xf0895fb0 to 0xf0895ff8)
...
irq event stamp: 82357
hardirqs last  enabled at (82363): [<c01a96e8>] vprintk_emit+0x308/0x33c
hardirqs last disabled at (82368): [<c01a969c>] vprintk_emit+0x2bc/0x33c
softirqs last  enabled at (81614): [<c0101644>] __do_softirq+0x320/0x500
softirqs last disabled at (81609): [<c012dfe0>] __irq_exit_rcu+0x130/0x184
---[ end trace 0000000000000000 ]---
exynos-drm exynos-drm: [drm] *ERROR* flip_done timed out
exynos-drm exynos-drm: [drm] *ERROR* [CRTC:70:crtc-1] commit wait timed out
exynos-drm exynos-drm: [drm] *ERROR* flip_done timed out
exynos-drm exynos-drm: [drm] *ERROR* [CONNECTOR:74:HDMI-A-1] commit wait timed out
exynos-drm exynos-drm: [drm] *ERROR* flip_done timed out
exynos-drm exynos-drm: [drm] *ERROR* [PLANE:56:plane-5] commit wait timed out
exynos-mixer 12c10000.mixer: timeout waiting for VSYNC

Cc: stable@vger.kernel.org
Fixes: 13d5b040363c ("drm/exynos: do not return negative values from .get_modes()")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_hdmi.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index e968824a4c72..1e26cd4f8347 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -887,11 +887,11 @@ static int hdmi_get_modes(struct drm_connector *connector)
 	int ret;
 
 	if (!hdata->ddc_adpt)
-		return 0;
+		goto no_edid;
 
 	edid = drm_get_edid(connector, hdata->ddc_adpt);
 	if (!edid)
-		return 0;
+		goto no_edid;
 
 	hdata->dvi_mode = !connector->display_info.is_hdmi;
 	DRM_DEV_DEBUG_KMS(hdata->dev, "%s : width[%d] x height[%d]\n",
@@ -906,6 +906,9 @@ static int hdmi_get_modes(struct drm_connector *connector)
 	kfree(edid);
 
 	return ret;
+
+no_edid:
+	return drm_add_modes_noedid(connector, 640, 480);
 }
 
 static int hdmi_find_phy_conf(struct hdmi_context *hdata, u32 pixel_clock)
-- 
cgit v1.2.3


From 1f3512cdf8299f9edaea9046d53ea324a7730bab Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 4 Jun 2024 15:11:29 +0200
Subject: drm/exynos: dp: drop driver owner initialization

Core in platform_driver_register() already sets the .owner, so driver
does not need to.  Whatever is set here will be anyway overwritten by
main driver calling platform_driver_register().

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_dp.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c
index f48c4343f469..3e6d4c6aa877 100644
--- a/drivers/gpu/drm/exynos/exynos_dp.c
+++ b/drivers/gpu/drm/exynos/exynos_dp.c
@@ -285,7 +285,6 @@ struct platform_driver dp_driver = {
 	.remove_new	= exynos_dp_remove,
 	.driver		= {
 		.name	= "exynos-dp",
-		.owner	= THIS_MODULE,
 		.pm	= pm_ptr(&exynos_dp_pm_ops),
 		.of_match_table = exynos_dp_match,
 	},
-- 
cgit v1.2.3


From 38e3825631b1f314b21e3ade00b5a4d737eb054e Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Thu, 30 May 2024 13:01:51 +0300
Subject: drm/exynos/vidi: fix memory leak in .get_modes()

The duplicated EDID is never freed. Fix it.

Cc: stable@vger.kernel.org
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_vidi.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index fab135308b70..11a720fef32b 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -309,6 +309,7 @@ static int vidi_get_modes(struct drm_connector *connector)
 	struct vidi_context *ctx = ctx_from_connector(connector);
 	struct edid *edid;
 	int edid_len;
+	int count;
 
 	/*
 	 * the edid data comes from user side and it would be set
@@ -328,7 +329,11 @@ static int vidi_get_modes(struct drm_connector *connector)
 
 	drm_connector_update_edid_property(connector, edid);
 
-	return drm_add_edid_modes(connector, edid);
+	count = drm_add_edid_modes(connector, edid);
+
+	kfree(edid);
+
+	return count;
 }
 
 static const struct drm_connector_helper_funcs vidi_connector_helper_funcs = {
-- 
cgit v1.2.3


From 7c877115da4196fa108dcfefd49f5a9b67b8d8ca Mon Sep 17 00:00:00 2001
From: Riana Tauro <riana.tauro@intel.com>
Date: Thu, 6 Jun 2024 15:38:41 +0530
Subject: drm/xe/xe_gt_idle: use GT forcewake domain assertion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The rc6 registers used in disable_c6 function belong
to the GT forcewake domain. Hence change the forcewake
assertion to check GT forcewake domain.

v2: add fixes tag (Himal)

Fixes: 975e4a3795d4 ("drm/xe: Manually setup C6 when skip_guc_pc is set")
Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240606100842.956072-2-riana.tauro@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
(cherry picked from commit 21b708554648177a0078962c31629bce31ef5d83)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_gt_idle.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index 8fc0f3f6ecc5..c699f47f4700 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -199,7 +199,7 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt)
 void xe_gt_idle_disable_c6(struct xe_gt *gt)
 {
 	xe_device_assert_mem_access(gt_to_xe(gt));
-	xe_force_wake_assert_held(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+	xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
 
 	xe_mmio_write32(gt, PG_ENABLE, 0);
 	xe_mmio_write32(gt, RC_CONTROL, 0);
-- 
cgit v1.2.3


From cd554e1e118a6aa1c919309cd28398b003f69c1f Mon Sep 17 00:00:00 2001
From: Michal Wajdeczko <michal.wajdeczko@intel.com>
Date: Fri, 7 Jun 2024 17:31:55 +0200
Subject: drm/xe/pf: Assert LMEM provisioning is done only on DGFX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Local Memory (aka VRAM) is only available on DGFX platforms.
We shouldn't attempt to provision VFs with LMEM or attempt to
update the LMTT on non-DGFX platforms. Add missing asserts that
would enforce that and fix release code that could crash on iGFX
due to uninitialized LMTT.

Fixes: 0698ff57bf32 ("drm/xe/pf: Update the LMTT when freeing VF GT config")
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Piotr Piórkowski <piotr.piorkowski@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240607153155.1592-1-michal.wajdeczko@intel.com
(cherry picked from commit b321cb83a375bcc18cd0a4b62bdeaf6905cca769)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 476d613333a9..6c2cfc54442c 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -1274,6 +1274,9 @@ static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid)
 	struct xe_tile *tile;
 	unsigned int tid;
 
+	xe_assert(xe, IS_DGFX(xe));
+	xe_assert(xe, IS_SRIOV_PF(xe));
+
 	for_each_tile(tile, xe, tid) {
 		lmtt = &tile->sriov.pf.lmtt;
 		xe_lmtt_drop_pages(lmtt, vfid);
@@ -1292,6 +1295,9 @@ static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid)
 	unsigned int tid;
 	int err;
 
+	xe_assert(xe, IS_DGFX(xe));
+	xe_assert(xe, IS_SRIOV_PF(xe));
+
 	total = 0;
 	for_each_tile(tile, xe, tid)
 		total += pf_get_vf_config_lmem(tile->primary_gt, vfid);
@@ -1337,6 +1343,7 @@ fail:
 
 static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config)
 {
+	xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt)));
 	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
 	lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
 
@@ -1355,6 +1362,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
 	int err;
 
 	xe_gt_assert(gt, vfid);
+	xe_gt_assert(gt, IS_DGFX(xe));
 	xe_gt_assert(gt, !xe_gt_is_media_type(gt));
 
 	size = round_up(size, pf_get_lmem_alignment(gt));
@@ -1745,11 +1753,14 @@ static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *c
 static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid)
 {
 	struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+	struct xe_device *xe = gt_to_xe(gt);
 
 	if (!xe_gt_is_media_type(gt)) {
 		pf_release_vf_config_ggtt(gt, config);
-		pf_release_vf_config_lmem(gt, config);
-		pf_update_vf_lmtt(gt_to_xe(gt), vfid);
+		if (IS_DGFX(xe)) {
+			pf_release_vf_config_lmem(gt, config);
+			pf_update_vf_lmtt(xe, vfid);
+		}
 	}
 	pf_release_config_ctxs(gt, config);
 	pf_release_config_dbs(gt, config);
-- 
cgit v1.2.3


From b5e3a9b83f352a737b77a01734a6661d1130ed49 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <andrzej.hajda@intel.com>
Date: Wed, 5 Jun 2024 09:29:48 +0200
Subject: drm/xe: flush engine buffers before signalling user fence on all
 engines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tests show that user fence signalling requires kind of write barrier,
otherwise not all writes performed by the workload will be available
to userspace. It is already done for render and compute, we need it
also for the rest: video, gsc, copy.

Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: Andrzej Hajda <andrzej.hajda@intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240605-fix_user_fence_posted-v3-2-06e7932f784a@intel.com
(cherry picked from commit 3ad7d18c5dad75ed38098c7cc3bc9594b4701399)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_ring_ops.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index d42b3f33bd7a..aca7a9af6e84 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -80,6 +80,16 @@ static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
 	return i;
 }
 
+static int emit_flush_dw(u32 *dw, int i)
+{
+	dw[i++] = MI_FLUSH_DW | MI_FLUSH_IMM_DW;
+	dw[i++] = 0;
+	dw[i++] = 0;
+	dw[i++] = 0;
+
+	return i;
+}
+
 static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb,
 			       u32 *dw, int i)
 {
@@ -234,10 +244,12 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
 
 	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
 
-	if (job->user_fence.used)
+	if (job->user_fence.used) {
+		i = emit_flush_dw(dw, i);
 		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
 						job->user_fence.value,
 						dw, i);
+	}
 
 	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
 
@@ -293,10 +305,12 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 
 	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
 
-	if (job->user_fence.used)
+	if (job->user_fence.used) {
+		i = emit_flush_dw(dw, i);
 		i = emit_store_imm_ppgtt_posted(job->user_fence.addr,
 						job->user_fence.value,
 						dw, i);
+	}
 
 	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
 
-- 
cgit v1.2.3


From 2470b141bfae2b9695b5b6823e3b978b22d33dde Mon Sep 17 00:00:00 2001
From: Riana Tauro <riana.tauro@intel.com>
Date: Thu, 6 Jun 2024 15:38:42 +0530
Subject: drm/xe: move disable_c6 call
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

disable c6 called in guc_pc_fini_hw is unreachable.

GuC PC init returns earlier if skip_guc_pc is true and never
registers the finish call thus making disable_c6 unreachable.

move this call to gt idle.

v2: rebase
v3: add fixes tag (Himal)

Fixes: 975e4a3795d4 ("drm/xe: Manually setup C6 when skip_guc_pc is set")
Signed-off-by: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240606100842.956072-3-riana.tauro@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
(cherry picked from commit 6800e63cf97bae62bca56d8e691544540d945f53)
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/xe/xe_gt_idle.c | 7 +++++++
 drivers/gpu/drm/xe/xe_guc_pc.c  | 6 ------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index c699f47f4700..944770fb2daf 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -147,6 +147,13 @@ static const struct attribute *gt_idle_attrs[] = {
 static void gt_idle_sysfs_fini(struct drm_device *drm, void *arg)
 {
 	struct kobject *kobj = arg;
+	struct xe_gt *gt = kobj_to_gt(kobj->parent);
+
+	if (gt_to_xe(gt)->info.skip_guc_pc) {
+		XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FW_GT));
+		xe_gt_idle_disable_c6(gt);
+		xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
+	}
 
 	sysfs_remove_files(kobj, gt_idle_attrs);
 	kobject_put(kobj);
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 509649d0e65e..23382ced4ea7 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -895,12 +895,6 @@ int xe_guc_pc_stop(struct xe_guc_pc *pc)
 static void xe_guc_pc_fini(struct drm_device *drm, void *arg)
 {
 	struct xe_guc_pc *pc = arg;
-	struct xe_device *xe = pc_to_xe(pc);
-
-	if (xe->info.skip_guc_pc) {
-		xe_gt_idle_disable_c6(pc_to_gt(pc));
-		return;
-	}
 
 	XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
 	XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
-- 
cgit v1.2.3