59 files changed, 3146 insertions, 471 deletions
diff --git a/Documentation/arch/powerpc/htm.rst b/Documentation/arch/powerpc/htm.rst new file mode 100644 index 000000000000..fcb4eb6306b1 --- /dev/null +++ b/Documentation/arch/powerpc/htm.rst @@ -0,0 +1,104 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. _htm: + +=================================== +HTM (Hardware Trace Macro) +=================================== + +Athira Rajeev, 2 Mar 2025 + +.. contents:: + :depth: 3 + + +Basic overview +============== + +H_HTM is used as an interface for executing Hardware Trace Macro (HTM) +functions, including setup, configuration, control and dumping of the HTM data. +To use HTM, the HTM buffers must first be set up; HTM operations can then +be controlled using the H_HTM hcall. The hcall can be invoked for any core/chip +of the system from within a partition itself. For this purpose, a debugfs +folder called "htmdump" is provided under /sys/kernel/debug/powerpc. + + +HTM debugfs example usage +========================= + +.. code-block:: sh + + # ls /sys/kernel/debug/powerpc/htmdump/ + coreindexonchip htmcaps htmconfigure htmflags htminfo htmsetup + htmstart htmstatus htmtype nodalchipindex nodeindex trace + +Details on each file: + +* nodeindex, nodalchipindex, coreindexonchip specify which partition to configure the HTM for. +* htmtype: specifies the type of HTM. Supported target is hardwareTarget. +* trace: used to read the HTM data. +* htmconfigure: Configure/Deconfigure the HTM. Writing 1 to the file will configure the trace, writing 0 will deconfigure it. +* htmstart: Start/Stop the HTM. Writing 1 to the file will start the tracing, writing 0 will stop the tracing. +* htmstatus: get the status of the HTM. This is needed to understand the HTM state after each operation. +* htmsetup: set the HTM buffer size. The size of the HTM buffer is a power of 2. +* htminfo: provides the system processor configuration details. This is needed to understand the appropriate values for nodeindex, nodalchipindex, coreindexonchip. +* htmcaps: provides the HTM capabilities, like minimum/maximum buffer size and the kinds of tracing the HTM supports. +* htmflags: allows passing flags to the hcall. Currently supports controlling the wrapping of the HTM buffer. + +To see the system processor configuration details: + +.. code-block:: sh + + # cat /sys/kernel/debug/powerpc/htmdump/htminfo > htminfo_file + +The result can be interpreted using hexdump.
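+ +For example, a minimal sketch of inspecting the dump with hexdump's canonical output (the exact layout of the fields is system dependent, so this is illustrative only): + +.. code-block:: sh + + # hexdump -C htminfo_file | head + +The decoded values can then be used to choose appropriate settings for nodeindex, nodalchipindex and coreindexonchip.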
+ +To collect HTM traces for a partition represented by nodeindex 0, nodalchipindex 1 and coreindexonchip 12: + +.. code-block:: sh + + # cd /sys/kernel/debug/powerpc/htmdump/ + # echo 2 > htmtype + # echo 33 > htmsetup (sets an 8GB HTM buffer; the number is the size as a power of 2) + +This requires a CEC reboot to get the HTM buffers allocated. + +.. code-block:: sh + + # cd /sys/kernel/debug/powerpc/htmdump/ + # echo 2 > htmtype + # echo 0 > nodeindex + # echo 1 > nodalchipindex + # echo 12 > coreindexonchip + # echo 1 > htmflags # to set noWrap for HTM buffers + # echo 1 > htmconfigure # Configure the HTM + # echo 1 > htmstart # Start the HTM + # echo 0 > htmstart # Stop the HTM + # echo 0 > htmconfigure # Deconfigure the HTM + # cat htmstatus # Dump the status of HTM entries as data + +The above sets the htmtype and core details, then executes the respective HTM operations. + +Read the HTM trace data ======================== + +After starting the trace collection, run the workload of interest. Stop the trace collection after the required period of time, and read the trace file. + +.. code-block:: sh + + # cat /sys/kernel/debug/powerpc/htmdump/trace > trace_file + +This trace file will contain the relevant instruction traces collected during the workload execution, and can be used as an input file for trace decoders. + +Benefits of using HTM debugfs interface +======================================= + +It is now possible to collect traces for a particular core/chip from within any partition of the system and decode them. Through this enablement, a small partition can be dedicated to collecting and analyzing the trace data, providing important information for performance analysis, software tuning, or hardware debug. diff --git a/Documentation/arch/powerpc/kvm-nested.rst b/Documentation/arch/powerpc/kvm-nested.rst index 5defd13cc6c1..574592505604 100644 --- a/Documentation/arch/powerpc/kvm-nested.rst +++ b/Documentation/arch/powerpc/kvm-nested.rst @@ -208,13 +208,9 @@ associated values for each ID in the GSB:: flags: Bit 0: getGuestWideState: Request state of the Guest instead of an individual VCPU. - Bit 1: takeOwnershipOfVcpuState Indicate the L1 is taking - over ownership of the VCPU state and that the L0 can free - the storage holding the state. The VCPU state will need to - be returned to the Hypervisor via H_GUEST_SET_STATE prior - to H_GUEST_RUN_VCPU being called for this VCPU. The data - returned in the dataBuffer is in a Hypervisor internal - format. + Bit 1: getHostWideState: Request stats of the Host. This causes + the guestId and vcpuId parameters to be ignored; attempting + to get the VCPU/Guest state will cause an error. Bits 2-63: Reserved guestId: ID obtained from H_GUEST_CREATE vcpuId: ID of the vCPU pass to H_GUEST_CREATE_VCPU @@ -406,9 +402,10 @@ the partition like the timebase offset and partition scoped page table information. +--------+-------+----+--------+----------------------------------+ -| ID | Size | RW | Thread | Details | -| | Bytes | | Guest | | -| | | | Scope | | +| ID | Size | RW |(H)ost | Details | +| | Bytes | |(G)uest | | +| | | |(T)hread| | +| | | |Scope | | +========+=======+====+========+==================================+ | 0x0000 | | RW | TG | NOP element | +--------+-------+----+--------+----------------------------------+ @@ -434,6 +431,29 @@ table information. | | | | |- 0x8 Table size. | +--------+-------+----+--------+----------------------------------+ | 0x0007-| | | | Reserved | +| 0x07FF | | | | | ++--------+-------+----+--------+----------------------------------+ +| 0x0800 | 0x08 | R | H | Current usage in bytes of the | +| | | | | L0's Guest Management Space | +| | | | | for an L1-Lpar.
| ++--------+-------+----+--------+----------------------------------+ +| 0x0801 | 0x08 | R | H | Max bytes available in the | +| | | | | L0's Guest Management Space for | +| | | | | an L1-Lpar | ++--------+-------+----+--------+----------------------------------+ +| 0x0802 | 0x08 | R | H | Current usage in bytes of the | +| | | | | L0's Guest Page Table Management | +| | | | | Space for an L1-Lpar | ++--------+-------+----+--------+----------------------------------+ +| 0x0803 | 0x08 | R | H | Max bytes available in the L0's | +| | | | | Guest Page Table Management | +| | | | | Space for an L1-Lpar | ++--------+-------+----+--------+----------------------------------+ +| 0x0804 | 0x08 | R | H | Cumulative Reclaimed bytes from | +| | | | | L0 Guest's Page Table Management | +| | | | | Space due to overcommit | ++--------+-------+----+--------+----------------------------------+ +| 0x0805-| | | | Reserved | | 0x0BFF | | | | | +--------+-------+----+--------+----------------------------------+ | 0x0C00 | 0x10 | RW | T |Run vCPU Input Buffer: | diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 7a1409ecc238..fee5c4731501 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -366,6 +366,12 @@ Code Seq# Include File Comments <mailto:linuxppc-dev> 0xB2 01-02 arch/powerpc/include/uapi/asm/papr-sysparm.h powerpc/pseries system parameter API <mailto:linuxppc-dev> +0xB2 03-05 arch/powerpc/include/uapi/asm/papr-indices.h powerpc/pseries indices API + <mailto:linuxppc-dev> +0xB2 06-07 arch/powerpc/include/uapi/asm/papr-platform-dump.h powerpc/pseries Platform Dump API + <mailto:linuxppc-dev> +0xB2 08 powerpc/include/uapi/asm/papr-physical-attestation.h powerpc/pseries Physical Attestation API + <mailto:linuxppc-dev> 0xB3 00 linux/mmc/ioctl.h 0xB4 00-0F linux/gpio.h <mailto:linux-gpio@vger.kernel.org> 0xB5 00-0F uapi/linux/rpmsg.h <mailto:linux-remoteproc@vger.kernel.org> diff --git a/MAINTAINERS b/MAINTAINERS index d4a2c7d4d14a..4661dc6e7523 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13665,7 +13665,6 @@ M: Madhavan Srinivasan <maddy@linux.ibm.com> M: Michael Ellerman <mpe@ellerman.id.au> R: Nicholas Piggin <npiggin@gmail.com> R: Christophe Leroy <christophe.leroy@csgroup.eu> -R: Naveen N Rao <naveen@kernel.org> L: linuxppc-dev@lists.ozlabs.org S: Supported W: https://github.com/linuxppc/wiki/wiki diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6722625a406a..c3e0cc83f120 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -277,6 +277,7 @@ config PPC select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_RETHOOK if KPROBES select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE @@ -894,7 +895,7 @@ config DATA_SHIFT int "Data shift" if DATA_SHIFT_BOOL default 24 if STRICT_KERNEL_RWX && PPC64 range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 - range 19 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx + range 14 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx range 20 24 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_85xx default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32 default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32 @@ -907,10 +908,10 @@ config DATA_SHIFT On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO. 
Smaller is the alignment, greater is the number of necessary DBATs. - On 8xx, large pages (512kb or 8M) are used to map kernel linear - memory. Aligning to 8M reduces TLB misses as only 8M pages are used - in that case. If PIN_TLB is selected, it must be aligned to 8M as - 8M pages will be pinned. + On 8xx, large pages (16kb or 512kb or 8M) are used to map kernel + linear memory. Aligning to 8M reduces TLB misses as only 8M pages + are used in that case. If PIN_TLB is selected, it must be aligned + to 8M as 8M pages will be pinned. config ARCH_FORCE_MAX_ORDER int "Order of maximal physically contiguous allocations" diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 184d0680e661..a7ab087d412c 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -70,6 +70,7 @@ BOOTCPPFLAGS := -nostdinc $(LINUXINCLUDE) BOOTCPPFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include) BOOTCFLAGS := $(BOOTTARGETFLAGS) \ + -std=gnu11 \ -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ -fno-strict-aliasing -O2 \ -msoft-float -mno-altivec -mno-vsx \ diff --git a/arch/powerpc/boot/rs6000.h b/arch/powerpc/boot/rs6000.h index a9d879155ef9..16df8f3c43f1 100644 --- a/arch/powerpc/boot/rs6000.h +++ b/arch/powerpc/boot/rs6000.h @@ -1,11 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* IBM RS/6000 "XCOFF" file definitions for BFD. Copyright (C) 1990, 1991 Free Software Foundation, Inc. - FIXME: Can someone provide a transliteration of this name into ASCII? - Using the following chars caused a compiler warning on HIUX (so I replaced - them with octal escapes), and isn't useful without an understanding of what - character set it is. - Written by Mimi Ph\373\364ng-Th\345o V\365 of IBM + Written by Mimi Phuong-Thao Vo of IBM and John Gilmore of Cygnus Support. 
*/ /********************** FILE HEADER **********************/ diff --git a/arch/powerpc/include/asm/guest-state-buffer.h b/arch/powerpc/include/asm/guest-state-buffer.h index d107abe1468f..acd61eb36d59 100644 --- a/arch/powerpc/include/asm/guest-state-buffer.h +++ b/arch/powerpc/include/asm/guest-state-buffer.h @@ -28,6 +28,21 @@ /* Process Table Info */ #define KVMPPC_GSID_PROCESS_TABLE 0x0006 +/* Guest Management Heap Size */ +#define KVMPPC_GSID_L0_GUEST_HEAP 0x0800 + +/* Guest Management Heap Max Size */ +#define KVMPPC_GSID_L0_GUEST_HEAP_MAX 0x0801 + +/* Guest Pagetable Size */ +#define KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE 0x0802 + +/* Guest Pagetable Max Size */ +#define KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX 0x0803 + +/* Guest Pagetable Reclaim in bytes */ +#define KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM 0x0804 + /* H_GUEST_RUN_VCPU input buffer Info */ #define KVMPPC_GSID_RUN_INPUT 0x0C00 /* H_GUEST_RUN_VCPU output buffer Info */ @@ -106,6 +121,11 @@ #define KVMPPC_GSE_GUESTWIDE_COUNT \ (KVMPPC_GSE_GUESTWIDE_END - KVMPPC_GSE_GUESTWIDE_START + 1) +#define KVMPPC_GSE_HOSTWIDE_START KVMPPC_GSID_L0_GUEST_HEAP +#define KVMPPC_GSE_HOSTWIDE_END KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM +#define KVMPPC_GSE_HOSTWIDE_COUNT \ + (KVMPPC_GSE_HOSTWIDE_END - KVMPPC_GSE_HOSTWIDE_START + 1) + #define KVMPPC_GSE_META_START KVMPPC_GSID_RUN_INPUT #define KVMPPC_GSE_META_END KVMPPC_GSID_VPA #define KVMPPC_GSE_META_COUNT (KVMPPC_GSE_META_END - KVMPPC_GSE_META_START + 1) @@ -130,7 +150,8 @@ (KVMPPC_GSE_INTR_REGS_END - KVMPPC_GSE_INTR_REGS_START + 1) #define KVMPPC_GSE_IDEN_COUNT \ - (KVMPPC_GSE_GUESTWIDE_COUNT + KVMPPC_GSE_META_COUNT + \ + (KVMPPC_GSE_HOSTWIDE_COUNT + \ + KVMPPC_GSE_GUESTWIDE_COUNT + KVMPPC_GSE_META_COUNT + \ KVMPPC_GSE_DW_REGS_COUNT + KVMPPC_GSE_W_REGS_COUNT + \ KVMPPC_GSE_VSRS_COUNT + KVMPPC_GSE_INTR_REGS_COUNT) @@ -139,10 +160,11 @@ */ enum { KVMPPC_GS_CLASS_GUESTWIDE = 0x01, - KVMPPC_GS_CLASS_META = 0x02, - KVMPPC_GS_CLASS_DWORD_REG = 0x04, - KVMPPC_GS_CLASS_WORD_REG = 0x08, - KVMPPC_GS_CLASS_VECTOR = 0x10, + KVMPPC_GS_CLASS_HOSTWIDE = 0x02, + KVMPPC_GS_CLASS_META = 0x04, + KVMPPC_GS_CLASS_DWORD_REG = 0x08, + KVMPPC_GS_CLASS_WORD_REG = 0x10, + KVMPPC_GS_CLASS_VECTOR = 0x18, KVMPPC_GS_CLASS_INTR = 0x20, }; @@ -164,6 +186,7 @@ enum { */ enum { KVMPPC_GS_FLAGS_WIDE = 0x01, + KVMPPC_GS_FLAGS_HOST_WIDE = 0x02, }; /** @@ -287,7 +310,7 @@ struct kvmppc_gs_msg_ops { * struct kvmppc_gs_msg - a guest state message * @bitmap: the guest state ids that should be included * @ops: modify message behavior for reading and writing to buffers - * @flags: guest wide or thread wide + * @flags: host wide, guest wide or thread wide * @data: location where buffer data will be written to or from. 
* * A guest state message is allows flexibility in sending in receiving data diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index eeef13db2770..6df6dbbe1e7c 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -490,14 +490,15 @@ #define H_RPTI_PAGE_ALL (-1UL) /* Flags for H_GUEST_{S,G}_STATE */ -#define H_GUEST_FLAGS_WIDE (1UL<<(63-0)) +#define H_GUEST_FLAGS_WIDE (1UL << (63 - 0)) +#define H_GUEST_FLAGS_HOST_WIDE (1UL << (63 - 1)) /* Flag values used for H_{S,G}SET_GUEST_CAPABILITIES */ -#define H_GUEST_CAP_COPY_MEM (1UL<<(63-0)) -#define H_GUEST_CAP_POWER9 (1UL<<(63-1)) -#define H_GUEST_CAP_POWER10 (1UL<<(63-2)) -#define H_GUEST_CAP_POWER11 (1UL<<(63-3)) -#define H_GUEST_CAP_BITMAP2 (1UL<<(63-63)) +#define H_GUEST_CAP_COPY_MEM (1UL << (63 - 0)) +#define H_GUEST_CAP_POWER9 (1UL << (63 - 1)) +#define H_GUEST_CAP_POWER10 (1UL << (63 - 2)) +#define H_GUEST_CAP_POWER11 (1UL << (63 - 3)) +#define H_GUEST_CAP_BITMAP2 (1UL << (63 - 63)) /* * Defines for H_HTM - Macros for hardware trace macro (HTM) function. diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 91be7b885944..f2b6cc4341bb 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -65,6 +65,14 @@ static inline long register_dtl(unsigned long cpu, unsigned long vpa) return vpa_call(H_VPA_REG_DTL, cpu, vpa); } +/* + * Invokes H_HTM hcall with parameters passed from htm_hcall_wrapper. + * flags: Set to hardwareTarget. + * target: Specifies target using node index, nodal chip index and core index. + * operation : action to perform ie configure, start, stop, deconfigure, trace + * based on the HTM type. + * param1, param2, param3: parameters for each action. 
+ */ static inline long htm_call(unsigned long flags, unsigned long target, unsigned long operation, unsigned long param1, unsigned long param2, unsigned long param3) @@ -73,17 +81,17 @@ static inline long htm_call(unsigned long flags, unsigned long target, param1, param2, param3); } -static inline long htm_get_dump_hardware(unsigned long nodeindex, +static inline long htm_hcall_wrapper(unsigned long flags, unsigned long nodeindex, unsigned long nodalchipindex, unsigned long coreindexonchip, - unsigned long type, unsigned long addr, unsigned long size, - unsigned long offset) + unsigned long type, unsigned long htm_op, unsigned long param1, unsigned long param2, + unsigned long param3) { - return htm_call(H_HTM_FLAGS_HARDWARE_TARGET, + return htm_call(H_HTM_FLAGS_HARDWARE_TARGET | flags, H_HTM_TARGET_NODE_INDEX(nodeindex) | H_HTM_TARGET_NODAL_CHIP_INDEX(nodalchipindex) | H_HTM_TARGET_CORE_INDEX_ON_CHIP(coreindexonchip), - H_HTM_OP(H_HTM_OP_DUMP_DATA) | H_HTM_TYPE(type), - addr, size, offset); + H_HTM_OP(htm_op) | H_HTM_TYPE(type), + param1, param2, param3); } extern void vpa_init(int cpu); diff --git a/arch/powerpc/include/asm/preempt.h b/arch/powerpc/include/asm/preempt.h new file mode 100644 index 000000000000..000e2b9681f3 --- /dev/null +++ b/arch/powerpc/include/asm/preempt.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_POWERPC_PREEMPT_H +#define __ASM_POWERPC_PREEMPT_H + +#include <asm-generic/preempt.h> + +#if defined(CONFIG_PREEMPT_DYNAMIC) +#include <linux/jump_label.h> +DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); +#define need_irq_preemption() \ + (static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched)) +#else +#define need_irq_preemption() (IS_ENABLED(CONFIG_PREEMPTION)) +#endif + +#endif /* __ASM_POWERPC_PREEMPT_H */ diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 04406162fc5a..75fa0293c508 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -515,6 +515,10 @@ extern char rtas_data_buf[RTAS_DATA_BUF_SIZE]; extern unsigned long rtas_rmo_buf; extern struct mutex rtas_ibm_get_vpd_lock; +extern struct mutex rtas_ibm_get_indices_lock; +extern struct mutex rtas_ibm_set_dynamic_indicator_lock; +extern struct mutex rtas_ibm_get_dynamic_sensor_state_lock; +extern struct mutex rtas_ibm_physical_attestation_lock; #define GLOBAL_INTERRUPT_QUEUE 9005 diff --git a/arch/powerpc/include/uapi/asm/papr-indices.h b/arch/powerpc/include/uapi/asm/papr-indices.h new file mode 100644 index 000000000000..c2999d89d52a --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr-indices.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_PAPR_INDICES_H_ +#define _UAPI_PAPR_INDICES_H_ + +#include <linux/types.h> +#include <asm/ioctl.h> +#include <asm/papr-miscdev.h> + +#define LOC_CODE_SIZE 80 +#define RTAS_GET_INDICES_BUF_SIZE SZ_4K + +struct papr_indices_io_block { + union { + struct { + __u8 is_sensor; /* 0 for indicator and 1 for sensor */ + __u32 indice_type; + } indices; + struct { + __u32 token; /* Sensor or indicator token */ + __u32 state; /* get / set state */ + /* + * PAPR+ 12.3.2.4 Converged Location Code Rules - Length + * Restrictions. 79 characters plus null. + */ + char location_code_str[LOC_CODE_SIZE]; /* location code */ + } dynamic_param; + }; +}; + +/* + * ioctls for /dev/papr-indices. 
+ * PAPR_INDICES_IOC_GET: Returns a get-indices handle fd to read data + * PAPR_DYNAMIC_SENSOR_IOC_GET: Gets the state of the input sensor + * PAPR_DYNAMIC_INDICATOR_IOC_SET: Sets the new state for the input indicator + */ +#define PAPR_INDICES_IOC_GET _IOW(PAPR_MISCDEV_IOC_ID, 3, struct papr_indices_io_block) +#define PAPR_DYNAMIC_SENSOR_IOC_GET _IOWR(PAPR_MISCDEV_IOC_ID, 4, struct papr_indices_io_block) +#define PAPR_DYNAMIC_INDICATOR_IOC_SET _IOW(PAPR_MISCDEV_IOC_ID, 5, struct papr_indices_io_block) + + +#endif /* _UAPI_PAPR_INDICES_H_ */ diff --git a/arch/powerpc/include/uapi/asm/papr-physical-attestation.h b/arch/powerpc/include/uapi/asm/papr-physical-attestation.h new file mode 100644 index 000000000000..ea746837bb9a --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr-physical-attestation.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_PAPR_PHYSICAL_ATTESTATION_H_ +#define _UAPI_PAPR_PHYSICAL_ATTESTATION_H_ + +#include <linux/types.h> +#include <asm/ioctl.h> +#include <asm/papr-miscdev.h> + +#define PAPR_PHYATTEST_MAX_INPUT 4084 /* Max 4K buffer: 4K-12 */ + +/* + * Defined in PAPR 2.13+ 21.6 Attestation Command Structures. + * User space passes this struct, and the max size should be 4K. + */ +struct papr_phy_attest_io_block { + __u8 version; + __u8 command; + __u8 TCG_major_ver; + __u8 TCG_minor_ver; + __be32 length; + __be32 correlator; + __u8 payload[PAPR_PHYATTEST_MAX_INPUT]; +}; + +/* + * ioctl for /dev/papr-physical-attestation. Returns an attestation + * command fd handle + */ +#define PAPR_PHY_ATTEST_IOC_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 8, struct papr_phy_attest_io_block) + +#endif /* _UAPI_PAPR_PHYSICAL_ATTESTATION_H_ */ diff --git a/arch/powerpc/include/uapi/asm/papr-platform-dump.h b/arch/powerpc/include/uapi/asm/papr-platform-dump.h new file mode 100644 index 000000000000..8a1c060e89a9 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/papr-platform-dump.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_PAPR_PLATFORM_DUMP_H_ +#define _UAPI_PAPR_PLATFORM_DUMP_H_ + +#include <linux/types.h> +#include <asm/ioctl.h> +#include <asm/papr-miscdev.h> + +/* + * ioctl for /dev/papr-platform-dump. Returns a platform-dump handle fd + * corresponding to the dump tag. + */ +#define PAPR_PLATFORM_DUMP_IOC_CREATE_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 6, __u64) +#define PAPR_PLATFORM_DUMP_IOC_INVALIDATE _IOW(PAPR_MISCDEV_IOC_ID, 7, __u64) + +#endif /* _UAPI_PAPR_PLATFORM_DUMP_H_ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 6ac621155ec3..9d1ab3971694 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -160,9 +160,7 @@ endif obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o -ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE)(CONFIG_PPC_BOOK3S),) obj-y += ppc_save_regs.o -endif obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index df16c7f547ab..8ca49e40c473 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -290,10 +290,8 @@ static void __init fadump_show_config(void) if (!fw_dump.fadump_supported) return; - pr_debug("Fadump enabled : %s\n", - (fw_dump.fadump_enabled ? "yes" : "no")); - pr_debug("Dump Active : %s\n", - (fw_dump.dump_active ?
"yes" : "no")); + pr_debug("Fadump enabled : %s\n", str_yes_no(fw_dump.fadump_enabled)); + pr_debug("Dump Active : %s\n", str_yes_no(fw_dump.dump_active)); pr_debug("Dump section sizes:\n"); pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size); pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size); diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 8f4acc55407b..e0c681d0b076 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -25,6 +25,10 @@ unsigned long global_dbcr0[NR_CPUS]; #endif +#if defined(CONFIG_PREEMPT_DYNAMIC) +DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); +#endif + #ifdef CONFIG_PPC_BOOK3S_64 DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); static inline bool exit_must_hard_disable(void) @@ -396,7 +400,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) /* Returning to a kernel context with local irqs enabled. */ WARN_ON_ONCE(!(regs->msr & MSR_EE)); again: - if (IS_ENABLED(CONFIG_PREEMPTION)) { + if (need_irq_preemption()) { /* Return to preemptible kernel context */ if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) { if (preempt_count() == 0) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 0ebae6e4c19d..244eb4857e7f 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -16,6 +16,7 @@ #include <linux/mm.h> #include <linux/spinlock.h> #include <linux/string.h> +#include <linux/string_choices.h> #include <linux/dma-mapping.h> #include <linux/bitmap.h> #include <linux/iommu-helper.h> @@ -769,8 +770,8 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid, iommu_table_clear(tbl); if (!welcomed) { - printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n", - novmerge ? 
"disabled" : "enabled"); + pr_info("IOMMU table initialized, virtual merging %s\n", + str_disabled_enabled(novmerge)); welcomed = 1; } diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index 3816a2bf2b84..d083b4517065 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -9,6 +9,7 @@ #include <linux/proc_fs.h> #include <linux/kernel.h> #include <linux/of.h> +#include <linux/string.h> #include <asm/machdep.h> #include <asm/vdso_datapage.h> @@ -56,7 +57,7 @@ static int __init proc_ppc64_init(void) { struct proc_dir_entry *pde; - strcpy((char *)systemcfg->eye_catcher, "SYSTEMCFG:PPC64"); + strscpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64"); systemcfg->version.major = SYSTEMCFG_MAJOR; systemcfg->version.minor = SYSTEMCFG_MINOR; systemcfg->processor = mfspr(SPRN_PVR); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index ef91f71e07c4..855e09886503 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1000,7 +1000,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk) WARN_ON(tm_suspend_disabled); - TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, " + TM_DEBUG("---- tm_reclaim on pid %d (NIP=%lx, " "ccr=%lx, msr=%lx, trap=%lx)\n", tsk->pid, thr->regs->nip, thr->regs->ccr, thr->regs->msr, @@ -1008,7 +1008,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk) tm_reclaim_thread(thr, TM_CAUSE_RESCHED); - TM_DEBUG("--- tm_reclaim on pid %d complete\n", + TM_DEBUG("---- tm_reclaim on pid %d complete\n", tsk->pid); out_and_saveregs: @@ -2367,14 +2367,14 @@ void __no_sanitize_address show_stack(struct task_struct *tsk, (sp + STACK_INT_FRAME_REGS); lr = regs->link; - printk("%s--- interrupt: %lx at %pS\n", + printk("%s---- interrupt: %lx at %pS\n", loglvl, regs->trap, (void *)regs->nip); // Detect the case of an empty pt_regs at the very base // of the stack and suppress showing it in full. if (!empty_user_regs(regs, tsk)) { __show_regs(regs); - printk("%s--- interrupt: %lx\n", loglvl, regs->trap); + printk("%s---- interrupt: %lx\n", loglvl, regs->trap); } firstframe = 1; diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index d7a738f1858d..e61245c4468e 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -92,12 +92,12 @@ struct rtas_function { * Per-function locks for sequence-based RTAS functions. 
*/ static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock); -static DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock); -static DEFINE_MUTEX(rtas_ibm_get_indices_lock); static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock); -static DEFINE_MUTEX(rtas_ibm_physical_attestation_lock); -static DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock); +DEFINE_MUTEX(rtas_ibm_physical_attestation_lock); DEFINE_MUTEX(rtas_ibm_get_vpd_lock); +DEFINE_MUTEX(rtas_ibm_get_indices_lock); +DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock); +DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock); static struct rtas_function rtas_function_table[] __ro_after_init = { [RTAS_FNIDX__CHECK_EXCEPTION] = { diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 2c1b24100eca..3565c67fc638 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -212,10 +212,10 @@ bne- 1f mr r3, r15 +1: mtlr r3 .if \allregs == 0 REST_GPR(15, r1) .endif -1: mtlr r3 #endif /* Restore gprs */ diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index 9ac3266e4965..a325c1c02f96 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -359,7 +359,10 @@ void default_machine_crash_shutdown(struct pt_regs *regs) if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) is_via_system_reset = 1; - crash_smp_send_stop(); + if (IS_ENABLED(CONFIG_SMP)) + crash_smp_send_stop(); + else + crash_kexec_prepare(); crash_save_cpu(regs, crashing_cpu); diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index dbfdc126bf14..2f2702c867f7 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -83,6 +83,7 @@ config KVM_BOOK3S_64_HV depends on KVM_BOOK3S_64 && PPC_POWERNV select KVM_BOOK3S_HV_POSSIBLE select KVM_GENERIC_MMU_NOTIFIER + select KVM_BOOK3S_HV_PMU select CMA help Support running unmodified book3s_64 guest kernels in @@ -171,6 +172,18 @@ config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND those buggy L1s which saves the L2 state, at the cost of performance in all nested-capable guest entry/exit. +config KVM_BOOK3S_HV_PMU + tristate "Hypervisor Perf events for KVM Book3s-HV" + depends on KVM_BOOK3S_64_HV + help + Enable Book3s-HV Hypervisor Perf events PMU named 'kvm-hv'. These + Perf events give an overview of hypervisor performance overall + instead of a specific guests. Currently the PMU reports + L0-Hypervisor stats on a kvm-hv enabled PSeries LPAR like: + * Total/Used Guest-Heap + * Total/Used Guest Page-table Memory + * Total amount of Guest Page-table Memory reclaimed + config KVM_BOOKE_HV bool diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 19f4d298dd17..7667563fb9ff 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -6541,10 +6541,6 @@ static struct kvmppc_ops kvm_ops_hv = { .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv, .arch_vm_ioctl = kvm_arch_vm_ioctl_hv, .hcall_implemented = kvmppc_hcall_impl_hv, -#ifdef CONFIG_KVM_XICS - .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv, - .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv, -#endif .configure_mmu = kvmhv_configure_mmu, .get_rmmu_info = kvmhv_get_rmmu_info, .set_smt_mode = kvmhv_set_smt_mode, @@ -6662,6 +6658,22 @@ static int kvmppc_book3s_init_hv(void) return r; } +#if defined(CONFIG_KVM_XICS) + /* + * IRQ bypass is supported only for interrupts whose EOI operations are + * handled via OPAL calls. 
Therefore, register IRQ bypass handlers + * exclusively for PowerNV KVM when booted with 'xive=off', indicating + * the use of the emulated XICS interrupt controller. + */ + if (!kvmhv_on_pseries()) { + pr_info("KVM-HV: Enabling IRQ bypass\n"); + kvm_ops_hv.irq_bypass_add_producer = + kvmppc_irq_bypass_add_producer_hv; + kvm_ops_hv.irq_bypass_del_producer = + kvmppc_irq_bypass_del_producer_hv; + } +#endif + kvm_ops_hv.owner = THIS_MODULE; kvmppc_hv_ops = &kvm_ops_hv; diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c index e5c7ce1fb761..87691cf86cae 100644 --- a/arch/powerpc/kvm/book3s_hv_nestedv2.c +++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c @@ -123,6 +123,12 @@ static size_t gs_msg_ops_vcpu_get_size(struct kvmppc_gs_msg *gsm) case KVMPPC_GSID_PROCESS_TABLE: case KVMPPC_GSID_RUN_INPUT: case KVMPPC_GSID_RUN_OUTPUT: + /* Host wide counters */ + case KVMPPC_GSID_L0_GUEST_HEAP: + case KVMPPC_GSID_L0_GUEST_HEAP_MAX: + case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE: + case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX: + case KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM: break; default: size += kvmppc_gse_total_size(kvmppc_gsid_size(iden)); diff --git a/arch/powerpc/kvm/guest-state-buffer.c b/arch/powerpc/kvm/guest-state-buffer.c index b80dbc58621f..871cf60ddeb6 100644 --- a/arch/powerpc/kvm/guest-state-buffer.c +++ b/arch/powerpc/kvm/guest-state-buffer.c @@ -92,6 +92,10 @@ static int kvmppc_gsid_class(u16 iden) (iden <= KVMPPC_GSE_GUESTWIDE_END)) return KVMPPC_GS_CLASS_GUESTWIDE; + if ((iden >= KVMPPC_GSE_HOSTWIDE_START) && + (iden <= KVMPPC_GSE_HOSTWIDE_END)) + return KVMPPC_GS_CLASS_HOSTWIDE; + if ((iden >= KVMPPC_GSE_META_START) && (iden <= KVMPPC_GSE_META_END)) return KVMPPC_GS_CLASS_META; @@ -118,6 +122,21 @@ static int kvmppc_gsid_type(u16 iden) int type = -1; switch (kvmppc_gsid_class(iden)) { + case KVMPPC_GS_CLASS_HOSTWIDE: + switch (iden) { + case KVMPPC_GSID_L0_GUEST_HEAP: + fallthrough; + case KVMPPC_GSID_L0_GUEST_HEAP_MAX: + fallthrough; + case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE: + fallthrough; + case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX: + fallthrough; + case KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM: + type = KVMPPC_GSE_BE64; + break; + } + break; case KVMPPC_GS_CLASS_GUESTWIDE: switch (iden) { case KVMPPC_GSID_HOST_STATE_SIZE: @@ -187,6 +206,9 @@ unsigned long kvmppc_gsid_flags(u16 iden) case KVMPPC_GS_CLASS_GUESTWIDE: flags = KVMPPC_GS_FLAGS_WIDE; break; + case KVMPPC_GS_CLASS_HOSTWIDE: + flags = KVMPPC_GS_FLAGS_HOST_WIDE; + break; case KVMPPC_GS_CLASS_META: case KVMPPC_GS_CLASS_DWORD_REG: case KVMPPC_GS_CLASS_WORD_REG: @@ -310,6 +332,13 @@ static inline int kvmppc_gse_flatten_iden(u16 iden) bit += KVMPPC_GSE_GUESTWIDE_COUNT; + if (class == KVMPPC_GS_CLASS_HOSTWIDE) { + bit += iden - KVMPPC_GSE_HOSTWIDE_START; + return bit; + } + + bit += KVMPPC_GSE_HOSTWIDE_COUNT; + if (class == KVMPPC_GS_CLASS_META) { bit += iden - KVMPPC_GSE_META_START; return bit; @@ -356,6 +385,12 @@ static inline u16 kvmppc_gse_unflatten_iden(int bit) } bit -= KVMPPC_GSE_GUESTWIDE_COUNT; + if (bit < KVMPPC_GSE_HOSTWIDE_COUNT) { + iden = KVMPPC_GSE_HOSTWIDE_START + bit; + return iden; + } + bit -= KVMPPC_GSE_HOSTWIDE_COUNT; + if (bit < KVMPPC_GSE_META_COUNT) { iden = KVMPPC_GSE_META_START + bit; return iden; @@ -588,6 +623,8 @@ int kvmppc_gsb_send(struct kvmppc_gs_buff *gsb, unsigned long flags) if (flags & KVMPPC_GS_FLAGS_WIDE) hflags |= H_GUEST_FLAGS_WIDE; + if (flags & KVMPPC_GS_FLAGS_HOST_WIDE) + hflags |= H_GUEST_FLAGS_HOST_WIDE; rc = plpar_guest_set_state(hflags, gsb->guest_id, 
gsb->vcpu_id, __pa(gsb->hdr), gsb->capacity, &i); @@ -613,6 +650,8 @@ int kvmppc_gsb_recv(struct kvmppc_gs_buff *gsb, unsigned long flags) if (flags & KVMPPC_GS_FLAGS_WIDE) hflags |= H_GUEST_FLAGS_WIDE; + if (flags & KVMPPC_GS_FLAGS_HOST_WIDE) + hflags |= H_GUEST_FLAGS_HOST_WIDE; rc = plpar_guest_get_state(hflags, gsb->guest_id, gsb->vcpu_id, __pa(gsb->hdr), gsb->capacity, &i); diff --git a/arch/powerpc/kvm/test-guest-state-buffer.c b/arch/powerpc/kvm/test-guest-state-buffer.c index bfd225329a18..5ccca306997a 100644 --- a/arch/powerpc/kvm/test-guest-state-buffer.c +++ b/arch/powerpc/kvm/test-guest-state-buffer.c @@ -5,6 +5,7 @@ #include <kunit/test.h> #include <asm/guest-state-buffer.h> +#include <asm/kvm_ppc.h> static void test_creating_buffer(struct kunit *test) { @@ -141,6 +142,16 @@ static void test_gs_bitmap(struct kunit *test) i++; } + for (u16 iden = KVMPPC_GSID_L0_GUEST_HEAP; + iden <= KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM; iden++) { + kvmppc_gsbm_set(&gsbm, iden); + kvmppc_gsbm_set(&gsbm1, iden); + KUNIT_EXPECT_TRUE(test, kvmppc_gsbm_test(&gsbm, iden)); + kvmppc_gsbm_clear(&gsbm, iden); + KUNIT_EXPECT_FALSE(test, kvmppc_gsbm_test(&gsbm, iden)); + i++; + } + for (u16 iden = KVMPPC_GSID_RUN_INPUT; iden <= KVMPPC_GSID_VPA; iden++) { kvmppc_gsbm_set(&gsbm, iden); @@ -309,12 +320,215 @@ static void test_gs_msg(struct kunit *test) kvmppc_gsm_free(gsm); } +/* Test data struct for hostwide/L0 counters */ +struct kvmppc_gs_msg_test_hostwide_data { + u64 guest_heap; + u64 guest_heap_max; + u64 guest_pgtable_size; + u64 guest_pgtable_size_max; + u64 guest_pgtable_reclaim; +}; + +static size_t test_hostwide_get_size(struct kvmppc_gs_msg *gsm) + +{ + size_t size = 0; + u16 ids[] = { + KVMPPC_GSID_L0_GUEST_HEAP, + KVMPPC_GSID_L0_GUEST_HEAP_MAX, + KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE, + KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX, + KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM + }; + + for (int i = 0; i < ARRAY_SIZE(ids); i++) + size += kvmppc_gse_total_size(kvmppc_gsid_size(ids[i])); + return size; +} + +static int test_hostwide_fill_info(struct kvmppc_gs_buff *gsb, + struct kvmppc_gs_msg *gsm) +{ + struct kvmppc_gs_msg_test_hostwide_data *data = gsm->data; + + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_HEAP)) + kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_HEAP, + data->guest_heap); + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX)) + kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_HEAP_MAX, + data->guest_heap_max); + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE)) + kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE, + data->guest_pgtable_size); + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX)) + kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX, + data->guest_pgtable_size_max); + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM)) + kvmppc_gse_put_u64(gsb, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM, + data->guest_pgtable_reclaim); + + return 0; +} + +static int test_hostwide_refresh_info(struct kvmppc_gs_msg *gsm, + struct kvmppc_gs_buff *gsb) +{ + struct kvmppc_gs_parser gsp = { 0 }; + struct kvmppc_gs_msg_test_hostwide_data *data = gsm->data; + struct kvmppc_gs_elem *gse; + int rc; + + rc = kvmppc_gse_parse(&gsp, gsb); + if (rc < 0) + return rc; + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP); + if (gse) + data->guest_heap = kvmppc_gse_get_u64(gse); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP_MAX); + if (gse) + data->guest_heap_max = kvmppc_gse_get_u64(gse); + + gse = kvmppc_gsp_lookup(&gsp, 
KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE); + if (gse) + data->guest_pgtable_size = kvmppc_gse_get_u64(gse); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX); + if (gse) + data->guest_pgtable_size_max = kvmppc_gse_get_u64(gse); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM); + if (gse) + data->guest_pgtable_reclaim = kvmppc_gse_get_u64(gse); + + return 0; +} + +static struct kvmppc_gs_msg_ops gs_msg_test_hostwide_ops = { + .get_size = test_hostwide_get_size, + .fill_info = test_hostwide_fill_info, + .refresh_info = test_hostwide_refresh_info, +}; + +static void test_gs_hostwide_msg(struct kunit *test) +{ + struct kvmppc_gs_msg_test_hostwide_data test_data = { + .guest_heap = 0xdeadbeef, + .guest_heap_max = ~0ULL, + .guest_pgtable_size = 0xff, + .guest_pgtable_size_max = 0xffffff, + .guest_pgtable_reclaim = 0xdeadbeef, + }; + struct kvmppc_gs_msg *gsm; + struct kvmppc_gs_buff *gsb; + + gsm = kvmppc_gsm_new(&gs_msg_test_hostwide_ops, &test_data, GSM_SEND, + GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsm); + + gsb = kvmppc_gsb_new(kvmppc_gsm_size(gsm), 0, 0, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb); + + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP); + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX); + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE); + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX); + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM); + + kvmppc_gsm_fill_info(gsm, gsb); + + memset(&test_data, 0, sizeof(test_data)); + + kvmppc_gsm_refresh_info(gsm, gsb); + KUNIT_EXPECT_EQ(test, test_data.guest_heap, 0xdeadbeef); + KUNIT_EXPECT_EQ(test, test_data.guest_heap_max, ~0ULL); + KUNIT_EXPECT_EQ(test, test_data.guest_pgtable_size, 0xff); + KUNIT_EXPECT_EQ(test, test_data.guest_pgtable_size_max, 0xffffff); + KUNIT_EXPECT_EQ(test, test_data.guest_pgtable_reclaim, 0xdeadbeef); + + kvmppc_gsm_free(gsm); +} + +/* Test if the H_GUEST_GET_STATE for hostwide counters works */ +static void test_gs_hostwide_counters(struct kunit *test) +{ + struct kvmppc_gs_msg_test_hostwide_data test_data; + struct kvmppc_gs_parser gsp = { 0 }; + + struct kvmppc_gs_msg *gsm; + struct kvmppc_gs_buff *gsb; + struct kvmppc_gs_elem *gse; + int rc; + + if (!kvmhv_on_pseries()) + kunit_skip(test, "This test need a kmv-hv guest"); + + gsm = kvmppc_gsm_new(&gs_msg_test_hostwide_ops, &test_data, GSM_SEND, + GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsm); + + gsb = kvmppc_gsb_new(kvmppc_gsm_size(gsm), 0, 0, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb); + + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP); + + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX); + + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE); + + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX); + + kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM); + + kvmppc_gsm_fill_info(gsm, gsb); + + /* With HOST_WIDE flags guestid and vcpuid will be ignored */ + rc = kvmppc_gsb_recv(gsb, KVMPPC_GS_FLAGS_HOST_WIDE); + KUNIT_ASSERT_EQ(test, rc, 0); + + /* Parse the guest state buffer is successful */ + rc = kvmppc_gse_parse(&gsp, gsb); + KUNIT_ASSERT_EQ(test, rc, 0); + + /* Parse the GSB and get the counters */ + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP); + KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 Heap counter missing"); + kunit_info(test, "Guest Heap Size=%llu bytes", + kvmppc_gse_get_u64(gse)); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP_MAX); + 
KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 Heap counter max missing"); + kunit_info(test, "Guest Heap Size Max=%llu bytes", + kvmppc_gse_get_u64(gse)); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE); + KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 page-table size missing"); + kunit_info(test, "Guest Page-table Size=%llu bytes", + kvmppc_gse_get_u64(gse)); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX); + KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 page-table size-max missing"); + kunit_info(test, "Guest Page-table Size Max=%llu bytes", + kvmppc_gse_get_u64(gse)); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM); + KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 page-table reclaim size missing"); + kunit_info(test, "Guest Page-table Reclaim Size=%llu bytes", + kvmppc_gse_get_u64(gse)); + + kvmppc_gsm_free(gsm); + kvmppc_gsb_free(gsb); +} + static struct kunit_case guest_state_buffer_testcases[] = { KUNIT_CASE(test_creating_buffer), KUNIT_CASE(test_adding_element), KUNIT_CASE(test_gs_bitmap), KUNIT_CASE(test_gs_parsing), KUNIT_CASE(test_gs_msg), + KUNIT_CASE(test_gs_hostwide_msg), + KUNIT_CASE(test_gs_hostwide_counters), {} }; diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index 45817ab82bb4..14b0e23f601f 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -38,11 +38,7 @@ static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {} static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) { /* type has to be known at build time for optimization */ - - /* The BUILD_BUG_ON below breaks in funny ways, commented out - * for now ... -BenH BUILD_BUG_ON(!__builtin_constant_p(type)); - */ switch (type) { case EXT_INTR_EXITS: vcpu->stat.ext_intr_exits++; diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c index 58ed6bd613a6..54340912398f 100644 --- a/arch/powerpc/lib/vmx-helper.c +++ b/arch/powerpc/lib/vmx-helper.c @@ -45,7 +45,7 @@ int exit_vmx_usercopy(void) * set and we are preemptible. The hack here is to schedule a * decrementer to fire here and reschedule for us if necessary. */ - if (IS_ENABLED(CONFIG_PREEMPTION) && need_resched()) + if (need_irq_preemption() && need_resched()) set_dec(1); return 0; } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index c156fe0d53c3..806c74e0d5ab 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -17,6 +17,7 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/string.h> +#include <linux/string_choices.h> #include <linux/types.h> #include <linux/pagemap.h> #include <linux/ptrace.h> @@ -218,7 +219,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, // Read/write fault blocked by KUAP is bad, it can never succeed. if (bad_kuap_fault(regs, address, is_write)) { pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n", - is_write ? "write" : "read", address, + str_write_read(is_write), address, from_kuid(&init_user_ns, current_uid())); // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad @@ -625,7 +626,7 @@ static void __bad_page_fault(struct pt_regs *regs, int sig) case INTERRUPT_DATA_STORAGE: case INTERRUPT_H_DATA_STORAGE: pr_alert("BUG: %s on %s at 0x%08lx\n", msg, - is_write ? 
"write" : "read", regs->dar); + str_write_read(is_write), regs->dar); break; case INTERRUPT_DATA_SEGMENT: pr_alert("BUG: %s at 0x%08lx\n", msg, regs->dar); diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 8b54f12d1889..ab1505cf42bf 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -54,20 +54,13 @@ static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa, { pmd_t *pmdp = pmd_off_k(va); pte_t *ptep; - - if (WARN_ON(psize != MMU_PAGE_512K && psize != MMU_PAGE_8M)) - return -EINVAL; + unsigned int shift = mmu_psize_to_shift(psize); if (new) { if (WARN_ON(slab_is_available())) return -EINVAL; - if (psize == MMU_PAGE_512K) { - ptep = early_pte_alloc_kernel(pmdp, va); - /* The PTE should never be already present */ - if (WARN_ON(pte_present(*ptep) && pgprot_val(prot))) - return -EINVAL; - } else { + if (psize == MMU_PAGE_8M) { if (WARN_ON(!pmd_none(*pmdp) || !pmd_none(*(pmdp + 1)))) return -EINVAL; @@ -78,20 +71,25 @@ static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa, pmd_populate_kernel(&init_mm, pmdp + 1, ptep); ptep = (pte_t *)pmdp; + } else { + ptep = early_pte_alloc_kernel(pmdp, va); + /* The PTE should never be already present */ + if (WARN_ON(pte_present(*ptep) && pgprot_val(prot))) + return -EINVAL; } } else { - if (psize == MMU_PAGE_512K) - ptep = pte_offset_kernel(pmdp, va); - else + if (psize == MMU_PAGE_8M) ptep = (pte_t *)pmdp; + else + ptep = pte_offset_kernel(pmdp, va); } if (WARN_ON(!ptep)) return -ENOMEM; set_huge_pte_at(&init_mm, va, ptep, - pte_mkhuge(pfn_pte(pa >> PAGE_SHIFT, prot)), - 1UL << mmu_psize_to_shift(psize)); + arch_make_huge_pte(pfn_pte(pa >> PAGE_SHIFT, prot), shift, 0), + 1UL << shift); return 0; } @@ -123,14 +121,18 @@ static int mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, unsigned long p = offset; int err = 0; - WARN_ON(!IS_ALIGNED(offset, SZ_512K) || !IS_ALIGNED(top, SZ_512K)); + WARN_ON(!IS_ALIGNED(offset, SZ_16K) || !IS_ALIGNED(top, SZ_16K)); + for (; p < ALIGN(p, SZ_512K) && p < top && !err; p += SZ_16K, v += SZ_16K) + err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_16K, new); for (; p < ALIGN(p, SZ_8M) && p < top && !err; p += SZ_512K, v += SZ_512K) err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); for (; p < ALIGN_DOWN(top, SZ_8M) && p < top && !err; p += SZ_8M, v += SZ_8M) err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new); for (; p < ALIGN_DOWN(top, SZ_512K) && p < top && !err; p += SZ_512K, v += SZ_512K) err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); + for (; p < ALIGN_DOWN(top, SZ_16K) && p < top && !err; p += SZ_16K, v += SZ_16K) + err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_16K, new); if (!new) flush_tlb_kernel_range(PAGE_OFFSET + v, PAGE_OFFSET + top); diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 6beacaec63d3..4c26912c2e3c 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -51,8 +51,16 @@ EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \ } while (0) -/* Sign-extended 32-bit immediate load */ +/* + * Sign-extended 32-bit immediate load + * + * If this is a dummy pass (!image), account for + * maximum possible instructions. 
+ */ #define PPC_LI32(d, i) do { \ + if (!image) \ + ctx->idx += 2; \ + else { \ if ((int)(uintptr_t)(i) >= -32768 && \ (int)(uintptr_t)(i) < 32768) \ EMIT(PPC_RAW_LI(d, i)); \ @@ -60,10 +68,15 @@ EMIT(PPC_RAW_LIS(d, IMM_H(i))); \ if (IMM_L(i)) \ EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \ - } } while(0) + } \ + } } while (0) #ifdef CONFIG_PPC64 +/* If dummy pass (!image), account for maximum possible instructions */ #define PPC_LI64(d, i) do { \ + if (!image) \ + ctx->idx += 5; \ + else { \ if ((long)(i) >= -2147483648 && \ (long)(i) < 2147483648) \ PPC_LI32(d, i); \ @@ -84,7 +97,8 @@ if ((uintptr_t)(i) & 0x000000000000ffffULL) \ EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \ 0xffff)); \ - } } while (0) + } \ + } } while (0) #define PPC_LI_ADDR PPC_LI64 #ifndef CONFIG_PPC_KERNEL_PCREL diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 2991bb171a9b..c0684733e9d6 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -504,10 +504,11 @@ static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ct EMIT(PPC_RAW_ADDI(_R3, _R1, regs_off)); if (!p->jited) PPC_LI_ADDR(_R4, (unsigned long)p->insnsi); - if (!create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx], (unsigned long)p->bpf_func, - BRANCH_SET_LINK)) { - if (image) - image[ctx->idx] = ppc_inst_val(branch_insn); + /* Account for max possible instructions during dummy pass for size calculation */ + if (image && !create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx], + (unsigned long)p->bpf_func, + BRANCH_SET_LINK)) { + image[ctx->idx] = ppc_inst_val(branch_insn); ctx->idx++; } else { EMIT(PPC_RAW_LL(_R12, _R25, offsetof(struct bpf_prog, bpf_func))); @@ -889,7 +890,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off); /* Reserve space to patch branch instruction to skip fexit progs */ - im->ip_after_call = &((u32 *)ro_image)[ctx->idx]; + if (ro_image) /* image is NULL for dummy pass */ + im->ip_after_call = &((u32 *)ro_image)[ctx->idx]; EMIT(PPC_RAW_NOP()); } @@ -912,7 +914,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im } if (flags & BPF_TRAMP_F_CALL_ORIG) { - im->ip_epilogue = &((u32 *)ro_image)[ctx->idx]; + if (ro_image) /* image is NULL for dummy pass */ + im->ip_epilogue = &((u32 *)ro_image)[ctx->idx]; PPC_LI_ADDR(_R3, im); ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, (unsigned long)__bpf_tramp_exit); @@ -973,25 +976,9 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *func_addr) { struct bpf_tramp_image im; - void *image; int ret; - /* - * Allocate a temporary buffer for __arch_prepare_bpf_trampoline(). - * This will NOT cause fragmentation in direct map, as we do not - * call set_memory_*() on this buffer. - * - * We cannot use kvmalloc here, because we need image to be in - * module memory range. 
- */ - image = bpf_jit_alloc_exec(PAGE_SIZE); - if (!image) - return -ENOMEM; - - ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image, - m, flags, tlinks, func_addr); - bpf_jit_free_exec(image); - + ret = __arch_prepare_bpf_trampoline(&im, NULL, NULL, NULL, m, flags, tlinks, func_addr); return ret; } diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index c4db278dae36..0aace304dfe1 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -313,7 +313,6 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code u64 func_addr; u32 true_cond; u32 tmp_idx; - int j; if (i && (BPF_CLASS(code) == BPF_ALU64 || BPF_CLASS(code) == BPF_ALU) && (BPF_CLASS(prevcode) == BPF_ALU64 || BPF_CLASS(prevcode) == BPF_ALU) && @@ -1099,13 +1098,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code * 16 byte instruction that uses two 'struct bpf_insn' */ case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ - tmp_idx = ctx->idx; PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm); PPC_LI32(dst_reg, (u32)insn[i].imm); - /* padding to allow full 4 instructions for later patching */ - if (!image) - for (j = ctx->idx - tmp_idx; j < 4; j++) - EMIT(PPC_RAW_NOP()); /* Adjust for two bpf instructions */ addrs[++i] = ctx->idx * 4; break; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 233703b06d7c..5daa77aee7f7 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -227,7 +227,14 @@ int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context * #ifdef CONFIG_PPC_KERNEL_PCREL reladdr = func_addr - local_paca->kernelbase; - if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) { + /* + * If fimage is NULL (the initial pass to find image size), + * account for the maximum no. of instructions possible. 
+ */ + if (!fimage) { + ctx->idx += 7; + return 0; + } else if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) { EMIT(PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase))); /* Align for subsequent prefix instruction */ if (!IS_ALIGNED((unsigned long)fimage + CTX_NIA(ctx), 8)) @@ -412,7 +419,6 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code u64 imm64; u32 true_cond; u32 tmp_idx; - int j; /* * addrs[] maps a BPF bytecode address into a real offset from @@ -1046,12 +1052,7 @@ emit_clear: case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ imm64 = ((u64)(u32) insn[i].imm) | (((u64)(u32) insn[i+1].imm) << 32); - tmp_idx = ctx->idx; PPC_LI64(dst_reg, imm64); - /* padding to allow full 5 instructions for later patching */ - if (!image) - for (j = ctx->idx - tmp_idx; j < 5; j++) - EMIT(PPC_RAW_NOP()); /* Adjust for two bpf instructions */ addrs[++i] = ctx->idx * 4; break; diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index ac2cf58d62db..7f53fcb7495a 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -18,6 +18,8 @@ obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o obj-$(CONFIG_VPA_PMU) += vpa-pmu.o +obj-$(CONFIG_KVM_BOOK3S_HV_PMU) += kvm-hv-pmu.o + obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/arch/powerpc/perf/kvm-hv-pmu.c b/arch/powerpc/perf/kvm-hv-pmu.c new file mode 100644 index 000000000000..ae264c9080ef --- /dev/null +++ b/arch/powerpc/perf/kvm-hv-pmu.c @@ -0,0 +1,435 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Description: PMUs specific to running nested KVM-HV guests + * on Book3S processors (specifically POWER9 and later). + */ + +#define pr_fmt(fmt) "kvmppc-pmu: " fmt + +#include "asm-generic/local64.h" +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/ratelimit.h> +#include <linux/kvm_host.h> +#include <linux/gfp_types.h> +#include <linux/pgtable.h> +#include <linux/perf_event.h> +#include <linux/spinlock_types.h> +#include <linux/spinlock.h> + +#include <asm/types.h> +#include <asm/kvm_ppc.h> +#include <asm/kvm_book3s.h> +#include <asm/mmu.h> +#include <asm/pgalloc.h> +#include <asm/pte-walk.h> +#include <asm/reg.h> +#include <asm/plpar_wrappers.h> +#include <asm/firmware.h> + +#include "asm/guest-state-buffer.h" + +enum kvmppc_pmu_eventid { + KVMPPC_EVENT_HOST_HEAP, + KVMPPC_EVENT_HOST_HEAP_MAX, + KVMPPC_EVENT_HOST_PGTABLE, + KVMPPC_EVENT_HOST_PGTABLE_MAX, + KVMPPC_EVENT_HOST_PGTABLE_RECLAIM, + KVMPPC_EVENT_MAX, +}; + +#define KVMPPC_PMU_EVENT_ATTR(_name, _id) \ + PMU_EVENT_ATTR_ID(_name, kvmppc_events_sysfs_show, _id) + +static ssize_t kvmppc_events_sysfs_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sprintf(page, "event=0x%02llx\n", pmu_attr->id); +} + +/* Holds the hostwide stats */ +static struct kvmppc_hostwide_stats { + u64 guest_heap; + u64 guest_heap_max; + u64 guest_pgtable_size; + u64 guest_pgtable_size_max; + u64 guest_pgtable_reclaim; +} l0_stats; + +/* Protect access to l0_stats */ +static DEFINE_SPINLOCK(lock_l0_stats); + +/* GSB related structs needed to talk to L0 */ +static struct kvmppc_gs_msg *gsm_l0_stats; +static struct kvmppc_gs_buff *gsb_l0_stats; +static struct kvmppc_gs_parser gsp_l0_stats; + +static struct attribute *kvmppc_pmu_events_attr[] = { + KVMPPC_PMU_EVENT_ATTR(host_heap, KVMPPC_EVENT_HOST_HEAP), + KVMPPC_PMU_EVENT_ATTR(host_heap_max, 
KVMPPC_EVENT_HOST_HEAP_MAX), + KVMPPC_PMU_EVENT_ATTR(host_pagetable, KVMPPC_EVENT_HOST_PGTABLE), + KVMPPC_PMU_EVENT_ATTR(host_pagetable_max, KVMPPC_EVENT_HOST_PGTABLE_MAX), + KVMPPC_PMU_EVENT_ATTR(host_pagetable_reclaim, KVMPPC_EVENT_HOST_PGTABLE_RECLAIM), + NULL, +}; + +static const struct attribute_group kvmppc_pmu_events_group = { + .name = "events", + .attrs = kvmppc_pmu_events_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-5"); +static struct attribute *kvmppc_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group kvmppc_pmu_format_group = { + .name = "format", + .attrs = kvmppc_pmu_format_attr, +}; + +static const struct attribute_group *kvmppc_pmu_attr_groups[] = { + &kvmppc_pmu_events_group, + &kvmppc_pmu_format_group, + NULL, +}; + +/* + * Issue the hcall to get the L0-host stats. + * Should be called with l0-stat lock held + */ +static int kvmppc_update_l0_stats(void) +{ + int rc; + + /* With HOST_WIDE flags guestid and vcpuid will be ignored */ + rc = kvmppc_gsb_recv(gsb_l0_stats, KVMPPC_GS_FLAGS_HOST_WIDE); + if (rc) + goto out; + + /* Parse the guest state buffer is successful */ + rc = kvmppc_gse_parse(&gsp_l0_stats, gsb_l0_stats); + if (rc) + goto out; + + /* Update the l0 returned stats*/ + memset(&l0_stats, 0, sizeof(l0_stats)); + rc = kvmppc_gsm_refresh_info(gsm_l0_stats, gsb_l0_stats); + +out: + return rc; +} + +/* Update the value of the given perf_event */ +static int kvmppc_pmu_event_update(struct perf_event *event) +{ + int rc; + u64 curr_val, prev_val; + unsigned long flags; + unsigned int config = event->attr.config; + + /* Ensure no one else is modifying the l0_stats */ + spin_lock_irqsave(&lock_l0_stats, flags); + + rc = kvmppc_update_l0_stats(); + if (!rc) { + switch (config) { + case KVMPPC_EVENT_HOST_HEAP: + curr_val = l0_stats.guest_heap; + break; + case KVMPPC_EVENT_HOST_HEAP_MAX: + curr_val = l0_stats.guest_heap_max; + break; + case KVMPPC_EVENT_HOST_PGTABLE: + curr_val = l0_stats.guest_pgtable_size; + break; + case KVMPPC_EVENT_HOST_PGTABLE_MAX: + curr_val = l0_stats.guest_pgtable_size_max; + break; + case KVMPPC_EVENT_HOST_PGTABLE_RECLAIM: + curr_val = l0_stats.guest_pgtable_reclaim; + break; + default: + rc = -ENOENT; + break; + } + } + + spin_unlock_irqrestore(&lock_l0_stats, flags); + + /* If no error than update the perf event */ + if (!rc) { + prev_val = local64_xchg(&event->hw.prev_count, curr_val); + if (curr_val > prev_val) + local64_add(curr_val - prev_val, &event->count); + } + + return rc; +} + +static int kvmppc_pmu_event_init(struct perf_event *event) +{ + unsigned int config = event->attr.config; + + pr_debug("%s: Event(%p) id=%llu cpu=%x on_cpu=%x config=%u", + __func__, event, event->id, event->cpu, + event->oncpu, config); + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (config >= KVMPPC_EVENT_MAX) + return -EINVAL; + + local64_set(&event->hw.prev_count, 0); + local64_set(&event->count, 0); + + return 0; +} + +static void kvmppc_pmu_del(struct perf_event *event, int flags) +{ + kvmppc_pmu_event_update(event); +} + +static int kvmppc_pmu_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + return kvmppc_pmu_event_update(event); + return 0; +} + +static void kvmppc_pmu_read(struct perf_event *event) +{ + kvmppc_pmu_event_update(event); +} + +/* Return the size of the needed guest state buffer */ +static size_t hostwide_get_size(struct kvmppc_gs_msg *gsm) + +{ + size_t size = 0; + const u16 ids[] = { + KVMPPC_GSID_L0_GUEST_HEAP, + 
KVMPPC_GSID_L0_GUEST_HEAP_MAX, + KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE, + KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX, + KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM + }; + + for (int i = 0; i < ARRAY_SIZE(ids); i++) + size += kvmppc_gse_total_size(kvmppc_gsid_size(ids[i])); + return size; +} + +/* Populate the request guest state buffer */ +static int hostwide_fill_info(struct kvmppc_gs_buff *gsb, + struct kvmppc_gs_msg *gsm) +{ + int rc = 0; + struct kvmppc_hostwide_stats *stats = gsm->data; + + /* + * It doesn't matter what values are put into request buffer as + * they are going to be overwritten anyways. But for the sake of + * testcode and symmetry contents of existing stats are put + * populated into the request guest state buffer. + */ + if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_HEAP)) + rc = kvmppc_gse_put_u64(gsb, + KVMPPC_GSID_L0_GUEST_HEAP, + stats->guest_heap); + + if (!rc && kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX)) + rc = kvmppc_gse_put_u64(gsb, + KVMPPC_GSID_L0_GUEST_HEAP_MAX, + stats->guest_heap_max); + + if (!rc && kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE)) + rc = kvmppc_gse_put_u64(gsb, + KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE, + stats->guest_pgtable_size); + if (!rc && + kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX)) + rc = kvmppc_gse_put_u64(gsb, + KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX, + stats->guest_pgtable_size_max); + if (!rc && + kvmppc_gsm_includes(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM)) + rc = kvmppc_gse_put_u64(gsb, + KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM, + stats->guest_pgtable_reclaim); + + return rc; +} + +/* Parse and update the host wide stats from returned gsb */ +static int hostwide_refresh_info(struct kvmppc_gs_msg *gsm, + struct kvmppc_gs_buff *gsb) +{ + struct kvmppc_gs_parser gsp = { 0 }; + struct kvmppc_hostwide_stats *stats = gsm->data; + struct kvmppc_gs_elem *gse; + int rc; + + rc = kvmppc_gse_parse(&gsp, gsb); + if (rc < 0) + return rc; + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP); + if (gse) + stats->guest_heap = kvmppc_gse_get_u64(gse); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP_MAX); + if (gse) + stats->guest_heap_max = kvmppc_gse_get_u64(gse); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE); + if (gse) + stats->guest_pgtable_size = kvmppc_gse_get_u64(gse); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX); + if (gse) + stats->guest_pgtable_size_max = kvmppc_gse_get_u64(gse); + + gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM); + if (gse) + stats->guest_pgtable_reclaim = kvmppc_gse_get_u64(gse); + + return 0; +} + +/* gsb-message ops for setting up/parsing */ +static struct kvmppc_gs_msg_ops gsb_ops_l0_stats = { + .get_size = hostwide_get_size, + .fill_info = hostwide_fill_info, + .refresh_info = hostwide_refresh_info, +}; + +static int kvmppc_init_hostwide(void) +{ + int rc = 0; + unsigned long flags; + + spin_lock_irqsave(&lock_l0_stats, flags); + + /* already registered ? 
*/ + if (gsm_l0_stats) { + rc = 0; + goto out; + } + + /* setup the Guest state message/buffer to talk to L0 */ + gsm_l0_stats = kvmppc_gsm_new(&gsb_ops_l0_stats, &l0_stats, + GSM_SEND, GFP_KERNEL); + if (!gsm_l0_stats) { + rc = -ENOMEM; + goto out; + } + + /* Populate the Idents */ + kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_HEAP); + kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_HEAP_MAX); + kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE); + kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX); + kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM); + + /* allocate GSB. Guest/Vcpu Id is ignored */ + gsb_l0_stats = kvmppc_gsb_new(kvmppc_gsm_size(gsm_l0_stats), 0, 0, + GFP_KERNEL); + if (!gsb_l0_stats) { + rc = -ENOMEM; + goto out; + } + + /* ask the ops to fill in the info */ + rc = kvmppc_gsm_fill_info(gsm_l0_stats, gsb_l0_stats); + +out: + if (rc) { + if (gsm_l0_stats) + kvmppc_gsm_free(gsm_l0_stats); + if (gsb_l0_stats) + kvmppc_gsb_free(gsb_l0_stats); + gsm_l0_stats = NULL; + gsb_l0_stats = NULL; + } + spin_unlock_irqrestore(&lock_l0_stats, flags); + return rc; +} + +static void kvmppc_cleanup_hostwide(void) +{ + unsigned long flags; + + spin_lock_irqsave(&lock_l0_stats, flags); + + if (gsm_l0_stats) + kvmppc_gsm_free(gsm_l0_stats); + if (gsb_l0_stats) + kvmppc_gsb_free(gsb_l0_stats); + gsm_l0_stats = NULL; + gsb_l0_stats = NULL; + + spin_unlock_irqrestore(&lock_l0_stats, flags); +} + +/* L1 wide counters PMU */ +static struct pmu kvmppc_pmu = { + .module = THIS_MODULE, + .task_ctx_nr = perf_sw_context, + .name = "kvm-hv", + .event_init = kvmppc_pmu_event_init, + .add = kvmppc_pmu_add, + .del = kvmppc_pmu_del, + .read = kvmppc_pmu_read, + .attr_groups = kvmppc_pmu_attr_groups, + .type = -1, + .scope = PERF_PMU_SCOPE_SYS_WIDE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, +}; + +static int __init kvmppc_register_pmu(void) +{ + int rc = -EOPNOTSUPP; + + /* only support events for nestedv2 right now */ + if (kvmhv_is_nestedv2()) { + rc = kvmppc_init_hostwide(); + if (rc) + goto out; + + /* Register the pmu */ + rc = perf_pmu_register(&kvmppc_pmu, kvmppc_pmu.name, -1); + if (rc) + goto out; + + pr_info("Registered kvm-hv pmu"); + } + +out: + return rc; +} + +static void __exit kvmppc_unregister_pmu(void) +{ + if (kvmhv_is_nestedv2()) { + kvmppc_cleanup_hostwide(); + + if (kvmppc_pmu.type != -1) + perf_pmu_unregister(&kvmppc_pmu); + + pr_info("kvmhv_pmu unregistered.\n"); + } +} + +module_init(kvmppc_register_pmu); +module_exit(kvmppc_unregister_pmu); +MODULE_DESCRIPTION("KVM PPC Book3s-hv PMU"); +MODULE_AUTHOR("Vaibhav Jain <vaibhav@linux.ibm.com>"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/44x/gpio.c b/arch/powerpc/platforms/44x/gpio.c index e5f2319e5cbe..d540e261d85a 100644 --- a/arch/powerpc/platforms/44x/gpio.c +++ b/arch/powerpc/platforms/44x/gpio.c @@ -75,8 +75,7 @@ __ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) clrbits32(®s->or, GPIO_MASK(gpio)); } -static void -ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) +static int ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) { struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc); unsigned long flags; @@ -88,6 +87,8 @@ ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) spin_unlock_irqrestore(&chip->lock, flags); pr_debug("%s: gpio: %d val: %d\n", __func__, gpio, val); + + return 0; } static int ppc4xx_gpio_dir_in(struct gpio_chip *gc, unsigned int 
gpio) @@ -179,7 +180,7 @@ static int __init ppc4xx_add_gpiochips(void) gc->direction_input = ppc4xx_gpio_dir_in; gc->direction_output = ppc4xx_gpio_dir_out; gc->get = ppc4xx_gpio_get; - gc->set = ppc4xx_gpio_set; + gc->set_rv = ppc4xx_gpio_set; ret = of_mm_gpiochip_add_data(np, mm_gc, ppc4xx_gc); if (ret) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 1ea591ec6083..c96af6b0eab4 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -280,7 +280,7 @@ static int mpc52xx_gpt_gpio_get(struct gpio_chip *gc, unsigned int gpio) return (in_be32(&gpt->regs->status) >> 8) & 1; } -static void +static int mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v) { struct mpc52xx_gpt_priv *gpt = gpiochip_get_data(gc); @@ -293,6 +293,8 @@ mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v) raw_spin_lock_irqsave(&gpt->lock, flags); clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK, r); raw_spin_unlock_irqrestore(&gpt->lock, flags); + + return 0; } static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio) @@ -334,7 +336,7 @@ static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt) gpt->gc.direction_input = mpc52xx_gpt_gpio_dir_in; gpt->gc.direction_output = mpc52xx_gpt_gpio_dir_out; gpt->gc.get = mpc52xx_gpt_gpio_get; - gpt->gc.set = mpc52xx_gpt_gpio_set; + gpt->gc.set_rv = mpc52xx_gpt_gpio_set; gpt->gc.base = -1; gpt->gc.parent = gpt->dev; diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c index 4d8fa9ed1a67..6e37dfc6c5c9 100644 --- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -92,10 +92,11 @@ static void mcu_power_off(void) mutex_unlock(&mcu->lock); } -static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) +static int mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) { struct mcu *mcu = gpiochip_get_data(gc); u8 bit = 1 << (4 + gpio); + int ret; mutex_lock(&mcu->lock); if (val) @@ -103,14 +104,16 @@ static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) else mcu->reg_ctrl |= bit; - i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL, mcu->reg_ctrl); + ret = i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL, + mcu->reg_ctrl); mutex_unlock(&mcu->lock); + + return ret; } static int mcu_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) { - mcu_gpio_set(gc, gpio, val); - return 0; + return mcu_gpio_set(gc, gpio, val); } static int mcu_gpiochip_add(struct mcu *mcu) @@ -123,7 +126,7 @@ static int mcu_gpiochip_add(struct mcu *mcu) gc->can_sleep = 1; gc->ngpio = MCU_NUM_GPIO; gc->base = -1; - gc->set = mcu_gpio_set; + gc->set_rv = mcu_gpio_set; gc->direction_output = mcu_gpio_dir_out; gc->parent = dev; diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index 1dc095ad48fc..7462c221115c 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -417,7 +417,7 @@ static void __cpm1_gpio16_set(struct cpm1_gpio16_chip *cpm1_gc, u16 pin_mask, in out_be16(&iop->dat, cpm1_gc->cpdata); } -static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value) +static int cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value) { struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc); unsigned long flags; @@ -428,6 +428,8 @@ static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int 
gpio, int value) __cpm1_gpio16_set(cpm1_gc, pin_mask, value); spin_unlock_irqrestore(&cpm1_gc->lock, flags); + + return 0; } static int cpm1_gpio16_to_irq(struct gpio_chip *gc, unsigned int gpio) @@ -497,7 +499,7 @@ int cpm1_gpiochip_add16(struct device *dev) gc->direction_input = cpm1_gpio16_dir_in; gc->direction_output = cpm1_gpio16_dir_out; gc->get = cpm1_gpio16_get; - gc->set = cpm1_gpio16_set; + gc->set_rv = cpm1_gpio16_set; gc->to_irq = cpm1_gpio16_to_irq; gc->parent = dev; gc->owner = THIS_MODULE; @@ -554,7 +556,7 @@ static void __cpm1_gpio32_set(struct cpm1_gpio32_chip *cpm1_gc, u32 pin_mask, in out_be32(&iop->dat, cpm1_gc->cpdata); } -static void cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value) +static int cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value) { struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(gc); unsigned long flags; @@ -565,6 +567,8 @@ static void cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value) __cpm1_gpio32_set(cpm1_gc, pin_mask, value); spin_unlock_irqrestore(&cpm1_gc->lock, flags); + + return 0; } static int cpm1_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) @@ -618,7 +622,7 @@ int cpm1_gpiochip_add32(struct device *dev) gc->direction_input = cpm1_gpio32_dir_in; gc->direction_output = cpm1_gpio32_dir_out; gc->get = cpm1_gpio32_get; - gc->set = cpm1_gpio32_set; + gc->set_rv = cpm1_gpio32_set; gc->parent = dev; gc->owner = THIS_MODULE; diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 6de1cd5d8a58..e119ced05d10 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -45,6 +45,7 @@ #include <linux/root_dev.h> #include <linux/bitops.h> #include <linux/suspend.h> +#include <linux/string_choices.h> #include <linux/of.h> #include <linux/of_platform.h> @@ -238,8 +239,7 @@ static void __init l2cr_init(void) _set_L2CR(0); _set_L2CR(*l2cr); pr_info("L2CR overridden (0x%x), backside cache is %s\n", - *l2cr, ((*l2cr) & 0x80000000) ? - "enabled" : "disabled"); + *l2cr, str_enabled_disabled((*l2cr) & 0x80000000)); } of_node_put(np); break; diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c index 8633891b7aa5..b4426a35aca3 100644 --- a/arch/powerpc/platforms/powermac/time.c +++ b/arch/powerpc/platforms/powermac/time.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/param.h> #include <linux/string.h> +#include <linux/string_choices.h> #include <linux/mm.h> #include <linux/init.h> #include <linux/time.h> @@ -77,7 +78,7 @@ long __init pmac_time_init(void) delta |= 0xFF000000UL; dst = ((pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x8) & 0x80) != 0); printk("GMT Delta read from XPRAM: %d minutes, DST: %s\n", delta/60, - dst ? "on" : "off"); + str_on_off(dst)); #endif return delta; } diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index 61722133eb2d..22d91ac424dd 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/reboot.h> #include <linux/rcuwait.h> +#include <linux/string_choices.h> #include <asm/firmware.h> #include <asm/lv1call.h> @@ -724,7 +725,7 @@ static irqreturn_t ps3_notification_interrupt(int irq, void *data) static int ps3_notification_read_write(struct ps3_notification_device *dev, u64 lpar, int write) { - const char *op = write ? 
"write" : "read"; + const char *op = str_write_read(write); unsigned long flags; int res; diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 3f3e3492e436..57222678bb3f 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -3,7 +3,8 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG obj-y := lpar.o hvCall.o nvram.o reconfig.o \ of_helpers.o rtas-work-area.o papr-sysparm.o \ - papr-vpd.o \ + papr-rtas-common.o papr-vpd.o papr-indices.o \ + papr-platform-dump.o papr-phy-attest.o \ setup.o iommu.o event_sources.o ras.o \ firmware.o power.o dlpar.o mobility.o rng.o \ pci.o pci_dlpar.o eeh_pseries.o msi.o \ diff --git a/arch/powerpc/platforms/pseries/htmdump.c b/arch/powerpc/platforms/pseries/htmdump.c index 57fc1700f604..742ec52c9d4d 100644 --- a/arch/powerpc/platforms/pseries/htmdump.c +++ b/arch/powerpc/platforms/pseries/htmdump.c @@ -10,28 +10,40 @@ #include <asm/io.h> #include <asm/machdep.h> #include <asm/plpar_wrappers.h> +#include <asm/kvm_guest.h> static void *htm_buf; +static void *htm_status_buf; +static void *htm_info_buf; +static void *htm_caps_buf; static u32 nodeindex; static u32 nodalchipindex; static u32 coreindexonchip; static u32 htmtype; +static u32 htmconfigure; +static u32 htmstart; +static u32 htmsetup; +static u64 htmflags; + static struct dentry *htmdump_debugfs_dir; +#define HTM_ENABLE 1 +#define HTM_DISABLE 0 +#define HTM_NOWRAP 1 +#define HTM_WRAP 0 -static ssize_t htmdump_read(struct file *filp, char __user *ubuf, - size_t count, loff_t *ppos) +/* + * Check the return code for H_HTM hcall. + * Return non-zero value (1) if either H_PARTIAL or H_SUCCESS + * is returned. For other return codes: + * Return zero if H_NOT_AVAILABLE. + * Return -EBUSY if hcall return busy. + * Return -EINVAL if any parameter or operation is not valid. + * Return -EPERM if HTM Virtualization Engine Technology code + * is not applied. + * Return -EIO if the HTM state is not valid. 
+ */ +static ssize_t htm_return_check(long rc) { - void *htm_buf = filp->private_data; - unsigned long page, read_size, available; - loff_t offset; - long rc; - - page = ALIGN_DOWN(*ppos, PAGE_SIZE); - offset = (*ppos) % PAGE_SIZE; - - rc = htm_get_dump_hardware(nodeindex, nodalchipindex, coreindexonchip, - htmtype, virt_to_phys(htm_buf), PAGE_SIZE, page); - switch (rc) { case H_SUCCESS: /* H_PARTIAL for the case where all available data can't be @@ -65,6 +77,38 @@ static ssize_t htmdump_read(struct file *filp, char __user *ubuf, return -EPERM; } + /* + * Return 1 for H_SUCCESS/H_PARTIAL + */ + return 1; +} + +static ssize_t htmdump_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + void *htm_buf = filp->private_data; + unsigned long page, read_size, available; + loff_t offset; + long rc, ret; + + page = ALIGN_DOWN(*ppos, PAGE_SIZE); + offset = (*ppos) % PAGE_SIZE; + + /* + * Invoke H_HTM call with: + * - operation as htm dump (H_HTM_OP_DUMP_DATA) + * - last three values are address, size and offset + */ + rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, + htmtype, H_HTM_OP_DUMP_DATA, virt_to_phys(htm_buf), + PAGE_SIZE, page); + + ret = htm_return_check(rc); + if (ret <= 0) { + pr_debug("H_HTM hcall failed for op: H_HTM_OP_DUMP_DATA, returning %ld\n", ret); + return ret; + } + available = PAGE_SIZE; read_size = min(count, available); *ppos += read_size; @@ -77,6 +121,292 @@ static const struct file_operations htmdump_fops = { .open = simple_open, }; +static int htmconfigure_set(void *data, u64 val) +{ + long rc, ret; + unsigned long param1 = -1, param2 = -1; + + /* + * value as 1 : configure HTM. + * value as 0 : deconfigure HTM. Return -EINVAL for + * other values. + */ + if (val == HTM_ENABLE) { + /* + * Invoke H_HTM call with: + * - operation as htm configure (H_HTM_OP_CONFIGURE) + * - If htmflags is set, param1 and param2 will be -1 + * which is an indicator to use default htm mode reg mask + * and htm mode reg value. + * - last three values are unused, hence set to zero + */ + if (!htmflags) { + param1 = 0; + param2 = 0; + } + + rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, + htmtype, H_HTM_OP_CONFIGURE, param1, param2, 0); + } else if (val == HTM_DISABLE) { + /* + * Invoke H_HTM call with: + * - operation as htm deconfigure (H_HTM_OP_DECONFIGURE) + * - last three values are unused, hence set to zero + */ + rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, + htmtype, H_HTM_OP_DECONFIGURE, 0, 0, 0); + } else + return -EINVAL; + + ret = htm_return_check(rc); + if (ret <= 0) { + pr_debug("H_HTM hcall failed, returning %ld\n", ret); + return ret; + } + + /* Set htmconfigure if operation succeeds */ + htmconfigure = val; + + return 0; +} + +static int htmconfigure_get(void *data, u64 *val) +{ + *val = htmconfigure; + return 0; +} + +static int htmstart_set(void *data, u64 val) +{ + long rc, ret; + + /* + * value as 1: start HTM + * value as 0: stop HTM + * Return -EINVAL for other values. 
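+ *
+ * The value-to-operation mapping applied below is:
+ *	1 -> H_HTM_OP_START
+ *	0 -> H_HTM_OP_STOP
+ * Note that htmstart is only updated when the hcall succeeds, so a
+ * failed start/stop leaves the previously reported state unchanged.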
+ */ + if (val == HTM_ENABLE) { + /* + * Invoke H_HTM call with: + * - operation as htm start (H_HTM_OP_START) + * - last three values are unused, hence set to zero + */ + rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, + htmtype, H_HTM_OP_START, 0, 0, 0); + + } else if (val == HTM_DISABLE) { + /* + * Invoke H_HTM call with: + * - operation as htm stop (H_HTM_OP_STOP) + * - last three values are unused, hence set to zero + */ + rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, + htmtype, H_HTM_OP_STOP, 0, 0, 0); + } else + return -EINVAL; + + ret = htm_return_check(rc); + if (ret <= 0) { + pr_debug("H_HTM hcall failed, returning %ld\n", ret); + return ret; + } + + /* Set htmstart if H_HTM_OP_START/H_HTM_OP_STOP operation succeeds */ + htmstart = val; + + return 0; +} + +static int htmstart_get(void *data, u64 *val) +{ + *val = htmstart; + return 0; +} + +static ssize_t htmstatus_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + void *htm_status_buf = filp->private_data; + long rc, ret; + u64 *num_entries; + u64 to_copy; + int htmstatus_flag; + + /* + * Invoke H_HTM call with: + * - operation as htm status (H_HTM_OP_STATUS) + * - last three values as addr, size and offset + */ + rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, + htmtype, H_HTM_OP_STATUS, virt_to_phys(htm_status_buf), + PAGE_SIZE, 0); + + ret = htm_return_check(rc); + if (ret <= 0) { + pr_debug("H_HTM hcall failed for op: H_HTM_OP_STATUS, returning %ld\n", ret); + return ret; + } + + /* + * HTM status buffer, start of buffer + 0x10 gives the + * number of HTM entries in the buffer. Each nest htm status + * entry is 0x6 bytes where each core htm status entry is + * 0x8 bytes. + * So total count to copy is: + * 32 bytes (for first 7 fields) + (number of HTM entries * entry size) + */ + num_entries = htm_status_buf + 0x10; + if (htmtype == 0x2) + htmstatus_flag = 0x8; + else + htmstatus_flag = 0x6; + to_copy = 32 + (be64_to_cpu(*num_entries) * htmstatus_flag); + return simple_read_from_buffer(ubuf, count, ppos, htm_status_buf, to_copy); +} + +static const struct file_operations htmstatus_fops = { + .llseek = NULL, + .read = htmstatus_read, + .open = simple_open, +}; + +static ssize_t htminfo_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + void *htm_info_buf = filp->private_data; + long rc, ret; + u64 *num_entries; + u64 to_copy; + + /* + * Invoke H_HTM call with: + * - operation as htm status (H_HTM_OP_STATUS) + * - last three values as addr, size and offset + */ + rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, + htmtype, H_HTM_OP_DUMP_SYSPROC_CONF, virt_to_phys(htm_info_buf), + PAGE_SIZE, 0); + + ret = htm_return_check(rc); + if (ret <= 0) { + pr_debug("H_HTM hcall failed for op: H_HTM_OP_DUMP_SYSPROC_CONF, returning %ld\n", ret); + return ret; + } + + /* + * HTM status buffer, start of buffer + 0x10 gives the + * number of HTM entries in the buffer. Each entry of processor + * is 16 bytes. 
+ * + * So total count to copy is: + * 32 bytes (for first 5 fields) + (number of HTM entries * entry size) + */ + num_entries = htm_info_buf + 0x10; + to_copy = 32 + (be64_to_cpu(*num_entries) * 16); + return simple_read_from_buffer(ubuf, count, ppos, htm_info_buf, to_copy); +} + +static ssize_t htmcaps_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + void *htm_caps_buf = filp->private_data; + long rc, ret; + + /* + * Invoke H_HTM call with: + * - operation as htm capabilities (H_HTM_OP_CAPABILITIES) + * - last three values as addr, size (0x80 for Capabilities Output Buffer + * and zero + */ + rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, + htmtype, H_HTM_OP_CAPABILITIES, virt_to_phys(htm_caps_buf), + 0x80, 0); + + ret = htm_return_check(rc); + if (ret <= 0) { + pr_debug("H_HTM hcall failed for op: H_HTM_OP_CAPABILITIES, returning %ld\n", ret); + return ret; + } + + return simple_read_from_buffer(ubuf, count, ppos, htm_caps_buf, 0x80); +} + +static const struct file_operations htminfo_fops = { + .llseek = NULL, + .read = htminfo_read, + .open = simple_open, +}; + +static const struct file_operations htmcaps_fops = { + .llseek = NULL, + .read = htmcaps_read, + .open = simple_open, +}; + +static int htmsetup_set(void *data, u64 val) +{ + long rc, ret; + + /* + * Input value: HTM buffer size in the power of 2 + * example: hex value 0x21 ( decimal: 33 ) is for + * 8GB + * Invoke H_HTM call with: + * - operation as htm start (H_HTM_OP_SETUP) + * - parameter 1 set to input value. + * - last two values are unused, hence set to zero + */ + rc = htm_hcall_wrapper(htmflags, nodeindex, nodalchipindex, coreindexonchip, + htmtype, H_HTM_OP_SETUP, val, 0, 0); + + ret = htm_return_check(rc); + if (ret <= 0) { + pr_debug("H_HTM hcall failed for op: H_HTM_OP_SETUP, returning %ld\n", ret); + return ret; + } + + /* Set htmsetup if H_HTM_OP_SETUP operation succeeds */ + htmsetup = val; + + return 0; +} + +static int htmsetup_get(void *data, u64 *val) +{ + *val = htmsetup; + return 0; +} + +static int htmflags_set(void *data, u64 val) +{ + /* + * Input value: + * Currently supported flag value is to enable/disable + * HTM buffer wrap. wrap is used along with "configure" + * to prevent HTM buffer from wrapping. 
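+ * The mapping applied below is:
+ *	1 -> htmflags = H_HTM_FLAGS_NOWRAP
+ *	0 -> htmflags = 0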
+ * Writing 1 will set noWrap while configuring HTM + */ + if (val == HTM_NOWRAP) + htmflags = H_HTM_FLAGS_NOWRAP; + else if (val == HTM_WRAP) + htmflags = 0; + else + return -EINVAL; + + return 0; +} + +static int htmflags_get(void *data, u64 *val) +{ + *val = htmflags; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(htmconfigure_fops, htmconfigure_get, htmconfigure_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(htmstart_fops, htmstart_get, htmstart_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(htmsetup_fops, htmsetup_get, htmsetup_set, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(htmflags_fops, htmflags_get, htmflags_set, "%llu\n"); + static int htmdump_init_debugfs(void) { htm_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); @@ -98,11 +428,50 @@ static int htmdump_init_debugfs(void) htmdump_debugfs_dir, &htmtype); debugfs_create_file("trace", 0400, htmdump_debugfs_dir, htm_buf, &htmdump_fops); + /* + * Debugfs interface files to control HTM operations: + */ + debugfs_create_file("htmconfigure", 0600, htmdump_debugfs_dir, NULL, &htmconfigure_fops); + debugfs_create_file("htmstart", 0600, htmdump_debugfs_dir, NULL, &htmstart_fops); + debugfs_create_file("htmsetup", 0600, htmdump_debugfs_dir, NULL, &htmsetup_fops); + debugfs_create_file("htmflags", 0600, htmdump_debugfs_dir, NULL, &htmflags_fops); + + /* Debugfs interface file to present status of HTM */ + htm_status_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!htm_status_buf) { + pr_err("Failed to allocate htmstatus buf\n"); + return -ENOMEM; + } + + /* Debugfs interface file to present System Processor Configuration */ + htm_info_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!htm_info_buf) { + pr_err("Failed to allocate htm info buf\n"); + return -ENOMEM; + } + + /* Debugfs interface file to present HTM capabilities */ + htm_caps_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!htm_caps_buf) { + pr_err("Failed to allocate htm caps buf\n"); + return -ENOMEM; + } + + debugfs_create_file("htmstatus", 0400, htmdump_debugfs_dir, htm_status_buf, &htmstatus_fops); + debugfs_create_file("htminfo", 0400, htmdump_debugfs_dir, htm_info_buf, &htminfo_fops); + debugfs_create_file("htmcaps", 0400, htmdump_debugfs_dir, htm_caps_buf, &htmcaps_fops); + return 0; } static int __init htmdump_init(void) { + /* Disable on kvm guest */ + if (is_kvm_guest()) { + pr_info("htmdump not supported inside KVM guest\n"); + return -EOPNOTSUPP; + } + if (htmdump_init_debugfs()) return -ENOMEM; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index d6ebc19fb99c..eec333dd2e59 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -197,7 +197,7 @@ static void tce_iommu_userspace_view_free(struct iommu_table *tbl) static void tce_free_pSeries(struct iommu_table *tbl) { - if (!tbl->it_userspace) + if (tbl->it_userspace) tce_iommu_userspace_view_free(tbl); } diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index f9d80111c322..957c0c03d259 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -525,7 +525,12 @@ static struct msi_domain_info pseries_msi_domain_info = { static void pseries_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) { - __pci_read_msi_msg(irq_data_get_msi_desc(data), msg); + struct pci_dev *dev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data)); + + if (dev->current_state == PCI_D0) + __pci_read_msi_msg(irq_data_get_msi_desc(data), msg); + else + get_cached_msi_msg(data->irq, msg); } static struct irq_chip 
pseries_msi_irq_chip = { diff --git a/arch/powerpc/platforms/pseries/papr-indices.c b/arch/powerpc/platforms/pseries/papr-indices.c new file mode 100644 index 000000000000..3c7545591c45 --- /dev/null +++ b/arch/powerpc/platforms/pseries/papr-indices.c @@ -0,0 +1,488 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) "papr-indices: " fmt + +#include <linux/build_bug.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/lockdep.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/signal.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/string_helpers.h> +#include <linux/uaccess.h> +#include <asm/machdep.h> +#include <asm/rtas-work-area.h> +#include <asm/rtas.h> +#include <uapi/asm/papr-indices.h> +#include "papr-rtas-common.h" + +/* + * Function-specific return values for ibm,set-dynamic-indicator and + * ibm,get-dynamic-sensor-state RTAS calls. + * PAPR+ v2.13 7.3.18 and 7.3.19. + */ +#define RTAS_IBM_DYNAMIC_INDICE_NO_INDICATOR -3 + +/** + * struct rtas_get_indices_params - Parameters (in and out) for + * ibm,get-indices. + * @is_sensor: In: Caller-provided whether sensor or indicator. + * @indice_type:In: Caller-provided indice (sensor or indicator) token + * @work_area: In: Caller-provided work area buffer for results. + * @next: In: Sequence number. Out: Next sequence number. + * @status: Out: RTAS call status. + */ +struct rtas_get_indices_params { + u8 is_sensor; + u32 indice_type; + struct rtas_work_area *work_area; + u32 next; + s32 status; +}; + +/* + * rtas_ibm_get_indices() - Call ibm,get-indices to fill a work area buffer. + * @params: See &struct rtas_ibm_get_indices_params. + * + * Calls ibm,get-indices until it errors or successfully deposits data + * into the supplied work area. Handles RTAS retry statuses. Maps RTAS + * error statuses to reasonable errno values. + * + * The caller is expected to invoke rtas_ibm_get_indices() multiple times + * to retrieve all indices data for the provided indice type. Only one + * sequence should be in progress at any time; starting a new sequence + * will disrupt any sequence already in progress. Serialization of + * indices retrieval sequences is the responsibility of the caller. + * + * The caller should inspect @params.status to determine whether more + * calls are needed to complete the sequence. + * + * Context: May sleep. + * Return: -ve on error, 0 otherwise. 
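+ *
+ * Typical retrieval loop (editor's sketch; consume_buf() is a
+ * hypothetical stand-in for copying the work area contents out):
+ *
+ *	params->next = 1;
+ *	do {
+ *		if (rtas_ibm_get_indices(params))
+ *			break;
+ *		consume_buf(rtas_work_area_raw_buf(params->work_area));
+ *	} while (params->status == RTAS_SEQ_MORE_DATA);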
+ */
+static int rtas_ibm_get_indices(struct rtas_get_indices_params *params)
+{
+	struct rtas_work_area *work_area = params->work_area;
+	const s32 token = rtas_function_token(RTAS_FN_IBM_GET_INDICES);
+	u32 rets;
+	s32 fwrc;
+	int ret;
+
+	if (token == RTAS_UNKNOWN_SERVICE)
+		return -ENOENT;
+
+	lockdep_assert_held(&rtas_ibm_get_indices_lock);
+
+	do {
+		fwrc = rtas_call(token, 5, 2, &rets, params->is_sensor,
+				 params->indice_type,
+				 rtas_work_area_phys(work_area),
+				 rtas_work_area_size(work_area),
+				 params->next);
+	} while (rtas_busy_delay(fwrc));
+
+	switch (fwrc) {
+	case RTAS_HARDWARE_ERROR:
+		ret = -EIO;
+		break;
+	case RTAS_INVALID_PARAMETER: /* Indicator type is not supported */
+		ret = -EINVAL;
+		break;
+	case RTAS_SEQ_START_OVER:
+		ret = -EAGAIN;
+		pr_info_ratelimited("Indices changed during retrieval, retrying\n");
+		params->next = 1;
+		break;
+	case RTAS_SEQ_MORE_DATA:
+		params->next = rets;
+		ret = 0;
+		break;
+	case RTAS_SEQ_COMPLETE:
+		params->next = 0;
+		ret = 0;
+		break;
+	default:
+		ret = -EIO;
+		pr_err_ratelimited("unexpected ibm,get-indices status %d\n", fwrc);
+		break;
+	}
+
+	params->status = fwrc;
+	return ret;
+}
+
+/*
+ * Internal indices sequence APIs. A sequence is a series of calls to
+ * ibm,get-indices for a given indice type. The sequence ends when
+ * an error is encountered or all indices for the input have been
+ * returned.
+ */
+
+/*
+ * indices_sequence_begin() - Begin an indices retrieval sequence.
+ *
+ * Context: May sleep.
+ */
+static void indices_sequence_begin(struct papr_rtas_sequence *seq)
+{
+	struct rtas_get_indices_params *param;
+
+	param = (struct rtas_get_indices_params *)seq->params;
+	/*
+	 * We could allocate the work area before acquiring the
+	 * function lock, but that would allow concurrent requests to
+	 * exhaust the limited work area pool for no benefit. So
+	 * allocate the work area under the lock.
+	 */
+	mutex_lock(&rtas_ibm_get_indices_lock);
+	param->work_area = rtas_work_area_alloc(RTAS_GET_INDICES_BUF_SIZE);
+	param->next = 1;
+	param->status = 0;
+}
+
+/*
+ * indices_sequence_end() - Finalize an indices retrieval sequence.
+ *
+ * Releases resources obtained by indices_sequence_begin().
+ */
+static void indices_sequence_end(struct papr_rtas_sequence *seq)
+{
+	struct rtas_get_indices_params *param;
+
+	param = (struct rtas_get_indices_params *)seq->params;
+	rtas_work_area_free(param->work_area);
+	mutex_unlock(&rtas_ibm_get_indices_lock);
+}
+
+/*
+ * Work function to be passed to papr_rtas_blob_generate().
+ *
+ * The ibm,get-indices RTAS call fills the work area in a fixed format
+ * but does not return the number of bytes it wrote to the buffer. So
+ * instead of the kernel parsing the work area to determine the buffer
+ * length, copy the complete work area (RTAS_GET_INDICES_BUF_SIZE)
+ * to the blob and let user space parse the data. This means
+ * RTAS_GET_INDICES_BUF_SIZE bytes are returned for each read().
+ */
+static const char *indices_sequence_fill_work_area(struct papr_rtas_sequence *seq,
+						   size_t *len)
+{
+	struct rtas_get_indices_params *p;
+	bool init_state;
+
+	p = (struct rtas_get_indices_params *)seq->params;
+	init_state = (p->next == 1);
+
+	if (papr_rtas_sequence_should_stop(seq, p->status, init_state))
+		return NULL;
+	if (papr_rtas_sequence_set_err(seq, rtas_ibm_get_indices(p)))
+		return NULL;
+
+	*len = RTAS_GET_INDICES_BUF_SIZE;
+	return rtas_work_area_raw_buf(p->work_area);
+}
+
+/*
+ * papr_indices_handle_read - Return indices blob data to user space.
+ *
+ * Since ibm,get-indices does not report how many bytes it wrote, a
+ * full RTAS_GET_INDICES_BUF_SIZE chunk was copied to the blob for
+ * each RTAS call, so a full RTAS_GET_INDICES_BUF_SIZE buffer is
+ * likewise returned to user space for each read().
+ */
+static ssize_t papr_indices_handle_read(struct file *file,
+		char __user *buf, size_t size, loff_t *off)
+{
+	const struct papr_rtas_blob *blob = file->private_data;
+
+	/* we should not instantiate a handle without any data attached. */
+	if (!papr_rtas_blob_has_data(blob)) {
+		pr_err_once("handle without data\n");
+		return -EIO;
+	}
+
+	if (size < RTAS_GET_INDICES_BUF_SIZE) {
+		pr_err_once("Invalid buffer length %ld, expect %d\n",
+			    size, RTAS_GET_INDICES_BUF_SIZE);
+		return -EINVAL;
+	} else if (size > RTAS_GET_INDICES_BUF_SIZE)
+		size = RTAS_GET_INDICES_BUF_SIZE;
+
+	return simple_read_from_buffer(buf, size, off, blob->data, blob->len);
+}
+
+static const struct file_operations papr_indices_handle_ops = {
+	.read = papr_indices_handle_read,
+	.llseek = papr_rtas_common_handle_seek,
+	.release = papr_rtas_common_handle_release,
+};
+
+/*
+ * papr_indices_create_handle() - Create a fd-based handle for reading
+ * indices data
+ * @ubuf: Input parameters to RTAS call such as whether sensor or indicator
+ * and indice type in user memory
+ *
+ * Handler for PAPR_INDICES_IOC_GET ioctl command. Validates @ubuf
+ * and instantiates an immutable indices "blob" for it. The blob is
+ * attached to a file descriptor for reading by user space. The memory
+ * backing the blob is freed when the file is released.
+ *
+ * All of the requested indices are retrieved by this call and all
+ * necessary RTAS interactions are performed before returning the fd
+ * to user space. This keeps the read handler simple and ensures that
+ * the kernel can prevent interleaving of ibm,get-indices call sequences.
+ *
+ * Return: The installed fd number if successful, -ve errno otherwise.
+ */
+static long papr_indices_create_handle(struct papr_indices_io_block __user *ubuf)
+{
+	struct papr_rtas_sequence seq = {};
+	struct rtas_get_indices_params params = {};
+	int fd;
+
+	if (get_user(params.is_sensor, &ubuf->indices.is_sensor))
+		return -EFAULT;
+
+	if (get_user(params.indice_type, &ubuf->indices.indice_type))
+		return -EFAULT;
+
+	seq = (struct papr_rtas_sequence) {
+		.begin = indices_sequence_begin,
+		.end = indices_sequence_end,
+		.work = indices_sequence_fill_work_area,
+	};
+
+	seq.params = &params;
+	fd = papr_rtas_setup_file_interface(&seq,
+			&papr_indices_handle_ops, "[papr-indices]");
+
+	return fd;
+}
+
+/*
+ * Create a work area with the input parameters. This function is used
+ * for both the ibm,set-dynamic-indicator and ibm,get-dynamic-sensor-state
+ * RTAS calls.
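+ *
+ * Resulting work area layout, per the PAPR requirements cited in the
+ * function body:
+ *
+ *	offset 0x0: 32-bit big-endian length of the location code
+ *	            string, including the NUL terminator
+ *	offset 0x4: NUL-terminated location code string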
+ */ +static struct rtas_work_area * +papr_dynamic_indice_buf_from_user(struct papr_indices_io_block __user *ubuf, + struct papr_indices_io_block *kbuf) +{ + struct rtas_work_area *work_area; + u32 length; + __be32 len_be; + + if (copy_from_user(kbuf, ubuf, sizeof(*kbuf))) + return ERR_PTR(-EFAULT); + + + if (!string_is_terminated(kbuf->dynamic_param.location_code_str, + ARRAY_SIZE(kbuf->dynamic_param.location_code_str))) + return ERR_PTR(-EINVAL); + + /* + * The input data in the work area should be as follows: + * - 32-bit integer length of the location code string, + * including NULL. + * - Location code string, NULL terminated, identifying the + * token (sensor or indicator). + * PAPR 2.13 - R1–7.3.18–5 ibm,set-dynamic-indicator + * - R1–7.3.19–5 ibm,get-dynamic-sensor-state + */ + /* + * Length that user space passed should also include NULL + * terminator. + */ + length = strlen(kbuf->dynamic_param.location_code_str) + 1; + if (length > LOC_CODE_SIZE) + return ERR_PTR(-EINVAL); + + len_be = cpu_to_be32(length); + + work_area = rtas_work_area_alloc(LOC_CODE_SIZE + sizeof(u32)); + memcpy(rtas_work_area_raw_buf(work_area), &len_be, sizeof(u32)); + memcpy((rtas_work_area_raw_buf(work_area) + sizeof(u32)), + &kbuf->dynamic_param.location_code_str, length); + + return work_area; +} + +/** + * papr_dynamic_indicator_ioc_set - ibm,set-dynamic-indicator RTAS Call + * PAPR 2.13 7.3.18 + * + * @ubuf: Input parameters to RTAS call such as indicator token and + * new state. + * + * Returns success or -errno. + */ +static long papr_dynamic_indicator_ioc_set(struct papr_indices_io_block __user *ubuf) +{ + struct papr_indices_io_block kbuf; + struct rtas_work_area *work_area; + s32 fwrc, token, ret; + + token = rtas_function_token(RTAS_FN_IBM_SET_DYNAMIC_INDICATOR); + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + mutex_lock(&rtas_ibm_set_dynamic_indicator_lock); + work_area = papr_dynamic_indice_buf_from_user(ubuf, &kbuf); + if (IS_ERR(work_area)) { + ret = PTR_ERR(work_area); + goto out; + } + + do { + fwrc = rtas_call(token, 3, 1, NULL, + kbuf.dynamic_param.token, + kbuf.dynamic_param.state, + rtas_work_area_phys(work_area)); + } while (rtas_busy_delay(fwrc)); + + rtas_work_area_free(work_area); + + switch (fwrc) { + case RTAS_SUCCESS: + ret = 0; + break; + case RTAS_IBM_DYNAMIC_INDICE_NO_INDICATOR: /* No such indicator */ + ret = -EOPNOTSUPP; + break; + default: + pr_err("unexpected ibm,set-dynamic-indicator result %d\n", + fwrc); + fallthrough; + case RTAS_HARDWARE_ERROR: /* Hardware/platform error */ + ret = -EIO; + break; + } + +out: + mutex_unlock(&rtas_ibm_set_dynamic_indicator_lock); + return ret; +} + +/** + * papr_dynamic_sensor_ioc_get - ibm,get-dynamic-sensor-state RTAS Call + * PAPR 2.13 7.3.19 + * + * @ubuf: Input parameters to RTAS call such as sensor token + * Copies the state in user space buffer. + * + * + * Returns success or -errno. 
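+ *
+ * User-space usage sketch (hypothetical fd, token and location code;
+ * the field names are those of struct papr_indices_io_block):
+ *
+ *	struct papr_indices_io_block io = { 0 };
+ *
+ *	strcpy(io.dynamic_param.location_code_str, "U8247.22L.212A91A-V1");
+ *	io.dynamic_param.token = token;
+ *	rc = ioctl(fd, PAPR_DYNAMIC_SENSOR_IOC_GET, &io);
+ *	state = io.dynamic_param.state;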
+ */ + +static long papr_dynamic_sensor_ioc_get(struct papr_indices_io_block __user *ubuf) +{ + struct papr_indices_io_block kbuf; + struct rtas_work_area *work_area; + s32 fwrc, token, ret; + u32 rets; + + token = rtas_function_token(RTAS_FN_IBM_GET_DYNAMIC_SENSOR_STATE); + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + mutex_lock(&rtas_ibm_get_dynamic_sensor_state_lock); + work_area = papr_dynamic_indice_buf_from_user(ubuf, &kbuf); + if (IS_ERR(work_area)) { + ret = PTR_ERR(work_area); + goto out; + } + + do { + fwrc = rtas_call(token, 2, 2, &rets, + kbuf.dynamic_param.token, + rtas_work_area_phys(work_area)); + } while (rtas_busy_delay(fwrc)); + + rtas_work_area_free(work_area); + + switch (fwrc) { + case RTAS_SUCCESS: + if (put_user(rets, &ubuf->dynamic_param.state)) + ret = -EFAULT; + else + ret = 0; + break; + case RTAS_IBM_DYNAMIC_INDICE_NO_INDICATOR: /* No such indicator */ + ret = -EOPNOTSUPP; + break; + default: + pr_err("unexpected ibm,get-dynamic-sensor result %d\n", + fwrc); + fallthrough; + case RTAS_HARDWARE_ERROR: /* Hardware/platform error */ + ret = -EIO; + break; + } + +out: + mutex_unlock(&rtas_ibm_get_dynamic_sensor_state_lock); + return ret; +} + +/* + * Top-level ioctl handler for /dev/papr-indices. + */ +static long papr_indices_dev_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + void __user *argp = (__force void __user *)arg; + long ret; + + switch (ioctl) { + case PAPR_INDICES_IOC_GET: + ret = papr_indices_create_handle(argp); + break; + case PAPR_DYNAMIC_SENSOR_IOC_GET: + ret = papr_dynamic_sensor_ioc_get(argp); + break; + case PAPR_DYNAMIC_INDICATOR_IOC_SET: + if (filp->f_mode & FMODE_WRITE) + ret = papr_dynamic_indicator_ioc_set(argp); + else + ret = -EBADF; + break; + default: + ret = -ENOIOCTLCMD; + break; + } + + return ret; +} + +static const struct file_operations papr_indices_ops = { + .unlocked_ioctl = papr_indices_dev_ioctl, +}; + +static struct miscdevice papr_indices_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "papr-indices", + .fops = &papr_indices_ops, +}; + +static __init int papr_indices_init(void) +{ + if (!rtas_function_implemented(RTAS_FN_IBM_GET_INDICES)) + return -ENODEV; + + if (!rtas_function_implemented(RTAS_FN_IBM_SET_DYNAMIC_INDICATOR)) + return -ENODEV; + + if (!rtas_function_implemented(RTAS_FN_IBM_GET_DYNAMIC_SENSOR_STATE)) + return -ENODEV; + + return misc_register(&papr_indices_dev); +} +machine_device_initcall(pseries, papr_indices_init); diff --git a/arch/powerpc/platforms/pseries/papr-phy-attest.c b/arch/powerpc/platforms/pseries/papr-phy-attest.c new file mode 100644 index 000000000000..1907f2411567 --- /dev/null +++ b/arch/powerpc/platforms/pseries/papr-phy-attest.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) "papr-phy-attest: " fmt + +#include <linux/build_bug.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/lockdep.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/signal.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/string_helpers.h> +#include <linux/uaccess.h> +#include <asm/machdep.h> +#include <asm/rtas-work-area.h> +#include <asm/rtas.h> +#include <uapi/asm/papr-physical-attestation.h> +#include "papr-rtas-common.h" + +/** + * struct rtas_phy_attest_params - Parameters (in and out) for + * ibm,physical-attestation. + * + * @cmd: In: Caller-provided attestation command buffer. Must be + * RTAS-addressable. 
+ * @work_area: In: Caller-provided work area buffer for attestation + * command structure + * Out: Caller-provided work area buffer for the response + * @cmd_len: In: Caller-provided attestation command structure + * length + * @sequence: In: Sequence number. Out: Next sequence number. + * @written: Out: Bytes written by ibm,physical-attestation to + * @work_area. + * @status: Out: RTAS call status. + */ +struct rtas_phy_attest_params { + struct papr_phy_attest_io_block cmd; + struct rtas_work_area *work_area; + u32 cmd_len; + u32 sequence; + u32 written; + s32 status; +}; + +/** + * rtas_physical_attestation() - Call ibm,physical-attestation to + * fill a work area buffer. + * @params: See &struct rtas_phy_attest_params. + * + * Calls ibm,physical-attestation until it errors or successfully + * deposits data into the supplied work area. Handles RTAS retry + * statuses. Maps RTAS error statuses to reasonable errno values. + * + * The caller is expected to invoke rtas_physical_attestation() + * multiple times to retrieve all the data for the provided + * attestation command. Only one sequence should be in progress at + * any time; starting a new sequence will disrupt any sequence + * already in progress. Serialization of attestation retrieval + * sequences is the responsibility of the caller. + * + * The caller should inspect @params.status to determine whether more + * calls are needed to complete the sequence. + * + * Context: May sleep. + * Return: -ve on error, 0 otherwise. + */ +static int rtas_physical_attestation(struct rtas_phy_attest_params *params) +{ + struct rtas_work_area *work_area; + s32 fwrc, token; + u32 rets[2]; + int ret; + + work_area = params->work_area; + token = rtas_function_token(RTAS_FN_IBM_PHYSICAL_ATTESTATION); + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + lockdep_assert_held(&rtas_ibm_physical_attestation_lock); + + do { + fwrc = rtas_call(token, 3, 3, rets, + rtas_work_area_phys(work_area), + params->cmd_len, + params->sequence); + } while (rtas_busy_delay(fwrc)); + + switch (fwrc) { + case RTAS_HARDWARE_ERROR: + ret = -EIO; + break; + case RTAS_INVALID_PARAMETER: + ret = -EINVAL; + break; + case RTAS_SEQ_MORE_DATA: + params->sequence = rets[0]; + fallthrough; + case RTAS_SEQ_COMPLETE: + params->written = rets[1]; + /* + * Kernel or firmware bug, do not continue. + */ + if (WARN(params->written > rtas_work_area_size(work_area), + "possible write beyond end of work area")) + ret = -EFAULT; + else + ret = 0; + break; + default: + ret = -EIO; + pr_err_ratelimited("unexpected ibm,get-phy_attest status %d\n", fwrc); + break; + } + + params->status = fwrc; + return ret; +} + +/* + * Internal physical-attestation sequence APIs. A physical-attestation + * sequence is a series of calls to get ibm,physical-attestation + * for a given attestation command. The sequence ends when an error + * is encountered or all data for the attestation command has been + * returned. + */ + +/** + * phy_attest_sequence_begin() - Begin a response data for attestation + * command retrieval sequence. + * @seq: user specified parameters for RTAS call from seq struct. + * + * Context: May sleep. + */ +static void phy_attest_sequence_begin(struct papr_rtas_sequence *seq) +{ + struct rtas_phy_attest_params *param; + + /* + * We could allocate the work area before acquiring the + * function lock, but that would allow concurrent requests to + * exhaust the limited work area pool for no benefit. So + * allocate the work area under the lock. 
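+	 *
+	 * Note also that the attestation command copied from user
+	 * space is written into the work area once, below; each
+	 * subsequent ibm,physical-attestation call then overwrites
+	 * the same work area with response data.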
+ */ + mutex_lock(&rtas_ibm_physical_attestation_lock); + param = (struct rtas_phy_attest_params *)seq->params; + param->work_area = rtas_work_area_alloc(SZ_4K); + memcpy(rtas_work_area_raw_buf(param->work_area), ¶m->cmd, + param->cmd_len); + param->sequence = 1; + param->status = 0; +} + +/** + * phy_attest_sequence_end() - Finalize a attestation command + * response retrieval sequence. + * @seq: Sequence state. + * + * Releases resources obtained by phy_attest_sequence_begin(). + */ +static void phy_attest_sequence_end(struct papr_rtas_sequence *seq) +{ + struct rtas_phy_attest_params *param; + + param = (struct rtas_phy_attest_params *)seq->params; + rtas_work_area_free(param->work_area); + mutex_unlock(&rtas_ibm_physical_attestation_lock); + kfree(param); +} + +/* + * Generator function to be passed to papr_rtas_blob_generate(). + */ +static const char *phy_attest_sequence_fill_work_area(struct papr_rtas_sequence *seq, + size_t *len) +{ + struct rtas_phy_attest_params *p; + bool init_state; + + p = (struct rtas_phy_attest_params *)seq->params; + init_state = (p->written == 0) ? true : false; + + if (papr_rtas_sequence_should_stop(seq, p->status, init_state)) + return NULL; + if (papr_rtas_sequence_set_err(seq, rtas_physical_attestation(p))) + return NULL; + *len = p->written; + return rtas_work_area_raw_buf(p->work_area); +} + +static const struct file_operations papr_phy_attest_handle_ops = { + .read = papr_rtas_common_handle_read, + .llseek = papr_rtas_common_handle_seek, + .release = papr_rtas_common_handle_release, +}; + +/** + * papr_phy_attest_create_handle() - Create a fd-based handle for + * reading the response for the given attestation command. + * @ulc: Attestation command in user memory; defines the scope of + * data for the attestation command to retrieve. + * + * Handler for PAPR_PHYSICAL_ATTESTATION_IOC_CREATE_HANDLE ioctl + * command. Validates @ulc and instantiates an immutable response + * "blob" for attestation command. The blob is attached to a file + * descriptor for reading by user space. The memory backing the blob + * is freed when the file is released. + * + * The entire requested response buffer for the attestation command + * retrieved by this call and all necessary RTAS interactions are + * performed before returning the fd to user space. This keeps the + * read handler simple and ensures that kernel can prevent + * interleaving ibm,physical-attestation call sequences. + * + * Return: The installed fd number if successful, -ve errno otherwise. + */ +static long papr_phy_attest_create_handle(struct papr_phy_attest_io_block __user *ulc) +{ + struct rtas_phy_attest_params *params; + struct papr_rtas_sequence seq = {}; + int fd; + + /* + * Freed in phy_attest_sequence_end(). + */ + params = kzalloc(sizeof(*params), GFP_KERNEL_ACCOUNT); + if (!params) + return -ENOMEM; + + if (copy_from_user(¶ms->cmd, ulc, + sizeof(struct papr_phy_attest_io_block))) + return -EFAULT; + + params->cmd_len = be32_to_cpu(params->cmd.length); + seq = (struct papr_rtas_sequence) { + .begin = phy_attest_sequence_begin, + .end = phy_attest_sequence_end, + .work = phy_attest_sequence_fill_work_area, + }; + + seq.params = (void *)params; + + fd = papr_rtas_setup_file_interface(&seq, + &papr_phy_attest_handle_ops, + "[papr-physical-attestation]"); + + return fd; +} + +/* + * Top-level ioctl handler for /dev/papr-physical-attestation. 
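+ *
+ * User-space flow sketch (hypothetical names: io is a struct
+ * papr_phy_attest_io_block holding the command, and append()
+ * stands in for accumulating the response):
+ *
+ *	devfd = open("/dev/papr-physical-attestation", O_RDWR);
+ *	fd = ioctl(devfd, PAPR_PHY_ATTEST_IOC_HANDLE, &io);
+ *	while ((n = read(fd, buf, sizeof(buf))) > 0)
+ *		append(out, buf, n);
+ *	close(fd);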
+ */ +static long papr_phy_attest_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) +{ + void __user *argp = (__force void __user *)arg; + long ret; + + switch (ioctl) { + case PAPR_PHY_ATTEST_IOC_HANDLE: + ret = papr_phy_attest_create_handle(argp); + break; + default: + ret = -ENOIOCTLCMD; + break; + } + return ret; +} + +static const struct file_operations papr_phy_attest_ops = { + .unlocked_ioctl = papr_phy_attest_dev_ioctl, +}; + +static struct miscdevice papr_phy_attest_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "papr-physical-attestation", + .fops = &papr_phy_attest_ops, +}; + +static __init int papr_phy_attest_init(void) +{ + if (!rtas_function_implemented(RTAS_FN_IBM_PHYSICAL_ATTESTATION)) + return -ENODEV; + + return misc_register(&papr_phy_attest_dev); +} +machine_device_initcall(pseries, papr_phy_attest_init); diff --git a/arch/powerpc/platforms/pseries/papr-platform-dump.c b/arch/powerpc/platforms/pseries/papr-platform-dump.c new file mode 100644 index 000000000000..f8d55eccdb6b --- /dev/null +++ b/arch/powerpc/platforms/pseries/papr-platform-dump.c @@ -0,0 +1,411 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) "papr-platform-dump: " fmt + +#include <linux/anon_inodes.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <asm/machdep.h> +#include <asm/rtas-work-area.h> +#include <asm/rtas.h> +#include <uapi/asm/papr-platform-dump.h> + +/* + * Function-specific return values for ibm,platform-dump, derived from + * PAPR+ v2.13 7.3.3.4.1 "ibm,platform-dump RTAS Call". + */ +#define RTAS_IBM_PLATFORM_DUMP_COMPLETE 0 /* Complete dump retrieved. */ +#define RTAS_IBM_PLATFORM_DUMP_CONTINUE 1 /* Continue dump */ +#define RTAS_NOT_AUTHORIZED -9002 /* Not Authorized */ + +#define RTAS_IBM_PLATFORM_DUMP_START 2 /* Linux status to start dump */ + +/** + * struct ibm_platform_dump_params - Parameters (in and out) for + * ibm,platform-dump + * @work_area: In: work area buffer for results. + * @buf_length: In: work area buffer length in bytes + * @dump_tag_hi: In: Most-significant 32 bits of a Dump_Tag representing + * an id of the dump being processed. + * @dump_tag_lo: In: Least-significant 32 bits of a Dump_Tag representing + * an id of the dump being processed. + * @sequence_hi: In: Sequence number in most-significant 32 bits. + * Out: Next sequence number in most-significant 32 bits. + * @sequence_lo: In: Sequence number in Least-significant 32 bits + * Out: Next sequence number in Least-significant 32 bits. + * @bytes_ret_hi: Out: Bytes written in most-significant 32 bits. + * @bytes_ret_lo: Out: Bytes written in Least-significant 32 bits. + * @status: Out: RTAS call status. + * @list: Maintain the list of dumps are in progress. Can + * retrieve multiple dumps with different dump IDs at + * the same time but not with the same dump ID. This list + * is used to determine whether the dump for the same ID + * is in progress. + */ +struct ibm_platform_dump_params { + struct rtas_work_area *work_area; + u32 buf_length; + u32 dump_tag_hi; + u32 dump_tag_lo; + u32 sequence_hi; + u32 sequence_lo; + u32 bytes_ret_hi; + u32 bytes_ret_lo; + s32 status; + struct list_head list; +}; + +/* + * Multiple dumps with different dump IDs can be retrieved at the same + * time, but not with dame dump ID. platform_dump_list_mutex and + * platform_dump_list are used to prevent this behavior. 
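+ * Lifecycle sketch: papr_platform_dump_create_handle() adds an entry
+ * to the list under the mutex and the release() handler removes it,
+ * so a second PAPR_PLATFORM_DUMP_IOC_CREATE_HANDLE for a dump ID
+ * that already has an open fd fails with -EALREADY.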
+ */ +static DEFINE_MUTEX(platform_dump_list_mutex); +static LIST_HEAD(platform_dump_list); + +/** + * rtas_ibm_platform_dump() - Call ibm,platform-dump to fill a work area + * buffer. + * @params: See &struct ibm_platform_dump_params. + * @buf_addr: Address of dump buffer (work_area) + * @buf_length: Length of the buffer in bytes (min. 1024) + * + * Calls ibm,platform-dump until it errors or successfully deposits data + * into the supplied work area. Handles RTAS retry statuses. Maps RTAS + * error statuses to reasonable errno values. + * + * Can request multiple dumps with different dump IDs at the same time, + * but not with the same dump ID which is prevented with the check in + * the ioctl code (papr_platform_dump_create_handle()). + * + * The caller should inspect @params.status to determine whether more + * calls are needed to complete the sequence. + * + * Context: May sleep. + * Return: -ve on error, 0 for dump complete and 1 for continue dump + */ +static int rtas_ibm_platform_dump(struct ibm_platform_dump_params *params, + phys_addr_t buf_addr, u32 buf_length) +{ + u32 rets[4]; + s32 fwrc; + int ret = 0; + + do { + fwrc = rtas_call(rtas_function_token(RTAS_FN_IBM_PLATFORM_DUMP), + 6, 5, + rets, + params->dump_tag_hi, + params->dump_tag_lo, + params->sequence_hi, + params->sequence_lo, + buf_addr, + buf_length); + } while (rtas_busy_delay(fwrc)); + + switch (fwrc) { + case RTAS_HARDWARE_ERROR: + ret = -EIO; + break; + case RTAS_NOT_AUTHORIZED: + ret = -EPERM; + break; + case RTAS_IBM_PLATFORM_DUMP_CONTINUE: + case RTAS_IBM_PLATFORM_DUMP_COMPLETE: + params->sequence_hi = rets[0]; + params->sequence_lo = rets[1]; + params->bytes_ret_hi = rets[2]; + params->bytes_ret_lo = rets[3]; + break; + default: + ret = -EIO; + pr_err_ratelimited("unexpected ibm,platform-dump status %d\n", + fwrc); + break; + } + + params->status = fwrc; + return ret; +} + +/* + * Platform dump is used with multiple RTAS calls to retrieve the + * complete dump for the provided dump ID. Once the complete dump is + * retrieved, the hypervisor returns dump complete status (0) for the + * last RTAS call and expects the caller issues one more call with + * NULL buffer to invalidate the dump so that the hypervisor can remove + * the dump. + * + * After the specific dump is invalidated in the hypervisor, expect the + * dump complete status for the new sequence - the user space initiates + * new request for the same dump ID. + */ +static ssize_t papr_platform_dump_handle_read(struct file *file, + char __user *buf, size_t size, loff_t *off) +{ + struct ibm_platform_dump_params *params = file->private_data; + u64 total_bytes; + s32 fwrc; + + /* + * Dump already completed with the previous read calls. + * In case if the user space issues further reads, returns + * -EINVAL. + */ + if (!params->buf_length) { + pr_warn_once("Platform dump completed for dump ID %llu\n", + (u64) (((u64)params->dump_tag_hi << 32) | + params->dump_tag_lo)); + return -EINVAL; + } + + /* + * The hypervisor returns status 0 if no more data available to + * download. The dump will be invalidated with ioctl (see below). + */ + if (params->status == RTAS_IBM_PLATFORM_DUMP_COMPLETE) { + params->buf_length = 0; + /* + * Returns 0 to the user space so that user + * space read stops. + */ + return 0; + } + + if (size < SZ_1K) { + pr_err_once("Buffer length should be minimum 1024 bytes\n"); + return -EINVAL; + } else if (size > params->buf_length) { + /* + * Allocate 4K work area. So if the user requests > 4K, + * resize the buffer length. 
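+		 * For example, a 64K read() is clamped to the 4K work
+		 * area; user space simply sees a short read and keeps
+		 * reading.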
+ */ + size = params->buf_length; + } + + fwrc = rtas_ibm_platform_dump(params, + rtas_work_area_phys(params->work_area), + size); + if (fwrc < 0) + return fwrc; + + total_bytes = (u64) (((u64)params->bytes_ret_hi << 32) | + params->bytes_ret_lo); + + /* + * Kernel or firmware bug, do not continue. + */ + if (WARN(total_bytes > size, "possible write beyond end of work area")) + return -EFAULT; + + if (copy_to_user(buf, rtas_work_area_raw_buf(params->work_area), + total_bytes)) + return -EFAULT; + + return total_bytes; +} + +static int papr_platform_dump_handle_release(struct inode *inode, + struct file *file) +{ + struct ibm_platform_dump_params *params = file->private_data; + + if (params->work_area) + rtas_work_area_free(params->work_area); + + mutex_lock(&platform_dump_list_mutex); + list_del(¶ms->list); + mutex_unlock(&platform_dump_list_mutex); + + kfree(params); + file->private_data = NULL; + return 0; +} + +/* + * This ioctl is used to invalidate the dump assuming the user space + * issue this ioctl after obtain the complete dump. + * Issue the last RTAS call with NULL buffer to invalidate the dump + * which means dump will be freed in the hypervisor. + */ +static long papr_platform_dump_invalidate_ioctl(struct file *file, + unsigned int ioctl, unsigned long arg) +{ + struct ibm_platform_dump_params *params; + u64 __user *argp = (void __user *)arg; + u64 param_dump_tag, dump_tag; + + if (ioctl != PAPR_PLATFORM_DUMP_IOC_INVALIDATE) + return -ENOIOCTLCMD; + + if (get_user(dump_tag, argp)) + return -EFAULT; + + /* + * private_data is freeded during release(), so should not + * happen. + */ + if (!file->private_data) { + pr_err("No valid FD to invalidate dump for the ID(%llu)\n", + dump_tag); + return -EINVAL; + } + + params = file->private_data; + param_dump_tag = (u64) (((u64)params->dump_tag_hi << 32) | + params->dump_tag_lo); + if (dump_tag != param_dump_tag) { + pr_err("Invalid dump ID(%llu) to invalidate dump\n", + dump_tag); + return -EINVAL; + } + + if (params->status != RTAS_IBM_PLATFORM_DUMP_COMPLETE) { + pr_err("Platform dump is not complete, but requested " + "to invalidate dump for ID(%llu)\n", + dump_tag); + return -EINPROGRESS; + } + + return rtas_ibm_platform_dump(params, 0, 0); +} + +static const struct file_operations papr_platform_dump_handle_ops = { + .read = papr_platform_dump_handle_read, + .release = papr_platform_dump_handle_release, + .unlocked_ioctl = papr_platform_dump_invalidate_ioctl, +}; + +/** + * papr_platform_dump_create_handle() - Create a fd-based handle for + * reading platform dump + * + * Handler for PAPR_PLATFORM_DUMP_IOC_CREATE_HANDLE ioctl command + * Allocates RTAS parameter struct and work area and attached to the + * file descriptor for reading by user space with the multiple RTAS + * calls until the dump is completed. This memory allocation is freed + * when the file is released. + * + * Multiple dump requests with different IDs are allowed at the same + * time, but not with the same dump ID. So if the user space is + * already opened file descriptor for the specific dump ID, return + * -EALREADY for the next request. + * + * @dump_tag: Dump ID for the dump requested to retrieve from the + * hypervisor + * + * Return: The installed fd number if successful, -ve errno otherwise. + */ +static long papr_platform_dump_create_handle(u64 dump_tag) +{ + struct ibm_platform_dump_params *params; + u64 param_dump_tag; + struct file *file; + long err; + int fd; + + /* + * Return failure if the user space is already opened FD for + * the specific dump ID. 
+	 * the specific dump ID. This check prevents multiple dump
+	 * requests for the same dump ID at the same time, which is
+	 * not generally expected, but guard against it.
+	 */
+	list_for_each_entry(params, &platform_dump_list, list) {
+		param_dump_tag = (u64) (((u64)params->dump_tag_hi << 32) |
+				params->dump_tag_lo);
+		if (dump_tag == param_dump_tag) {
+			pr_err("Platform dump for ID(%llu) is already in progress\n",
+					dump_tag);
+			return -EALREADY;
+		}
+	}
+
+	params = kzalloc(sizeof(struct ibm_platform_dump_params),
+			GFP_KERNEL_ACCOUNT);
+	if (!params)
+		return -ENOMEM;
+
+	params->work_area = rtas_work_area_alloc(SZ_4K);
+	params->buf_length = SZ_4K;
+	params->dump_tag_hi = (u32)(dump_tag >> 32);
+	params->dump_tag_lo = (u32)(dump_tag & 0x00000000ffffffffULL);
+	params->status = RTAS_IBM_PLATFORM_DUMP_START;
+
+	fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC);
+	if (fd < 0) {
+		err = fd;
+		goto free_area;
+	}
+
+	file = anon_inode_getfile_fmode("[papr-platform-dump]",
+			&papr_platform_dump_handle_ops,
+			(void *)params, O_RDONLY,
+			FMODE_LSEEK | FMODE_PREAD);
+	if (IS_ERR(file)) {
+		err = PTR_ERR(file);
+		goto put_fd;
+	}
+
+	fd_install(fd, file);
+
+	list_add(&params->list, &platform_dump_list);
+
+	pr_info("%s (%d) initiated platform dump for dump tag %llu\n",
+		current->comm, current->pid, dump_tag);
+	return fd;
+put_fd:
+	put_unused_fd(fd);
+free_area:
+	rtas_work_area_free(params->work_area);
+	kfree(params);
+	return err;
+}
+
+/*
+ * Top-level ioctl handler for /dev/papr-platform-dump.
+ */
+static long papr_platform_dump_dev_ioctl(struct file *filp,
+					unsigned int ioctl,
+					unsigned long arg)
+{
+	u64 __user *argp = (void __user *)arg;
+	u64 dump_tag;
+	long ret;
+
+	if (get_user(dump_tag, argp))
+		return -EFAULT;
+
+	switch (ioctl) {
+	case PAPR_PLATFORM_DUMP_IOC_CREATE_HANDLE:
+		mutex_lock(&platform_dump_list_mutex);
+		ret = papr_platform_dump_create_handle(dump_tag);
+		mutex_unlock(&platform_dump_list_mutex);
+		break;
+	default:
+		ret = -ENOIOCTLCMD;
+		break;
+	}
+	return ret;
+}
+
+static const struct file_operations papr_platform_dump_ops = {
+	.unlocked_ioctl = papr_platform_dump_dev_ioctl,
+};
+
+static struct miscdevice papr_platform_dump_dev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "papr-platform-dump",
+	.fops = &papr_platform_dump_ops,
+};
+
+static __init int papr_platform_dump_init(void)
+{
+	if (!rtas_function_implemented(RTAS_FN_IBM_PLATFORM_DUMP))
+		return -ENODEV;
+
+	return misc_register(&papr_platform_dump_dev);
+}
+machine_device_initcall(pseries, papr_platform_dump_init);
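For illustration, a minimal user-space consumer of this interface could
look like the sketch below. It is not part of the patch: the uapi header
path is an assumption, the two ioctls are used as defined above (both
take a pointer to the u64 dump tag), and read() returning 0 is assumed
to mean the dump has been fully retrieved.

    #include <fcntl.h>
    #include <stdint.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <asm/papr-platform-dump.h>	/* assumed uapi header path */

    /* Sketch only: dump_to_file() is a hypothetical helper. */
    static int dump_to_file(uint64_t dump_tag, const char *path)
    {
            char buf[4096];
            ssize_t n;
            int devfd, dumpfd, outfd;

            devfd = open("/dev/papr-platform-dump", O_RDONLY);
            if (devfd < 0)
                    return -1;

            /* Ask the kernel for an fd-based handle for this dump tag. */
            dumpfd = ioctl(devfd, PAPR_PLATFORM_DUMP_IOC_CREATE_HANDLE, &dump_tag);
            close(devfd);
            if (dumpfd < 0)
                    return -1;

            outfd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0600);
            if (outfd < 0) {
                    close(dumpfd);
                    return -1;
            }

            /* Each read() issues another ibm,platform-dump RTAS call. */
            while ((n = read(dumpfd, buf, sizeof(buf))) > 0)
                    if (write(outfd, buf, n) != n)
                            break;

            /* Dump fully read: ask the hypervisor to free it. */
            if (n == 0)
                    ioctl(dumpfd, PAPR_PLATFORM_DUMP_IOC_INVALIDATE, &dump_tag);

            close(outfd);
            close(dumpfd);
            return n < 0 ? -1 : 0;
    }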
+ * This file provides common functions needed for such sequence based + * RTAS calls Ex: ibm,get-vpd and ibm,get-indices. + */ + +bool papr_rtas_blob_has_data(const struct papr_rtas_blob *blob) +{ + return blob->data && blob->len; +} + +void papr_rtas_blob_free(const struct papr_rtas_blob *blob) +{ + if (blob) { + kvfree(blob->data); + kfree(blob); + } +} + +/** + * papr_rtas_blob_extend() - Append data to a &struct papr_rtas_blob. + * @blob: The blob to extend. + * @data: The new data to append to @blob. + * @len: The length of @data. + * + * Context: May sleep. + * Return: -ENOMEM on allocation failure, 0 otherwise. + */ +static int papr_rtas_blob_extend(struct papr_rtas_blob *blob, + const char *data, size_t len) +{ + const size_t new_len = blob->len + len; + const size_t old_len = blob->len; + const char *old_ptr = blob->data; + char *new_ptr; + + new_ptr = kvrealloc(old_ptr, new_len, GFP_KERNEL_ACCOUNT); + if (!new_ptr) + return -ENOMEM; + + memcpy(&new_ptr[old_len], data, len); + blob->data = new_ptr; + blob->len = new_len; + return 0; +} + +/** + * papr_rtas_blob_generate() - Construct a new &struct papr_rtas_blob. + * @seq: work function of the caller that is called to obtain + * data with the caller RTAS call. + * + * The @work callback is invoked until it returns NULL. @seq is + * passed to @work in its first argument on each call. When + * @work returns data, it should store the data length in its + * second argument. + * + * Context: May sleep. + * Return: A completely populated &struct papr_rtas_blob, or NULL on error. + */ +static const struct papr_rtas_blob * +papr_rtas_blob_generate(struct papr_rtas_sequence *seq) +{ + struct papr_rtas_blob *blob; + const char *buf; + size_t len; + int err = 0; + + blob = kzalloc(sizeof(*blob), GFP_KERNEL_ACCOUNT); + if (!blob) + return NULL; + + if (!seq->work) + return ERR_PTR(-EINVAL); + + + while (err == 0 && (buf = seq->work(seq, &len))) + err = papr_rtas_blob_extend(blob, buf, len); + + if (err != 0 || !papr_rtas_blob_has_data(blob)) + goto free_blob; + + return blob; +free_blob: + papr_rtas_blob_free(blob); + return NULL; +} + +int papr_rtas_sequence_set_err(struct papr_rtas_sequence *seq, int err) +{ + /* Preserve the first error recorded. */ + if (seq->error == 0) + seq->error = err; + + return seq->error; +} + +/* + * Higher-level retrieval code below. These functions use the + * papr_rtas_blob_* and sequence_* APIs defined above to create fd-based + * handles for consumption by user space. + */ + +/** + * papr_rtas_run_sequence() - Run a single retrieval sequence. + * @seq: Functions of the caller to complete the sequence + * + * Context: May sleep. Holds a mutex and an RTAS work area for its + * duration. Typically performs multiple sleepable slab + * allocations. + * + * Return: A populated &struct papr_rtas_blob on success. Encoded error + * pointer otherwise. + */ +static const struct papr_rtas_blob *papr_rtas_run_sequence(struct papr_rtas_sequence *seq) +{ + const struct papr_rtas_blob *blob; + + if (seq->begin) + seq->begin(seq); + + blob = papr_rtas_blob_generate(seq); + if (!blob) + papr_rtas_sequence_set_err(seq, -ENOMEM); + + if (seq->end) + seq->end(seq); + + + if (seq->error) { + papr_rtas_blob_free(blob); + return ERR_PTR(seq->error); + } + + return blob; +} + +/** + * papr_rtas_retrieve() - Return the data blob that is exposed to + * user space. + * @seq: RTAS call specific functions to be invoked until the + * sequence is completed. 
+ * + * Run sequences against @param until a blob is successfully + * instantiated, or a hard error is encountered, or a fatal signal is + * pending. + * + * Context: May sleep. + * Return: A fully populated data blob when successful. Encoded error + * pointer otherwise. + */ +const struct papr_rtas_blob *papr_rtas_retrieve(struct papr_rtas_sequence *seq) +{ + const struct papr_rtas_blob *blob; + + /* + * EAGAIN means the sequence returns error with a -4 (data + * changed and need to start the sequence) status from RTAS calls + * and we should attempt a new sequence. PAPR+ (v2.13 R1–7.3.20–5 + * - ibm,get-vpd, R1–7.3.17–6 - ibm,get-indices) indicates that + * this should be a transient condition, not something that + * happens continuously. But we'll stop trying on a fatal signal. + */ + do { + blob = papr_rtas_run_sequence(seq); + if (!IS_ERR(blob)) /* Success. */ + break; + if (PTR_ERR(blob) != -EAGAIN) /* Hard error. */ + break; + cond_resched(); + } while (!fatal_signal_pending(current)); + + return blob; +} + +/** + * papr_rtas_setup_file_interface - Complete the sequence and obtain + * the data and export to user space with fd-based handles. Then the + * user spave gets the data with read() handle. + * @seq: RTAS call specific functions to get the data. + * @fops: RTAS call specific file operations such as read(). + * @name: RTAS call specific char device node. + * + * Return: FD handle for consumption by user space + */ +long papr_rtas_setup_file_interface(struct papr_rtas_sequence *seq, + const struct file_operations *fops, + char *name) +{ + const struct papr_rtas_blob *blob; + struct file *file; + long ret; + int fd; + + blob = papr_rtas_retrieve(seq); + if (IS_ERR(blob)) + return PTR_ERR(blob); + + fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC); + if (fd < 0) { + ret = fd; + goto free_blob; + } + + file = anon_inode_getfile_fmode(name, fops, (void *)blob, + O_RDONLY, FMODE_LSEEK | FMODE_PREAD); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto put_fd; + } + + fd_install(fd, file); + return fd; + +put_fd: + put_unused_fd(fd); +free_blob: + papr_rtas_blob_free(blob); + return ret; +} + +/* + * papr_rtas_sequence_should_stop() - Determine whether RTAS retrieval + * sequence should continue. + * + * Examines the sequence error state and outputs of the last call to + * the specific RTAS to determine whether the sequence in progress + * should continue or stop. + * + * Return: True if the sequence has encountered an error or if all data + * for this sequence has been retrieved. False otherwise. + */ +bool papr_rtas_sequence_should_stop(const struct papr_rtas_sequence *seq, + s32 status, bool init_state) +{ + bool done; + + if (seq->error) + return true; + + switch (status) { + case RTAS_SEQ_COMPLETE: + if (init_state) + done = false; /* Initial state. */ + else + done = true; /* All data consumed. */ + break; + case RTAS_SEQ_MORE_DATA: + done = false; /* More data available. */ + break; + default: + done = true; /* Error encountered. */ + break; + } + + return done; +} + +/* + * User space read to retrieve data for the corresponding RTAS call. + * papr_rtas_blob is filled with the data using the corresponding RTAS + * call sequence API. + */ +ssize_t papr_rtas_common_handle_read(struct file *file, + char __user *buf, size_t size, loff_t *off) +{ + const struct papr_rtas_blob *blob = file->private_data; + + /* We should not instantiate a handle without any data attached. 
+
+/*
+ * papr_rtas_sequence_should_stop() - Determine whether the RTAS
+ * retrieval sequence should stop.
+ *
+ * Examines the sequence error state and the status of the last RTAS
+ * call to determine whether the sequence in progress should continue
+ * or stop.
+ *
+ * Return: True if the sequence has encountered an error or if all data
+ *         for this sequence has been retrieved. False otherwise.
+ */
+bool papr_rtas_sequence_should_stop(const struct papr_rtas_sequence *seq,
+				s32 status, bool init_state)
+{
+	bool done;
+
+	if (seq->error)
+		return true;
+
+	switch (status) {
+	case RTAS_SEQ_COMPLETE:
+		if (init_state)
+			done = false; /* Initial state. */
+		else
+			done = true; /* All data consumed. */
+		break;
+	case RTAS_SEQ_MORE_DATA:
+		done = false; /* More data available. */
+		break;
+	default:
+		done = true; /* Error encountered. */
+		break;
+	}
+
+	return done;
+}
+
+/*
+ * User space read to retrieve data for the corresponding RTAS call.
+ * papr_rtas_blob is filled with the data using the corresponding RTAS
+ * call sequence API.
+ */
+ssize_t papr_rtas_common_handle_read(struct file *file,
+		char __user *buf, size_t size, loff_t *off)
+{
+	const struct papr_rtas_blob *blob = file->private_data;
+
+	/* We should not instantiate a handle without any data attached. */
+	if (!papr_rtas_blob_has_data(blob)) {
+		pr_err_once("handle without data\n");
+		return -EIO;
+	}
+
+	return simple_read_from_buffer(buf, size, off, blob->data, blob->len);
+}
+
+int papr_rtas_common_handle_release(struct inode *inode,
+		struct file *file)
+{
+	const struct papr_rtas_blob *blob = file->private_data;
+
+	papr_rtas_blob_free(blob);
+
+	return 0;
+}
+
+loff_t papr_rtas_common_handle_seek(struct file *file, loff_t off,
+		int whence)
+{
+	const struct papr_rtas_blob *blob = file->private_data;
+
+	return fixed_size_llseek(file, off, whence, blob->len);
+}
diff --git a/arch/powerpc/platforms/pseries/papr-rtas-common.h b/arch/powerpc/platforms/pseries/papr-rtas-common.h
new file mode 100644
index 000000000000..4ceabcaf4905
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/papr-rtas-common.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_PAPR_RTAS_COMMON_H
+#define _ASM_POWERPC_PAPR_RTAS_COMMON_H
+
+#include <linux/types.h>
+
+/*
+ * Return codes for sequence-based RTAS calls. These are not listed
+ * under PAPR+ v2.13 7.2.8: "Return Codes", but are defined in the
+ * specific section of each such RTAS call.
+ */
+#define RTAS_SEQ_COMPLETE	0	/* All data has been retrieved. */
+#define RTAS_SEQ_MORE_DATA	1	/* More data is available. */
+#define RTAS_SEQ_START_OVER	-4	/* Data changed, restart call sequence. */
+
+/*
+ * Internal "blob" APIs for accumulating RTAS call results into
+ * an immutable buffer to be attached to a file descriptor.
+ */
+struct papr_rtas_blob {
+	const char *data;
+	size_t len;
+};
+
+/**
+ * struct papr_rtas_sequence - State for managing a sequence of RTAS calls.
+ * @error: Shall be zero as long as the sequence has not encountered an error,
+ *         -ve errno otherwise. Use papr_rtas_sequence_set_err() to update.
+ * @params: Parameter block to pass to rtas_*() calls.
+ * @begin: Allocate the work area and initialize the parameter values
+ *         passed to the RTAS call.
+ * @end: Free the allocated work area.
+ * @work: Obtain data from the RTAS call; invoked repeatedly until the
+ *        sequence is completed.
+ */
+struct papr_rtas_sequence {
+	int error;
+	void *params;
+	void (*begin)(struct papr_rtas_sequence *seq);
+	void (*end)(struct papr_rtas_sequence *seq);
+	const char *(*work)(struct papr_rtas_sequence *seq, size_t *len);
+};
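A hypothetical client of this header wires the callbacks as sketched
below (the foo_* names are placeholders; the papr-vpd conversion later
in this patch is the real example):

    static const struct file_operations foo_handle_ops = {
            .read    = papr_rtas_common_handle_read,
            .llseek  = papr_rtas_common_handle_seek,
            .release = papr_rtas_common_handle_release,
    };

    /* foo_* callbacks and struct foo_params are placeholders. */
    static long foo_create_handle(struct foo_params *params)
    {
            struct papr_rtas_sequence seq = {
                    .begin = foo_sequence_begin,
                    .end   = foo_sequence_end,
                    .work  = foo_sequence_fill_work_area,
            };

            seq.params = (void *)params;
            return papr_rtas_setup_file_interface(&seq, &foo_handle_ops,
                                                  "[papr-foo]");
    }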
+ * + */ +struct papr_rtas_sequence { + int error; + void *params; + void (*begin)(struct papr_rtas_sequence *seq); + void (*end)(struct papr_rtas_sequence *seq); + const char *(*work)(struct papr_rtas_sequence *seq, size_t *len); +}; + +extern bool papr_rtas_blob_has_data(const struct papr_rtas_blob *blob); +extern void papr_rtas_blob_free(const struct papr_rtas_blob *blob); +extern int papr_rtas_sequence_set_err(struct papr_rtas_sequence *seq, + int err); +extern const struct papr_rtas_blob *papr_rtas_retrieve(struct papr_rtas_sequence *seq); +extern long papr_rtas_setup_file_interface(struct papr_rtas_sequence *seq, + const struct file_operations *fops, char *name); +extern bool papr_rtas_sequence_should_stop(const struct papr_rtas_sequence *seq, + s32 status, bool init_state); +extern ssize_t papr_rtas_common_handle_read(struct file *file, + char __user *buf, size_t size, loff_t *off); +extern int papr_rtas_common_handle_release(struct inode *inode, + struct file *file); +extern loff_t papr_rtas_common_handle_seek(struct file *file, loff_t off, + int whence); +#endif /* _ASM_POWERPC_PAPR_RTAS_COMMON_H */ + diff --git a/arch/powerpc/platforms/pseries/papr-vpd.c b/arch/powerpc/platforms/pseries/papr-vpd.c index c86950d7105a..f38c188fc4a1 100644 --- a/arch/powerpc/platforms/pseries/papr-vpd.c +++ b/arch/powerpc/platforms/pseries/papr-vpd.c @@ -2,7 +2,6 @@ #define pr_fmt(fmt) "papr-vpd: " fmt -#include <linux/anon_inodes.h> #include <linux/build_bug.h> #include <linux/file.h> #include <linux/fs.h> @@ -20,14 +19,7 @@ #include <asm/rtas-work-area.h> #include <asm/rtas.h> #include <uapi/asm/papr-vpd.h> - -/* - * Function-specific return values for ibm,get-vpd, derived from PAPR+ - * v2.13 7.3.20 "ibm,get-vpd RTAS Call". - */ -#define RTAS_IBM_GET_VPD_COMPLETE 0 /* All VPD has been retrieved. */ -#define RTAS_IBM_GET_VPD_MORE_DATA 1 /* More VPD is available. */ -#define RTAS_IBM_GET_VPD_START_OVER -4 /* VPD changed, restart call sequence. */ +#include "papr-rtas-common.h" /** * struct rtas_ibm_get_vpd_params - Parameters (in and out) for ibm,get-vpd. @@ -91,13 +83,14 @@ static int rtas_ibm_get_vpd(struct rtas_ibm_get_vpd_params *params) case RTAS_INVALID_PARAMETER: ret = -EINVAL; break; - case RTAS_IBM_GET_VPD_START_OVER: + case RTAS_SEQ_START_OVER: ret = -EAGAIN; + pr_info_ratelimited("VPD changed during retrieval, retrying\n"); break; - case RTAS_IBM_GET_VPD_MORE_DATA: + case RTAS_SEQ_MORE_DATA: params->sequence = rets[0]; fallthrough; - case RTAS_IBM_GET_VPD_COMPLETE: + case RTAS_SEQ_COMPLETE: params->written = rets[1]; /* * Kernel or firmware bug, do not continue. @@ -119,91 +112,6 @@ static int rtas_ibm_get_vpd(struct rtas_ibm_get_vpd_params *params) } /* - * Internal VPD "blob" APIs for accumulating ibm,get-vpd results into - * an immutable buffer to be attached to a file descriptor. - */ -struct vpd_blob { - const char *data; - size_t len; -}; - -static bool vpd_blob_has_data(const struct vpd_blob *blob) -{ - return blob->data && blob->len; -} - -static void vpd_blob_free(const struct vpd_blob *blob) -{ - if (blob) { - kvfree(blob->data); - kfree(blob); - } -} - -/** - * vpd_blob_extend() - Append data to a &struct vpd_blob. - * @blob: The blob to extend. - * @data: The new data to append to @blob. - * @len: The length of @data. - * - * Context: May sleep. - * Return: -ENOMEM on allocation failure, 0 otherwise. 
- */
-static int vpd_blob_extend(struct vpd_blob *blob, const char *data, size_t len)
-{
-	const size_t new_len = blob->len + len;
-	const size_t old_len = blob->len;
-	const char *old_ptr = blob->data;
-	char *new_ptr;
-
-	new_ptr = kvrealloc(old_ptr, new_len, GFP_KERNEL_ACCOUNT);
-	if (!new_ptr)
-		return -ENOMEM;
-
-	memcpy(&new_ptr[old_len], data, len);
-	blob->data = new_ptr;
-	blob->len = new_len;
-	return 0;
-}
-
-/**
- * vpd_blob_generate() - Construct a new &struct vpd_blob.
- * @generator: Function that supplies the blob data.
- * @arg: Context pointer supplied by caller, passed to @generator.
- *
- * The @generator callback is invoked until it returns NULL. @arg is
- * passed to @generator in its first argument on each call. When
- * @generator returns data, it should store the data length in its
- * second argument.
- *
- * Context: May sleep.
- * Return: A completely populated &struct vpd_blob, or NULL on error.
- */
-static const struct vpd_blob *
-vpd_blob_generate(const char * (*generator)(void *, size_t *), void *arg)
-{
-	struct vpd_blob *blob;
-	const char *buf;
-	size_t len;
-	int err = 0;
-
-	blob = kzalloc(sizeof(*blob), GFP_KERNEL_ACCOUNT);
-	if (!blob)
-		return NULL;
-
-	while (err == 0 && (buf = generator(arg, &len)))
-		err = vpd_blob_extend(blob, buf, len);
-
-	if (err != 0 || !vpd_blob_has_data(blob))
-		goto free_blob;
-
-	return blob;
-free_blob:
-	vpd_blob_free(blob);
-	return NULL;
-}
-
-/*
  * Internal VPD sequence APIs. A VPD sequence is a series of calls to
  * ibm,get-vpd for a given location code. The sequence ends when an
  * error is encountered or all VPD for the location code has been
@@ -211,30 +119,14 @@ free_blob:
  */
 
 /**
- * struct vpd_sequence - State for managing a VPD sequence.
- * @error: Shall be zero as long as the sequence has not encountered an error,
- *         -ve errno otherwise. Use vpd_sequence_set_err() to update this.
- * @params: Parameter block to pass to rtas_ibm_get_vpd().
- */
-struct vpd_sequence {
-	int error;
-	struct rtas_ibm_get_vpd_params params;
-};
-
-/**
  * vpd_sequence_begin() - Begin a VPD retrieval sequence.
- * @seq: Uninitialized sequence state.
- * @loc_code: Location code that defines the scope of the VPD to return.
- *
- * Initializes @seq with the resources necessary to carry out a VPD
- * sequence. Callers must pass @seq to vpd_sequence_end() regardless
- * of whether the sequence succeeds.
+ * @seq: Sequence state embedding the ibm,get-vpd parameter block.
  *
  * Context: May sleep.
  */
-static void vpd_sequence_begin(struct vpd_sequence *seq,
-			       const struct papr_location_code *loc_code)
+static void vpd_sequence_begin(struct papr_rtas_sequence *seq)
 {
+	struct rtas_ibm_get_vpd_params *vpd_params;
 	/*
 	 * Use a static data structure for the location code passed to
 	 * RTAS to ensure it's in the RMA and avoid a separate work
@@ -242,6 +134,7 @@ static void vpd_sequence_begin(struct vpd_sequence *seq,
 	 */
 	static struct papr_location_code static_loc_code;
 
+	vpd_params = (struct rtas_ibm_get_vpd_params *)seq->params;
 	/*
 	 * We could allocate the work area before acquiring the
 	 * function lock, but that would allow concurrent requests to
@@ -249,14 +142,11 @@ static void vpd_sequence_begin(struct vpd_sequence *seq,
 	 * allocate the work area under the lock.
 	 */
 	mutex_lock(&rtas_ibm_get_vpd_lock);
-	static_loc_code = *loc_code;
-	*seq = (struct vpd_sequence) {
-		.params = {
-			.work_area = rtas_work_area_alloc(SZ_4K),
-			.loc_code = &static_loc_code,
-			.sequence = 1,
-		},
-	};
+	static_loc_code = *(struct papr_location_code *)vpd_params->loc_code;
+	vpd_params->work_area = rtas_work_area_alloc(SZ_4K);
+	vpd_params->loc_code = &static_loc_code;
+	vpd_params->sequence = 1;
+	vpd_params->status = 0;
 }
 
 /**
@@ -265,180 +156,39 @@ static void vpd_sequence_begin(struct vpd_sequence *seq,
  *
  * Releases resources obtained by vpd_sequence_begin().
  */
-static void vpd_sequence_end(struct vpd_sequence *seq)
+static void vpd_sequence_end(struct papr_rtas_sequence *seq)
 {
-	rtas_work_area_free(seq->params.work_area);
-	mutex_unlock(&rtas_ibm_get_vpd_lock);
-}
-
-/**
- * vpd_sequence_should_stop() - Determine whether a VPD retrieval sequence
- * should continue.
- * @seq: VPD sequence state.
- *
- * Examines the sequence error state and outputs of the last call to
- * ibm,get-vpd to determine whether the sequence in progress should
- * continue or stop.
- *
- * Return: True if the sequence has encountered an error or if all VPD for
- * this sequence has been retrieved. False otherwise.
- */
-static bool vpd_sequence_should_stop(const struct vpd_sequence *seq)
-{
-	bool done;
-
-	if (seq->error)
-		return true;
+	struct rtas_ibm_get_vpd_params *vpd_params;
 
-	switch (seq->params.status) {
-	case 0:
-		if (seq->params.written == 0)
-			done = false; /* Initial state. */
-		else
-			done = true; /* All data consumed. */
-		break;
-	case 1:
-		done = false; /* More data available. */
-		break;
-	default:
-		done = true; /* Error encountered. */
-		break;
-	}
-
-	return done;
-}
-
-static int vpd_sequence_set_err(struct vpd_sequence *seq, int err)
-{
-	/* Preserve the first error recorded. */
-	if (seq->error == 0)
-		seq->error = err;
-
-	return seq->error;
+	vpd_params = (struct rtas_ibm_get_vpd_params *)seq->params;
+	rtas_work_area_free(vpd_params->work_area);
+	mutex_unlock(&rtas_ibm_get_vpd_lock);
 }
 
 /*
- * Generator function to be passed to vpd_blob_generate().
+ * Generator function to be passed to papr_rtas_blob_generate().
 */
-static const char *vpd_sequence_fill_work_area(void *arg, size_t *len)
+static const char *vpd_sequence_fill_work_area(struct papr_rtas_sequence *seq,
+						size_t *len)
 {
-	struct vpd_sequence *seq = arg;
-	struct rtas_ibm_get_vpd_params *p = &seq->params;
+	struct rtas_ibm_get_vpd_params *p;
+	bool init_state;
 
-	if (vpd_sequence_should_stop(seq))
+	p = (struct rtas_ibm_get_vpd_params *)seq->params;
+	init_state = (p->written == 0);
+
+	if (papr_rtas_sequence_should_stop(seq, p->status, init_state))
 		return NULL;
-	if (vpd_sequence_set_err(seq, rtas_ibm_get_vpd(p)))
+	if (papr_rtas_sequence_set_err(seq, rtas_ibm_get_vpd(p)))
 		return NULL;
 
 	*len = p->written;
 	return rtas_work_area_raw_buf(p->work_area);
 }
 
-/*
- * Higher-level VPD retrieval code below. These functions use the
- * vpd_blob_* and vpd_sequence_* APIs defined above to create fd-based
- * VPD handles for consumption by user space.
- */
-
-/**
- * papr_vpd_run_sequence() - Run a single VPD retrieval sequence.
- * @loc_code: Location code that defines the scope of VPD to return.
- *
- * Context: May sleep. Holds a mutex and an RTAS work area for its
- *          duration. Typically performs multiple sleepable slab
- *          allocations.
- *
- * Return: A populated &struct vpd_blob on success. Encoded error
- *         pointer otherwise.
- */ -static const struct vpd_blob *papr_vpd_run_sequence(const struct papr_location_code *loc_code) -{ - const struct vpd_blob *blob; - struct vpd_sequence seq; - - vpd_sequence_begin(&seq, loc_code); - blob = vpd_blob_generate(vpd_sequence_fill_work_area, &seq); - if (!blob) - vpd_sequence_set_err(&seq, -ENOMEM); - vpd_sequence_end(&seq); - - if (seq.error) { - vpd_blob_free(blob); - return ERR_PTR(seq.error); - } - - return blob; -} - -/** - * papr_vpd_retrieve() - Return the VPD for a location code. - * @loc_code: Location code that defines the scope of VPD to return. - * - * Run VPD sequences against @loc_code until a blob is successfully - * instantiated, or a hard error is encountered, or a fatal signal is - * pending. - * - * Context: May sleep. - * Return: A fully populated VPD blob when successful. Encoded error - * pointer otherwise. - */ -static const struct vpd_blob *papr_vpd_retrieve(const struct papr_location_code *loc_code) -{ - const struct vpd_blob *blob; - - /* - * EAGAIN means the sequence errored with a -4 (VPD changed) - * status from ibm,get-vpd, and we should attempt a new - * sequence. PAPR+ v2.13 R1–7.3.20–5 indicates that this - * should be a transient condition, not something that happens - * continuously. But we'll stop trying on a fatal signal. - */ - do { - blob = papr_vpd_run_sequence(loc_code); - if (!IS_ERR(blob)) /* Success. */ - break; - if (PTR_ERR(blob) != -EAGAIN) /* Hard error. */ - break; - pr_info_ratelimited("VPD changed during retrieval, retrying\n"); - cond_resched(); - } while (!fatal_signal_pending(current)); - - return blob; -} - -static ssize_t papr_vpd_handle_read(struct file *file, char __user *buf, size_t size, loff_t *off) -{ - const struct vpd_blob *blob = file->private_data; - - /* bug: we should not instantiate a handle without any data attached. 
*/ - if (!vpd_blob_has_data(blob)) { - pr_err_once("handle without data\n"); - return -EIO; - } - - return simple_read_from_buffer(buf, size, off, blob->data, blob->len); -} - -static int papr_vpd_handle_release(struct inode *inode, struct file *file) -{ - const struct vpd_blob *blob = file->private_data; - - vpd_blob_free(blob); - - return 0; -} - -static loff_t papr_vpd_handle_seek(struct file *file, loff_t off, int whence) -{ - const struct vpd_blob *blob = file->private_data; - - return fixed_size_llseek(file, off, whence, blob->len); -} - - static const struct file_operations papr_vpd_handle_ops = { - .read = papr_vpd_handle_read, - .llseek = papr_vpd_handle_seek, - .release = papr_vpd_handle_release, + .read = papr_rtas_common_handle_read, + .llseek = papr_rtas_common_handle_seek, + .release = papr_rtas_common_handle_release, }; /** @@ -460,10 +210,9 @@ static const struct file_operations papr_vpd_handle_ops = { */ static long papr_vpd_create_handle(struct papr_location_code __user *ulc) { + struct rtas_ibm_get_vpd_params vpd_params = {}; + struct papr_rtas_sequence seq = {}; struct papr_location_code klc; - const struct vpd_blob *blob; - struct file *file; - long err; int fd; if (copy_from_user(&klc, ulc, sizeof(klc))) @@ -472,30 +221,19 @@ static long papr_vpd_create_handle(struct papr_location_code __user *ulc) if (!string_is_terminated(klc.str, ARRAY_SIZE(klc.str))) return -EINVAL; - blob = papr_vpd_retrieve(&klc); - if (IS_ERR(blob)) - return PTR_ERR(blob); + seq = (struct papr_rtas_sequence) { + .begin = vpd_sequence_begin, + .end = vpd_sequence_end, + .work = vpd_sequence_fill_work_area, + }; - fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC); - if (fd < 0) { - err = fd; - goto free_blob; - } + vpd_params.loc_code = &klc; + seq.params = (void *)&vpd_params; + + fd = papr_rtas_setup_file_interface(&seq, &papr_vpd_handle_ops, + "[papr-vpd]"); - file = anon_inode_getfile_fmode("[papr-vpd]", &papr_vpd_handle_ops, - (void *)blob, O_RDONLY, - FMODE_LSEEK | FMODE_PREAD); - if (IS_ERR(file)) { - err = PTR_ERR(file); - goto put_fd; - } - fd_install(fd, file); return fd; -put_fd: - put_unused_fd(fd); -free_blob: - vpd_blob_free(blob); - return err; } /* diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index 47db732981a8..e22fc638dbc7 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -138,7 +138,7 @@ static void __cpm2_gpio32_set(struct of_mm_gpio_chip *mm_gc, u32 pin_mask, out_be32(&iop->dat, cpm2_gc->cpdata); } -static void cpm2_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value) +static int cpm2_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value) { struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(gc); @@ -150,6 +150,8 @@ static void cpm2_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value) __cpm2_gpio32_set(mm_gc, pin_mask, value); spin_unlock_irqrestore(&cpm2_gc->lock, flags); + + return 0; } static int cpm2_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) @@ -208,7 +210,7 @@ int cpm2_gpiochip_add32(struct device *dev) gc->direction_input = cpm2_gpio32_dir_in; gc->direction_output = cpm2_gpio32_dir_out; gc->get = cpm2_gpio32_get; - gc->set = cpm2_gpio32_set; + gc->set_rv = cpm2_gpio32_set; gc->parent = dev; gc->owner = THIS_MODULE; diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 4afbab83a2e2..c706a08e9955 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ 
-27,6 +27,7 @@
 #include <linux/spinlock.h>
 #include <linux/pci.h>
 #include <linux/slab.h>
+#include <linux/string_choices.h>
 #include <linux/syscore_ops.h>
 #include <linux/ratelimit.h>
 #include <linux/pgtable.h>
@@ -474,9 +475,9 @@ static void __init mpic_scan_ht_msi(struct mpic *mpic, u8 __iomem *devbase,
 		addr = addr | ((u64)readl(base + HT_MSI_ADDR_HI) << 32);
 	}
 
-	printk(KERN_DEBUG "mpic:   - HT:%02x.%x %s MSI mapping found @ 0x%llx\n",
-		PCI_SLOT(devfn), PCI_FUNC(devfn),
-		flags & HT_MSI_FLAGS_ENABLE ? "enabled" : "disabled", addr);
+	pr_debug("mpic:   - HT:%02x.%x %s MSI mapping found @ 0x%llx\n",
+		 PCI_SLOT(devfn), PCI_FUNC(devfn),
+		 str_enabled_disabled(flags & HT_MSI_FLAGS_ENABLE), addr);
 
 	if (!(flags & HT_MSI_FLAGS_ENABLE))
 		writeb(flags | HT_MSI_FLAGS_ENABLE, base + HT_MSI_FLAGS);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 88abffa8b54c..cb3a3244ae6f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1770,7 +1770,7 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr,
 				sp + STACK_INT_FRAME_REGS);
 			break;
 		}
-		printf("--- Exception: %lx %s at ", regs.trap,
+		printf("---- Exception: %lx %s at ", regs.trap,
 			getvecname(TRAP(&regs)));
 		pc = regs.nip;
 		lr = regs.link;
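As an aside on the cpm_common.c hunk earlier: it follows the gpiolib
convention where the .set_rv callback returns an int so drivers can
propagate errors, while returning 0 preserves the old void-setter
behavior. A minimal sketch of the pattern (example_gpio_set is a
placeholder, not from this patch):

    #include <linux/gpio/driver.h>

    /* New-style gpiochip setter: return 0 on success, -ve errno on error. */
    static int example_gpio_set(struct gpio_chip *gc, unsigned int gpio,
                                int value)
    {
            /* ... drive the pin here ... */
            return 0;	/* this driver has no failure path */
    }

    /* Registered via gc->set_rv = example_gpio_set; instead of gc->set. */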