diff options
30 files changed, 562 insertions, 235 deletions
diff --git a/Documentation/devicetree/bindings/bus/microsoft,vmbus.yaml b/Documentation/devicetree/bindings/bus/microsoft,vmbus.yaml index a8d40c766dcd..0bea4f5287ce 100644 --- a/Documentation/devicetree/bindings/bus/microsoft,vmbus.yaml +++ b/Documentation/devicetree/bindings/bus/microsoft,vmbus.yaml @@ -10,8 +10,8 @@ maintainers: - Saurabh Sengar <ssengar@linux.microsoft.com> description: - VMBus is a software bus that implement the protocols for communication - between the root or host OS and guest OSs (virtual machines). + VMBus is a software bus that implements the protocols for communication + between the root or host OS and guest OS'es (virtual machines). properties: compatible: @@ -25,9 +25,16 @@ properties: '#size-cells': const: 1 + dma-coherent: true + + interrupts: + maxItems: 1 + description: Interrupt is used to report a message from the host. + required: - compatible - ranges + - interrupts - '#address-cells' - '#size-cells' @@ -35,6 +42,8 @@ additionalProperties: false examples: - | + #include <dt-bindings/interrupt-controller/irq.h> + #include <dt-bindings/interrupt-controller/arm-gic.h> soc { #address-cells = <2>; #size-cells = <1>; @@ -49,6 +58,9 @@ examples: #address-cells = <2>; #size-cells = <1>; ranges = <0x0f 0xf0000000 0x0f 0xf0000000 0x10000000>; + dma-coherent; + interrupt-parent = <&gic>; + interrupts = <GIC_PPI 2 IRQ_TYPE_EDGE_RISING>; }; }; }; diff --git a/Documentation/virt/hyperv/vmbus.rst b/Documentation/virt/hyperv/vmbus.rst index 1dcef6a7fda3..654bb4849972 100644 --- a/Documentation/virt/hyperv/vmbus.rst +++ b/Documentation/virt/hyperv/vmbus.rst @@ -250,10 +250,18 @@ interrupts are not Linux IRQs, there are no entries in /proc/interrupts or /proc/irq corresponding to individual VMBus channel interrupts. An online CPU in a Linux guest may not be taken offline if it has -VMBus channel interrupts assigned to it. Any such channel -interrupts must first be manually reassigned to another CPU as -described above. When no channel interrupts are assigned to the -CPU, it can be taken offline. +VMBus channel interrupts assigned to it. Starting in kernel v6.15, +any such interrupts are automatically reassigned to some other CPU +at the time of offlining. The "other" CPU is chosen by the +implementation and is not load balanced or otherwise intelligently +determined. If the CPU is onlined again, channel interrupts previously +assigned to it are not moved back. As a result, after multiple CPUs +have been offlined, and perhaps onlined again, the interrupt-to-CPU +mapping may be scrambled and non-optimal. In such a case, optimal +assignments must be re-established manually. For kernels v6.14 and +earlier, any conflicting channel interrupts must first be manually +reassigned to another CPU as described above. Then when no channel +interrupts are assigned to the CPU, it can be taken offline. The VMBus channel interrupt handling code is designed to work correctly even if an interrupt is received on a CPU other than the @@ -324,3 +332,15 @@ rescinded, neither Hyper-V nor Linux retains any state about its previous existence. Such a device might be re-added later, in which case it is treated as an entirely new device. See vmbus_onoffer_rescind(). + +For some devices, such as the KVP device, Hyper-V automatically +sends a rescind message when the primary channel is closed, +likely as a result of unbinding the device from its driver. +The rescind causes Linux to remove the device. But then Hyper-V +immediately reoffers the device to the guest, causing a new +instance of the device to be created in Linux. For other +devices, such as the synthetic SCSI and NIC devices, closing the +primary channel does *not* result in Hyper-V sending a rescind +message. The device continues to exist in Linux on the VMBus, +but with no driver bound to it. The same driver or a new driver +can subsequently be bound to the existing instance of the device. diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c index 4e27cc29c79e..4fdc26ade1d7 100644 --- a/arch/arm64/hyperv/mshyperv.c +++ b/arch/arm64/hyperv/mshyperv.c @@ -28,6 +28,48 @@ int hv_get_hypervisor_version(union hv_hypervisor_version_info *info) } EXPORT_SYMBOL_GPL(hv_get_hypervisor_version); +#ifdef CONFIG_ACPI + +static bool __init hyperv_detect_via_acpi(void) +{ + if (acpi_disabled) + return false; + /* + * Hypervisor ID is only available in ACPI v6+, and the + * structure layout was extended in v6 to accommodate that + * new field. + * + * At the very minimum, this check makes sure not to read + * past the FADT structure. + * + * It is also needed to catch running in some unknown + * non-Hyper-V environment that has ACPI 5.x or less. + * In such a case, it can't be Hyper-V. + */ + if (acpi_gbl_FADT.header.revision < 6) + return false; + return strncmp((char *)&acpi_gbl_FADT.hypervisor_id, "MsHyperV", 8) == 0; +} + +#else + +static bool __init hyperv_detect_via_acpi(void) +{ + return false; +} + +#endif + +static bool __init hyperv_detect_via_smccc(void) +{ + uuid_t hyperv_uuid = UUID_INIT( + 0x58ba324d, 0x6447, 0x24cd, + 0x75, 0x6c, 0xef, 0x8e, + 0x24, 0x70, 0x59, 0x16); + + return arm_smccc_hypervisor_has_uuid(&hyperv_uuid); +} + static int __init hyperv_init(void) { struct hv_get_vp_registers_output result; @@ -36,13 +78,11 @@ static int __init hyperv_init(void) /* * Allow for a kernel built with CONFIG_HYPERV to be running in - * a non-Hyper-V environment, including on DT instead of ACPI. + * a non-Hyper-V environment. + * * In such cases, do nothing and return success. */ - if (acpi_disabled) - return 0; - - if (strncmp((char *)&acpi_gbl_FADT.hypervisor_id, "MsHyperV", 8)) + if (!hyperv_detect_via_acpi() && !hyperv_detect_via_smccc()) return 0; /* Setup the guest ID */ @@ -77,6 +117,9 @@ static int __init hyperv_init(void) if (ms_hyperv.priv_high & HV_ACCESS_PARTITION_ID) hv_get_partition_id(); + ms_hyperv.vtl = get_vtl(); + if (ms_hyperv.vtl > 0) /* non default VTL */ + pr_info("Linux runs in Hyper-V Virtual Trust Level %d\n", ms_hyperv.vtl); ms_hyperv_late_init(); diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 569941eeb3fe..58c5fe7d7572 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -270,6 +270,7 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) u32 feature; u8 action; gpa_t gpa; + uuid_t uuid; action = kvm_smccc_get_action(vcpu, func_id); switch (action) { @@ -355,10 +356,11 @@ int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) val[0] = gpa; break; case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID: - val[0] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0; - val[1] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1; - val[2] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2; - val[3] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3; + uuid = ARM_SMCCC_VENDOR_HYP_UID_KVM; + val[0] = smccc_uuid_to_reg(&uuid, 0); + val[1] = smccc_uuid_to_reg(&uuid, 1); + val[2] = smccc_uuid_to_reg(&uuid, 2); + val[3] = smccc_uuid_to_reg(&uuid, 3); break; case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID: val[0] = smccc_feat->vendor_hyp_bmap; diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index fbc1215d2746..b6db4e0b936b 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -869,12 +869,12 @@ static void *snp_alloc_vmsa_page(int cpu) return page_address(p + 1); } -static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) +static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip, unsigned int cpu) { struct sev_es_save_area *cur_vmsa, *vmsa; struct svsm_ca *caa; u8 sipi_vector; - int cpu, ret; + int ret; u64 cr4; /* @@ -895,15 +895,6 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) /* Override start_ip with known protected guest start IP */ start_ip = real_mode_header->sev_es_trampoline_start; - - /* Find the logical CPU for the APIC ID */ - for_each_present_cpu(cpu) { - if (arch_match_cpu_phys_id(cpu, apic_id)) - break; - } - if (cpu >= nr_cpu_ids) - return -EINVAL; - cur_vmsa = per_cpu(sev_vmsa, cpu); /* diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 5d27194a2efa..3d1d3547095a 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -391,40 +391,6 @@ static void __init hv_stimer_setup_percpu_clockev(void) old_setup_percpu_clockev(); } -#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE) -static u8 __init get_vtl(void) -{ - u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS; - struct hv_input_get_vp_registers *input; - struct hv_output_get_vp_registers *output; - unsigned long flags; - u64 ret; - - local_irq_save(flags); - input = *this_cpu_ptr(hyperv_pcpu_input_arg); - output = *this_cpu_ptr(hyperv_pcpu_output_arg); - - memset(input, 0, struct_size(input, names, 1)); - input->partition_id = HV_PARTITION_ID_SELF; - input->vp_index = HV_VP_INDEX_SELF; - input->input_vtl.as_uint8 = 0; - input->names[0] = HV_REGISTER_VSM_VP_STATUS; - - ret = hv_do_hypercall(control, input, output); - if (hv_result_success(ret)) { - ret = output->values[0].reg8 & HV_X64_VTL_MASK; - } else { - pr_err("Failed to get VTL(error: %lld) exiting...\n", ret); - BUG(); - } - - local_irq_restore(flags); - return ret; -} -#else -static inline u8 get_vtl(void) { return 0; } -#endif - /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. @@ -707,3 +673,36 @@ bool hv_is_hyperv_initialized(void) return hypercall_msr.enable; } EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); + +int hv_apicid_to_vp_index(u32 apic_id) +{ + u64 control; + u64 status; + unsigned long irq_flags; + struct hv_get_vp_from_apic_id_in *input; + u32 *output, ret; + + local_irq_save(irq_flags); + + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + memset(input, 0, sizeof(*input)); + input->partition_id = HV_PARTITION_ID_SELF; + input->apic_ids[0] = apic_id; + + output = *this_cpu_ptr(hyperv_pcpu_output_arg); + + control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_INDEX_FROM_APIC_ID; + status = hv_do_hypercall(control, input, output); + ret = output[0]; + + local_irq_restore(irq_flags); + + if (!hv_result_success(status)) { + pr_err("failed to get vp index from apic id %d, status %#llx\n", + apic_id, status); + return -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL_GPL(hv_apicid_to_vp_index); diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 4580936dcb03..042e8712d8de 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -56,7 +56,12 @@ static void __noreturn hv_vtl_restart(char __maybe_unused *cmd) void __init hv_vtl_init_platform(void) { - pr_info("Linux runs in Hyper-V Virtual Trust Level\n"); + /* + * This function is a no-op if the VTL mode is not enabled. + * If it is, this function runs if and only the kernel boots in + * VTL2 which the x86 hv initialization path makes sure of. + */ + pr_info("Linux runs in Hyper-V Virtual Trust Level %d\n", ms_hyperv.vtl); x86_platform.realmode_reserve = x86_init_noop; x86_platform.realmode_init = x86_init_noop; @@ -207,63 +212,23 @@ free_lock: return ret; } -static int hv_vtl_apicid_to_vp_id(u32 apic_id) -{ - u64 control; - u64 status; - unsigned long irq_flags; - struct hv_get_vp_from_apic_id_in *input; - u32 *output, ret; - - local_irq_save(irq_flags); - - input = *this_cpu_ptr(hyperv_pcpu_input_arg); - memset(input, 0, sizeof(*input)); - input->partition_id = HV_PARTITION_ID_SELF; - input->apic_ids[0] = apic_id; - - output = *this_cpu_ptr(hyperv_pcpu_output_arg); - - control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID; - status = hv_do_hypercall(control, input, output); - ret = output[0]; - - local_irq_restore(irq_flags); - - if (!hv_result_success(status)) { - pr_err("failed to get vp id from apic id %d, status %#llx\n", - apic_id, status); - return -EINVAL; - } - - return ret; -} - -static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) +static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip, unsigned int cpu) { - int vp_id, cpu; - - /* Find the logical CPU for the APIC ID */ - for_each_present_cpu(cpu) { - if (arch_match_cpu_phys_id(cpu, apicid)) - break; - } - if (cpu >= nr_cpu_ids) - return -EINVAL; + int vp_index; pr_debug("Bringing up CPU with APIC ID %d in VTL2...\n", apicid); - vp_id = hv_vtl_apicid_to_vp_id(apicid); + vp_index = hv_apicid_to_vp_index(apicid); - if (vp_id < 0) { + if (vp_index < 0) { pr_err("Couldn't find CPU with APIC ID %d\n", apicid); return -EINVAL; } - if (vp_id > ms_hyperv.max_vp_index) { - pr_err("Invalid CPU id %d for APIC ID %d\n", vp_id, apicid); + if (vp_index > ms_hyperv.max_vp_index) { + pr_err("Invalid CPU id %d for APIC ID %d\n", vp_index, apicid); return -EINVAL; } - return hv_vtl_bringup_vcpu(vp_id, cpu, start_eip); + return hv_vtl_bringup_vcpu(vp_index, cpu, start_eip); } int __init hv_vtl_early_init(void) diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 09a165a3c41e..e93a2f488ff7 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -9,6 +9,7 @@ #include <linux/bitfield.h> #include <linux/types.h> #include <linux/slab.h> +#include <linux/cpu.h> #include <asm/svm.h> #include <asm/sev.h> #include <asm/io.h> @@ -289,7 +290,7 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) free_page((unsigned long)vmsa); } -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu) { struct sev_es_save_area *vmsa = (struct sev_es_save_area *) __get_free_page(GFP_KERNEL | __GFP_ZERO); @@ -298,10 +299,16 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) u64 ret, retry = 5; struct hv_enable_vp_vtl *start_vp_input; unsigned long flags; + int vp_index; if (!vmsa) return -ENOMEM; + /* Find the Hyper-V VP index which might be not the same as APIC ID */ + vp_index = hv_apicid_to_vp_index(apic_id); + if (vp_index < 0 || vp_index > ms_hyperv.max_vp_index) + return -EINVAL; + native_store_gdt(&gdtr); vmsa->gdtr.base = gdtr.address; @@ -349,7 +356,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg; memset(start_vp_input, 0, sizeof(*start_vp_input)); start_vp_input->partition_id = -1; - start_vp_input->vp_index = cpu; + start_vp_input->vp_index = vp_index; start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl; *(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1; diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 68e10e30fe9b..23d86c9750b9 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -313,9 +313,9 @@ struct apic { u32 (*get_apic_id)(u32 id); /* wakeup_secondary_cpu */ - int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip); + int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip, unsigned int cpu); /* wakeup secondary CPU using 64-bit wakeup point */ - int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip); + int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu); char *name; }; @@ -333,8 +333,8 @@ struct apic_override { void (*send_IPI_self)(int vector); u64 (*icr_read)(void); void (*icr_write)(u32 low, u32 high); - int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip); - int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip); + int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip, unsigned int cpu); + int (*wakeup_secondary_cpu_64)(u32 apicid, unsigned long start_eip, unsigned int cpu); }; /* diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 778444310cfb..e1752ba47e67 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -269,11 +269,12 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); #ifdef CONFIG_AMD_MEM_ENCRYPT bool hv_ghcb_negotiate_protocol(void); void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip); +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, unsigned int cpu); #else static inline bool hv_ghcb_negotiate_protocol(void) { return false; } static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} -static inline int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) { return 0; } +static inline int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip, + unsigned int cpu) { return 0; } #endif #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) @@ -307,6 +308,7 @@ static __always_inline u64 hv_raw_get_msr(unsigned int reg) { return native_rdmsrq(reg); } +int hv_apicid_to_vp_index(u32 apic_id); #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} @@ -328,6 +330,7 @@ static inline void hv_set_msr(unsigned int reg, u64 value) { } static inline u64 hv_get_msr(unsigned int reg) { return 0; } static inline void hv_set_non_nested_msr(unsigned int reg, u64 value) { } static inline u64 hv_get_non_nested_msr(unsigned int reg) { return 0; } +static inline int hv_apicid_to_vp_index(u32 apic_id) { return -EINVAL; } #endif /* CONFIG_HYPERV */ diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c index f36f28405dcc..6d7603511f52 100644 --- a/arch/x86/kernel/acpi/madt_wakeup.c +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -126,7 +126,7 @@ static int __init acpi_mp_setup_reset(u64 reset_vector) return 0; } -static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip) +static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip, unsigned int cpu) { if (!acpi_mp_wake_mailbox_paddr) { pr_warn_once("No MADT mailbox: cannot bringup secondary CPUs. Booting with kexec?\n"); diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index b5bb7a2e8340..58abb941c45b 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -27,7 +27,13 @@ static void noop_send_IPI_allbutself(int vector) { } static void noop_send_IPI_all(int vector) { } static void noop_send_IPI_self(int vector) { } static void noop_apic_icr_write(u32 low, u32 id) { } -static int noop_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) { return -1; } + +static int noop_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip, + unsigned int cpu) +{ + return -1; +} + static u64 noop_apic_icr_read(void) { return 0; } static u32 noop_get_apic_id(u32 apicid) { return 0; } static void noop_apic_eoi(void) { } diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index e272bc7fdc8e..5c5be2d58242 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -57,7 +57,7 @@ static void numachip2_apic_icr_write(int apicid, unsigned int val) numachip2_write32_lcsr(NUMACHIP2_APIC_ICR, (apicid << 12) | val); } -static int numachip_wakeup_secondary(u32 phys_apicid, unsigned long start_rip) +static int numachip_wakeup_secondary(u32 phys_apicid, unsigned long start_rip, unsigned int cpu) { numachip_apic_icr_write(phys_apicid, APIC_DM_INIT); numachip_apic_icr_write(phys_apicid, APIC_DM_STARTUP | diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 7fef504ca508..15209f220e1f 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -667,7 +667,7 @@ static __init void build_uv_gr_table(void) } } -static int uv_wakeup_secondary(u32 phys_apicid, unsigned long start_rip) +static int uv_wakeup_secondary(u32 phys_apicid, unsigned long start_rip, unsigned int cpu) { unsigned long val; int pnode; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 1ba92ac9441d..fc78c2325fd2 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -695,7 +695,7 @@ static void send_init_sequence(u32 phys_apicid) /* * Wake up AP by INIT, INIT, STARTUP sequence. */ -static int wakeup_secondary_cpu_via_init(u32 phys_apicid, unsigned long start_eip) +static int wakeup_secondary_cpu_via_init(u32 phys_apicid, unsigned long start_eip, unsigned int cpu) { unsigned long send_status = 0, accept_status = 0; int num_starts, j, maxlvt; @@ -842,7 +842,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) * Returns zero if startup was successfully sent, else error code from * ->wakeup_secondary_cpu. */ -static int do_boot_cpu(u32 apicid, int cpu, struct task_struct *idle) +static int do_boot_cpu(u32 apicid, unsigned int cpu, struct task_struct *idle) { unsigned long start_ip = real_mode_header->trampoline_start; int ret; @@ -896,11 +896,11 @@ static int do_boot_cpu(u32 apicid, int cpu, struct task_struct *idle) * - Use an INIT boot APIC message */ if (apic->wakeup_secondary_cpu_64) - ret = apic->wakeup_secondary_cpu_64(apicid, start_ip); + ret = apic->wakeup_secondary_cpu_64(apicid, start_ip, cpu); else if (apic->wakeup_secondary_cpu) - ret = apic->wakeup_secondary_cpu(apicid, start_ip); + ret = apic->wakeup_secondary_cpu(apicid, start_ip, cpu); else - ret = wakeup_secondary_cpu_via_init(apicid, start_ip); + ret = wakeup_secondary_cpu_via_init(apicid, start_ip, cpu); /* If the wakeup mechanism failed, cleanup the warm reset vector */ if (ret) diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c index 1687483ff319..76a856c32c4d 100644 --- a/drivers/acpi/irq.c +++ b/drivers/acpi/irq.c @@ -12,7 +12,7 @@ enum acpi_irq_model_id acpi_irq_model; -static struct fwnode_handle *(*acpi_get_gsi_domain_id)(u32 gsi); +static acpi_gsi_domain_disp_fn acpi_get_gsi_domain_id; static u32 (*acpi_gsi_to_irq_fallback)(u32 gsi); /** @@ -307,12 +307,24 @@ EXPORT_SYMBOL_GPL(acpi_irq_get); * for a given GSI */ void __init acpi_set_irq_model(enum acpi_irq_model_id model, - struct fwnode_handle *(*fn)(u32)) + acpi_gsi_domain_disp_fn fn) { acpi_irq_model = model; acpi_get_gsi_domain_id = fn; } +/* + * acpi_get_gsi_dispatcher() - Get the GSI dispatcher function + * + * Return the dispatcher function that computes the domain fwnode for + * a given GSI. + */ +acpi_gsi_domain_disp_fn acpi_get_gsi_dispatcher(void) +{ + return acpi_get_gsi_domain_id; +} +EXPORT_SYMBOL_GPL(acpi_get_gsi_dispatcher); + /** * acpi_set_gsi_to_irq_fallback - Register a GSI transfer * callback to fallback to arch specified implementation. diff --git a/drivers/firmware/smccc/kvm_guest.c b/drivers/firmware/smccc/kvm_guest.c index a123c05cbc9e..49e1de83d2e8 100644 --- a/drivers/firmware/smccc/kvm_guest.c +++ b/drivers/firmware/smccc/kvm_guest.c @@ -17,17 +17,11 @@ static DECLARE_BITMAP(__kvm_arm_hyp_services, ARM_SMCCC_KVM_NUM_FUNCS) __ro_afte void __init kvm_init_hyp_services(void) { + uuid_t kvm_uuid = ARM_SMCCC_VENDOR_HYP_UID_KVM; struct arm_smccc_res res; u32 val[4]; - if (arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_HVC) - return; - - arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, &res); - if (res.a0 != ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 || - res.a1 != ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 || - res.a2 != ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 || - res.a3 != ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3) + if (!arm_smccc_hypervisor_has_uuid(&kvm_uuid)) return; memset(&res, 0, sizeof(res)); diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c index a74600d9f2d7..cd65b434dc6e 100644 --- a/drivers/firmware/smccc/smccc.c +++ b/drivers/firmware/smccc/smccc.c @@ -67,6 +67,23 @@ s32 arm_smccc_get_soc_id_revision(void) } EXPORT_SYMBOL_GPL(arm_smccc_get_soc_id_revision); +bool arm_smccc_hypervisor_has_uuid(const uuid_t *hyp_uuid) +{ + struct arm_smccc_res res = {}; + uuid_t uuid; + + if (arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_HVC) + return false; + + arm_smccc_1_1_hvc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, &res); + if (res.a0 == SMCCC_RET_NOT_SUPPORTED) + return false; + + uuid = smccc_res_to_uuid(res.a0, res.a1, res.a2, res.a3); + return uuid_equal(&uuid, hyp_uuid); +} +EXPORT_SYMBOL_GPL(arm_smccc_hypervisor_has_uuid); + static int __init smccc_devices_init(void) { struct platform_device *pdev; diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 6c1416167bd2..1cd188b73b74 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -5,17 +5,18 @@ menu "Microsoft Hyper-V guest support" config HYPERV tristate "Microsoft Hyper-V client drivers" depends on (X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \ - || (ACPI && ARM64 && !CPU_BIG_ENDIAN) + || (ARM64 && !CPU_BIG_ENDIAN) select PARAVIRT select X86_HV_CALLBACK_VECTOR if X86 select OF_EARLY_FLATTREE if OF + select SYSFB if !HYPERV_VTL_MODE help Select this option to run Linux as a Hyper-V client operating system. config HYPERV_VTL_MODE bool "Enable Linux to boot in VTL context" - depends on X86_64 && HYPERV + depends on (X86_64 || ARM64) && HYPERV depends on SMP default n help @@ -31,7 +32,7 @@ config HYPERV_VTL_MODE Select this option to build a Linux kernel to run at a VTL other than the normal VTL0, which currently is only VTL2. This option - initializes the x86 platform for VTL2, and adds the ability to boot + initializes the kernel to run in VTL2, and adds the ability to boot secondary CPUs directly into 64-bit context as required for VTLs other than 0. A kernel built with this option must run at VTL2, and will not run as a normal guest. diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 8351360bba16..be490c598785 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -207,10 +207,19 @@ int vmbus_connect(void) mutex_init(&vmbus_connection.channel_mutex); /* + * The following Hyper-V interrupt and monitor pages can be used by + * UIO for mapping to user-space, so they should always be allocated on + * system page boundaries. The system page size must be >= the Hyper-V + * page size. + */ + BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE); + + /* * Setup the vmbus event connection for channel interrupt * abstraction stuff */ - vmbus_connection.int_page = hv_alloc_hyperv_zeroed_page(); + vmbus_connection.int_page = + (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); if (vmbus_connection.int_page == NULL) { ret = -ENOMEM; goto cleanup; @@ -225,8 +234,8 @@ int vmbus_connect(void) * Setup the monitor notification facility. The 1st page for * parent->child and the 2nd page for child->parent */ - vmbus_connection.monitor_pages[0] = hv_alloc_hyperv_page(); - vmbus_connection.monitor_pages[1] = hv_alloc_hyperv_page(); + vmbus_connection.monitor_pages[0] = (void *)__get_free_page(GFP_KERNEL); + vmbus_connection.monitor_pages[1] = (void *)__get_free_page(GFP_KERNEL); if ((vmbus_connection.monitor_pages[0] == NULL) || (vmbus_connection.monitor_pages[1] == NULL)) { ret = -ENOMEM; @@ -342,21 +351,23 @@ void vmbus_disconnect(void) destroy_workqueue(vmbus_connection.work_queue); if (vmbus_connection.int_page) { - hv_free_hyperv_page(vmbus_connection.int_page); + free_page((unsigned long)vmbus_connection.int_page); vmbus_connection.int_page = NULL; } if (vmbus_connection.monitor_pages[0]) { if (!set_memory_encrypted( (unsigned long)vmbus_connection.monitor_pages[0], 1)) - hv_free_hyperv_page(vmbus_connection.monitor_pages[0]); + free_page((unsigned long) + vmbus_connection.monitor_pages[0]); vmbus_connection.monitor_pages[0] = NULL; } if (vmbus_connection.monitor_pages[1]) { if (!set_memory_encrypted( (unsigned long)vmbus_connection.monitor_pages[1], 1)) - hv_free_hyperv_page(vmbus_connection.monitor_pages[1]); + free_page((unsigned long) + vmbus_connection.monitor_pages[1]); vmbus_connection.monitor_pages[1] = NULL; } } diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 59792e00cecf..49898d10faff 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -105,45 +105,6 @@ void __init hv_common_free(void) hv_synic_eventring_tail = NULL; } -/* - * Functions for allocating and freeing memory with size and - * alignment HV_HYP_PAGE_SIZE. These functions are needed because - * the guest page size may not be the same as the Hyper-V page - * size. We depend upon kmalloc() aligning power-of-two size - * allocations to the allocation size boundary, so that the - * allocated memory appears to Hyper-V as a page of the size - * it expects. - */ - -void *hv_alloc_hyperv_page(void) -{ - BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE); - - if (PAGE_SIZE == HV_HYP_PAGE_SIZE) - return (void *)__get_free_page(GFP_KERNEL); - else - return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); -} -EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page); - -void *hv_alloc_hyperv_zeroed_page(void) -{ - if (PAGE_SIZE == HV_HYP_PAGE_SIZE) - return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); - else - return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); -} -EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page); - -void hv_free_hyperv_page(void *addr) -{ - if (PAGE_SIZE == HV_HYP_PAGE_SIZE) - free_page((unsigned long)addr); - else - kfree(addr); -} -EXPORT_SYMBOL_GPL(hv_free_hyperv_page); - static void *hv_panic_page; /* @@ -272,7 +233,7 @@ static void hv_kmsg_dump_unregister(void) atomic_notifier_chain_unregister(&panic_notifier_list, &hyperv_panic_report_block); - hv_free_hyperv_page(hv_panic_page); + kfree(hv_panic_page); hv_panic_page = NULL; } @@ -280,7 +241,7 @@ static void hv_kmsg_dump_register(void) { int ret; - hv_panic_page = hv_alloc_hyperv_zeroed_page(); + hv_panic_page = kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); if (!hv_panic_page) { pr_err("Hyper-V: panic message page memory allocation failed\n"); return; @@ -289,7 +250,7 @@ static void hv_kmsg_dump_register(void) ret = kmsg_dump_register(&hv_kmsg_dumper); if (ret) { pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret); - hv_free_hyperv_page(hv_panic_page); + kfree(hv_panic_page); hv_panic_page = NULL; } } @@ -317,6 +278,37 @@ void __init hv_get_partition_id(void) pr_err("Hyper-V: failed to get partition ID: %#x\n", hv_result(status)); } +#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE) +u8 __init get_vtl(void) +{ + u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS; + struct hv_input_get_vp_registers *input; + struct hv_output_get_vp_registers *output; + unsigned long flags; + u64 ret; + + local_irq_save(flags); + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + output = *this_cpu_ptr(hyperv_pcpu_output_arg); + + memset(input, 0, struct_size(input, names, 1)); + input->partition_id = HV_PARTITION_ID_SELF; + input->vp_index = HV_VP_INDEX_SELF; + input->input_vtl.as_uint8 = 0; + input->names[0] = HV_REGISTER_VSM_VP_STATUS; + + ret = hv_do_hypercall(control, input, output); + if (hv_result_success(ret)) { + ret = output->values[0].reg8 & HV_VTL_MASK; + } else { + pr_err("Failed to get VTL(error: %lld) exiting...\n", ret); + BUG(); + } + + local_irq_restore(flags); + return ret; +} +#endif int __init hv_common_init(void) { diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index d74adb5bba44..33b524b4eb5e 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -45,7 +45,8 @@ struct vmbus_dynid { struct hv_vmbus_device_id id; }; -static struct device *hv_dev; +/* VMBus Root Device */ +static struct device *vmbus_root_device; static int hyperv_cpuhp_online; @@ -80,9 +81,15 @@ static struct resource *fb_mmio; static struct resource *hyperv_mmio; static DEFINE_MUTEX(hyperv_mmio_lock); +struct device *hv_get_vmbus_root_device(void) +{ + return vmbus_root_device; +} +EXPORT_SYMBOL_GPL(hv_get_vmbus_root_device); + static int vmbus_exists(void) { - if (hv_dev == NULL) + if (vmbus_root_device == NULL) return -ENODEV; return 0; @@ -707,7 +714,30 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id(const struct hv_driver * return id; } -/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */ +/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices + * + * This function can race with vmbus_device_register(). This function is + * typically running on a user thread in response to writing to the "new_id" + * sysfs entry for a driver. vmbus_device_register() is running on a + * workqueue thread in response to the Hyper-V host offering a device to the + * guest. This function calls driver_attach(), which looks for an existing + * device matching the new id, and attaches the driver to which the new id + * has been assigned. vmbus_device_register() calls device_register(), which + * looks for a driver that matches the device being registered. If both + * operations are running simultaneously, the device driver probe function runs + * on whichever thread establishes the linkage between the driver and device. + * + * In most cases, it doesn't matter which thread runs the driver probe + * function. But if vmbus_device_register() does not find a matching driver, + * it proceeds to create the "channels" subdirectory and numbered per-channel + * subdirectory in sysfs. While that multi-step creation is in progress, this + * function could run the driver probe function. If the probe function checks + * for, or operates on, entries in the "channels" subdirectory, including by + * calling hv_create_ring_sysfs(), the operation may or may not succeed + * depending on the race. The race can't create a kernel failure in VMBus + * or device subsystem code, but probe functions in VMBus drivers doing such + * operations must be prepared for the failure case. + */ static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid) { struct vmbus_dynid *dynid; @@ -861,7 +891,7 @@ static int vmbus_dma_configure(struct device *child_device) * On x86/x64 coherence is assumed and these calls have no effect. */ hv_setup_dma_ops(child_device, - device_get_dma_attr(hv_dev) == DEV_DMA_COHERENT); + device_get_dma_attr(vmbus_root_device) == DEV_DMA_COHERENT); return 0; } @@ -1921,7 +1951,8 @@ static const struct kobj_type vmbus_chan_ktype = { * ring for userspace to use. * Note: Race conditions can happen with userspace and it is not encouraged to create new * use-cases for this. This was added to maintain backward compatibility, while solving - * one of the race conditions in uio_hv_generic while creating sysfs. + * one of the race conditions in uio_hv_generic while creating sysfs. See comments with + * vmbus_add_dynid() and vmbus_device_register(). * * Returns 0 on success or error code on failure. */ @@ -2037,7 +2068,7 @@ int vmbus_device_register(struct hv_device *child_device_obj) &child_device_obj->channel->offermsg.offer.if_instance); child_device_obj->device.bus = &hv_bus; - child_device_obj->device.parent = hv_dev; + child_device_obj->device.parent = vmbus_root_device; child_device_obj->device.release = vmbus_device_release; child_device_obj->device.dma_parms = &child_device_obj->dma_parms; @@ -2055,6 +2086,20 @@ int vmbus_device_register(struct hv_device *child_device_obj) return ret; } + /* + * If device_register() found a driver to assign to the device, the + * driver's probe function has already run at this point. If that + * probe function accesses or operates on the "channels" subdirectory + * in sysfs, those operations will have failed because the "channels" + * subdirectory doesn't exist until the code below runs. Or if the + * probe function creates a /dev entry, a user space program could + * find and open the /dev entry, and then create a race by accessing + * the "channels" subdirectory while the creation steps are in progress + * here. The race can't result in a kernel failure, but the user space + * program may get an error in accessing "channels" or its + * subdirectories. See also comments with vmbus_add_dynid() about a + * related race condition. + */ child_device_obj->channels_kset = kset_create_and_add("channels", NULL, kobj); if (!child_device_obj->channels_kset) { @@ -2412,7 +2457,7 @@ static int vmbus_acpi_add(struct platform_device *pdev) struct acpi_device *ancestor; struct acpi_device *device = ACPI_COMPANION(&pdev->dev); - hv_dev = &device->dev; + vmbus_root_device = &device->dev; /* * Older versions of Hyper-V for ARM64 fail to include the _CCA @@ -2465,6 +2510,31 @@ static int vmbus_acpi_add(struct platform_device *pdev) } #endif +static int vmbus_set_irq(struct platform_device *pdev) +{ + struct irq_data *data; + int irq; + irq_hw_number_t hwirq; + + irq = platform_get_irq(pdev, 0); + /* platform_get_irq() may not return 0. */ + if (irq < 0) + return irq; + + data = irq_get_irq_data(irq); + if (!data) { + pr_err("No interrupt data for VMBus virq %d\n", irq); + return -ENODEV; + } + hwirq = irqd_to_hwirq(data); + + vmbus_irq = irq; + vmbus_interrupt = hwirq; + pr_debug("VMBus virq %d, hwirq %d\n", vmbus_irq, vmbus_interrupt); + + return 0; +} + static int vmbus_device_add(struct platform_device *pdev) { struct resource **cur_res = &hyperv_mmio; @@ -2473,12 +2543,17 @@ static int vmbus_device_add(struct platform_device *pdev) struct device_node *np = pdev->dev.of_node; int ret; - hv_dev = &pdev->dev; + vmbus_root_device = &pdev->dev; ret = of_range_parser_init(&parser, np); if (ret) return ret; + if (!__is_defined(HYPERVISOR_CALLBACK_VECTOR)) + ret = vmbus_set_irq(pdev); + if (ret) + return ret; + for_each_of_range(&parser, &range) { struct resource *res; @@ -2786,7 +2861,7 @@ static int __init hv_acpi_init(void) if (ret) return ret; - if (!hv_dev) { + if (!vmbus_root_device) { ret = -ENODEV; goto cleanup; } @@ -2817,7 +2892,7 @@ static int __init hv_acpi_init(void) cleanup: platform_driver_unregister(&vmbus_platform_driver); - hv_dev = NULL; + vmbus_root_device = NULL; return ret; } diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index e1eaa24559a2..ef5d655a0052 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -50,6 +50,7 @@ #include <linux/irqdomain.h> #include <linux/acpi.h> #include <linux/sizes.h> +#include <linux/of_irq.h> #include <asm/mshyperv.h> /* @@ -309,8 +310,6 @@ struct pci_packet { void (*completion_func)(void *context, struct pci_response *resp, int resp_packet_size); void *compl_ctxt; - - struct pci_message message[]; }; /* @@ -817,9 +816,17 @@ static int hv_pci_vec_irq_gic_domain_alloc(struct irq_domain *domain, int ret; fwspec.fwnode = domain->parent->fwnode; - fwspec.param_count = 2; - fwspec.param[0] = hwirq; - fwspec.param[1] = IRQ_TYPE_EDGE_RISING; + if (is_of_node(fwspec.fwnode)) { + /* SPI lines for OF translations start at offset 32 */ + fwspec.param_count = 3; + fwspec.param[0] = 0; + fwspec.param[1] = hwirq - 32; + fwspec.param[2] = IRQ_TYPE_EDGE_RISING; + } else { + fwspec.param_count = 2; + fwspec.param[0] = hwirq; + fwspec.param[1] = IRQ_TYPE_EDGE_RISING; + } ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec); if (ret) @@ -887,10 +894,44 @@ static const struct irq_domain_ops hv_pci_domain_ops = { .activate = hv_pci_vec_irq_domain_activate, }; +#ifdef CONFIG_OF + +static struct irq_domain *hv_pci_of_irq_domain_parent(void) +{ + struct device_node *parent; + struct irq_domain *domain; + + parent = of_irq_find_parent(hv_get_vmbus_root_device()->of_node); + if (!parent) + return NULL; + domain = irq_find_host(parent); + of_node_put(parent); + + return domain; +} + +#endif + +#ifdef CONFIG_ACPI + +static struct irq_domain *hv_pci_acpi_irq_domain_parent(void) +{ + acpi_gsi_domain_disp_fn gsi_domain_disp_fn; + + gsi_domain_disp_fn = acpi_get_gsi_dispatcher(); + if (!gsi_domain_disp_fn) + return NULL; + return irq_find_matching_fwnode(gsi_domain_disp_fn(0), + DOMAIN_BUS_ANY); +} + +#endif + static int hv_pci_irqchip_init(void) { static struct hv_pci_chip_data *chip_data; struct fwnode_handle *fn = NULL; + struct irq_domain *irq_domain_parent = NULL; int ret = -ENOMEM; chip_data = kzalloc(sizeof(*chip_data), GFP_KERNEL); @@ -907,9 +948,24 @@ static int hv_pci_irqchip_init(void) * way to ensure that all the corresponding devices are also gone and * no interrupts will be generated. */ - hv_msi_gic_irq_domain = acpi_irq_create_hierarchy(0, HV_PCI_MSI_SPI_NR, - fn, &hv_pci_domain_ops, - chip_data); +#ifdef CONFIG_ACPI + if (!acpi_disabled) + irq_domain_parent = hv_pci_acpi_irq_domain_parent(); +#endif +#ifdef CONFIG_OF + if (!irq_domain_parent) + irq_domain_parent = hv_pci_of_irq_domain_parent(); +#endif + if (!irq_domain_parent) { + WARN_ONCE(1, "Invalid firmware configuration for VMBus interrupts\n"); + ret = -EINVAL; + goto free_chip; + } + + hv_msi_gic_irq_domain = irq_domain_create_hierarchy(irq_domain_parent, 0, + HV_PCI_MSI_SPI_NR, + fn, &hv_pci_domain_ops, + chip_data); if (!hv_msi_gic_irq_domain) { pr_err("Failed to create Hyper-V arm64 vPCI MSI IRQ domain\n"); @@ -1438,7 +1494,7 @@ static int hv_read_config_block(struct pci_dev *pdev, void *buf, memset(&pkt, 0, sizeof(pkt)); pkt.pkt.completion_func = hv_pci_read_config_compl; pkt.pkt.compl_ctxt = &comp_pkt; - read_blk = (struct pci_read_block *)&pkt.pkt.message; + read_blk = (struct pci_read_block *)pkt.buf; read_blk->message_type.type = PCI_READ_BLOCK; read_blk->wslot.slot = devfn_to_wslot(pdev->devfn); read_blk->block_id = block_id; @@ -1518,7 +1574,7 @@ static int hv_write_config_block(struct pci_dev *pdev, void *buf, memset(&pkt, 0, sizeof(pkt)); pkt.pkt.completion_func = hv_pci_write_config_compl; pkt.pkt.compl_ctxt = &comp_pkt; - write_blk = (struct pci_write_block *)&pkt.pkt.message; + write_blk = (struct pci_write_block *)pkt.buf; write_blk->message_type.type = PCI_WRITE_BLOCK; write_blk->wslot.slot = devfn_to_wslot(pdev->devfn); write_blk->block_id = block_id; @@ -1599,7 +1655,7 @@ static void hv_int_desc_free(struct hv_pci_dev *hpdev, return; } memset(&ctxt, 0, sizeof(ctxt)); - int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message; + int_pkt = (struct pci_delete_interrupt *)ctxt.buffer; int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE; int_pkt->wslot.slot = hpdev->desc.win_slot.slot; @@ -2482,7 +2538,7 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus, comp_pkt.hpdev = hpdev; pkt.init_packet.compl_ctxt = &comp_pkt; pkt.init_packet.completion_func = q_resource_requirements; - res_req = (struct pci_child_message *)&pkt.init_packet.message; + res_req = (struct pci_child_message *)pkt.buffer; res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS; res_req->wslot.slot = desc->win_slot.slot; @@ -2860,7 +2916,7 @@ static void hv_eject_device_work(struct work_struct *work) pci_destroy_slot(hpdev->pci_slot); memset(&ctxt, 0, sizeof(ctxt)); - ejct_pkt = (struct pci_eject_response *)&ctxt.pkt.message; + ejct_pkt = (struct pci_eject_response *)ctxt.buffer; ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE; ejct_pkt->wslot.slot = hpdev->desc.win_slot.slot; vmbus_sendpacket(hbus->hdev->channel, ejct_pkt, @@ -3118,7 +3174,7 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev, init_completion(&comp_pkt.host_event); pkt->completion_func = hv_pci_generic_compl; pkt->compl_ctxt = &comp_pkt; - version_req = (struct pci_version_request *)&pkt->message; + version_req = (struct pci_version_request *)(pkt + 1); version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION; for (i = 0; i < num_version; i++) { @@ -3340,7 +3396,7 @@ enter_d0_retry: init_completion(&comp_pkt.host_event); pkt->completion_func = hv_pci_generic_compl; pkt->compl_ctxt = &comp_pkt; - d0_entry = (struct pci_bus_d0_entry *)&pkt->message; + d0_entry = (struct pci_bus_d0_entry *)(pkt + 1); d0_entry->message_type.type = PCI_BUS_D0ENTRY; d0_entry->mmio_base = hbus->mem_config->start; @@ -3498,20 +3554,20 @@ static int hv_send_resources_allocated(struct hv_device *hdev) if (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2) { res_assigned = - (struct pci_resources_assigned *)&pkt->message; + (struct pci_resources_assigned *)(pkt + 1); res_assigned->message_type.type = PCI_RESOURCES_ASSIGNED; res_assigned->wslot.slot = hpdev->desc.win_slot.slot; } else { res_assigned2 = - (struct pci_resources_assigned2 *)&pkt->message; + (struct pci_resources_assigned2 *)(pkt + 1); res_assigned2->message_type.type = PCI_RESOURCES_ASSIGNED2; res_assigned2->wslot.slot = hpdev->desc.win_slot.slot; } put_pcichild(hpdev); - ret = vmbus_sendpacket(hdev->channel, &pkt->message, + ret = vmbus_sendpacket(hdev->channel, pkt + 1, size_res, (unsigned long)pkt, VM_PKT_DATA_INBAND, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); @@ -3809,6 +3865,7 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs) struct pci_packet teardown_packet; u8 buffer[sizeof(struct pci_message)]; } pkt; + struct pci_message *msg; struct hv_pci_compl comp_pkt; struct hv_pci_dev *hpdev, *tmp; unsigned long flags; @@ -3854,10 +3911,10 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs) init_completion(&comp_pkt.host_event); pkt.teardown_packet.completion_func = hv_pci_generic_compl; pkt.teardown_packet.compl_ctxt = &comp_pkt; - pkt.teardown_packet.message[0].type = PCI_BUS_D0EXIT; + msg = (struct pci_message *)pkt.buffer; + msg->type = PCI_BUS_D0EXIT; - ret = vmbus_sendpacket_getid(chan, &pkt.teardown_packet.message, - sizeof(struct pci_message), + ret = vmbus_sendpacket_getid(chan, msg, sizeof(*msg), (unsigned long)&pkt.teardown_packet, &trans_id, VM_PKT_DATA_INBAND, VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 69c1df0f4ca5..aac67a4413ce 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -243,6 +243,9 @@ hv_uio_probe(struct hv_device *dev, if (!ring_size) ring_size = SZ_2M; + /* Adjust ring size if necessary to have it page aligned */ + ring_size = VMBUS_RING_SIZE(ring_size); + pdata = devm_kzalloc(&dev->device, sizeof(*pdata), GFP_KERNEL); if (!pdata) return -ENOMEM; @@ -274,13 +277,13 @@ hv_uio_probe(struct hv_device *dev, pdata->info.mem[INT_PAGE_MAP].name = "int_page"; pdata->info.mem[INT_PAGE_MAP].addr = (uintptr_t)vmbus_connection.int_page; - pdata->info.mem[INT_PAGE_MAP].size = PAGE_SIZE; + pdata->info.mem[INT_PAGE_MAP].size = HV_HYP_PAGE_SIZE; pdata->info.mem[INT_PAGE_MAP].memtype = UIO_MEM_LOGICAL; pdata->info.mem[MON_PAGE_MAP].name = "monitor_page"; pdata->info.mem[MON_PAGE_MAP].addr = (uintptr_t)vmbus_connection.monitor_pages[1]; - pdata->info.mem[MON_PAGE_MAP].size = PAGE_SIZE; + pdata->info.mem[MON_PAGE_MAP].size = HV_HYP_PAGE_SIZE; pdata->info.mem[MON_PAGE_MAP].memtype = UIO_MEM_LOGICAL; if (channel->device_id == HV_NIC) { diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index ccccb1cbf7df..a729b77983fa 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -236,10 +236,6 @@ int hv_common_cpu_init(unsigned int cpu); int hv_common_cpu_die(unsigned int cpu); void hv_identify_partition_type(void); -void *hv_alloc_hyperv_page(void); -void *hv_alloc_hyperv_zeroed_page(void); -void hv_free_hyperv_page(void *addr); - /** * hv_cpu_number_to_vp_number() - Map CPU to VP. * @cpu_number: CPU number in Linux terms @@ -378,4 +374,10 @@ static inline int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u3 } #endif /* CONFIG_MSHV_ROOT */ +#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE) +u8 __init get_vtl(void); +#else +static inline u8 get_vtl(void) { return 0; } +#endif + #endif diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h index 68606fa5fe73..1be7f6a02304 100644 --- a/include/hyperv/hvgdk_mini.h +++ b/include/hyperv/hvgdk_mini.h @@ -475,7 +475,7 @@ union hv_vp_assist_msr_contents { /* HV_REGISTER_VP_ASSIST_PAGE */ #define HVCALL_CREATE_PORT 0x0095 #define HVCALL_CONNECT_PORT 0x0096 #define HVCALL_START_VP 0x0099 -#define HVCALL_GET_VP_ID_FROM_APIC_ID 0x009a +#define HVCALL_GET_VP_INDEX_FROM_APIC_ID 0x009a #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 #define HVCALL_SIGNAL_EVENT_DIRECT 0x00c0 @@ -1228,7 +1228,7 @@ struct hv_send_ipi { /* HV_INPUT_SEND_SYNTHETIC_CLUSTER_IPI */ u64 cpu_mask; } __packed; -#define HV_X64_VTL_MASK GENMASK(3, 0) +#define HV_VTL_MASK GENMASK(3, 0) /* Hyper-V memory host visibility */ enum hv_mem_host_visibility { diff --git a/include/linux/acpi.h b/include/linux/acpi.h index f4b3d442b7df..f102c0fe3431 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -335,8 +335,11 @@ int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi); +typedef struct fwnode_handle *(*acpi_gsi_domain_disp_fn)(u32); + void acpi_set_irq_model(enum acpi_irq_model_id model, - struct fwnode_handle *(*)(u32)); + acpi_gsi_domain_disp_fn fn); +acpi_gsi_domain_disp_fn acpi_get_gsi_dispatcher(void); void acpi_set_gsi_to_irq_fallback(u32 (*)(u32)); struct irq_domain *acpi_irq_create_hierarchy(unsigned int flags, diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index a3863da1510e..784ebe4607a4 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -7,6 +7,11 @@ #include <linux/args.h> #include <linux/init.h> + +#ifndef __ASSEMBLY__ +#include <linux/uuid.h> +#endif + #include <uapi/linux/const.h> /* @@ -107,10 +112,10 @@ ARM_SMCCC_FUNC_QUERY_CALL_UID) /* KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 */ -#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U -#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU -#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U -#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU +#define ARM_SMCCC_VENDOR_HYP_UID_KVM UUID_INIT(\ + 0xb66fb428, 0xc52e, 0xe911, \ + 0xa9, 0xca, 0x4b, 0x56, \ + 0x4d, 0x00, 0x3a, 0x74) /* KVM "vendor specific" services */ #define ARM_SMCCC_KVM_FUNC_FEATURES 0 @@ -348,6 +353,57 @@ s32 arm_smccc_get_soc_id_version(void); */ s32 arm_smccc_get_soc_id_revision(void); +#ifndef __ASSEMBLY__ + +/* + * Returns whether a specific hypervisor UUID is advertised for the + * Vendor Specific Hypervisor Service range. + */ +bool arm_smccc_hypervisor_has_uuid(const uuid_t *uuid); + +static inline uuid_t smccc_res_to_uuid(u32 r0, u32 r1, u32 r2, u32 r3) +{ + uuid_t uuid = { + .b = { + [0] = (r0 >> 0) & 0xff, + [1] = (r0 >> 8) & 0xff, + [2] = (r0 >> 16) & 0xff, + [3] = (r0 >> 24) & 0xff, + + [4] = (r1 >> 0) & 0xff, + [5] = (r1 >> 8) & 0xff, + [6] = (r1 >> 16) & 0xff, + [7] = (r1 >> 24) & 0xff, + + [8] = (r2 >> 0) & 0xff, + [9] = (r2 >> 8) & 0xff, + [10] = (r2 >> 16) & 0xff, + [11] = (r2 >> 24) & 0xff, + + [12] = (r3 >> 0) & 0xff, + [13] = (r3 >> 8) & 0xff, + [14] = (r3 >> 16) & 0xff, + [15] = (r3 >> 24) & 0xff, + }, + }; + + return uuid; +} + +static inline u32 smccc_uuid_to_reg(const uuid_t *uuid, int reg) +{ + u32 val = 0; + + val |= (u32)(uuid->b[4 * reg + 0] << 0); + val |= (u32)(uuid->b[4 * reg + 1] << 8); + val |= (u32)(uuid->b[4 * reg + 2] << 16); + val |= (u32)(uuid->b[4 * reg + 3] << 24); + + return val; +} + +#endif /* !__ASSEMBLY__ */ + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b52ac40d5830..a59c5c3e95fb 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1276,6 +1276,8 @@ static inline void *hv_get_drvdata(struct hv_device *dev) return dev_get_drvdata(&dev->device); } +struct device *hv_get_vmbus_root_device(void); + struct hv_ring_buffer_debug_info { u32 current_interrupt_mask; u32 current_read_index; diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index b9ce3aab15fe..1f64c680be13 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -84,6 +84,7 @@ enum { }; static int in_hand_shake; +static int debug; static char *os_name = ""; static char *os_major = ""; @@ -184,6 +185,20 @@ static void kvp_update_file(int pool) kvp_release_lock(pool); } +static void kvp_dump_initial_pools(int pool) +{ + int i; + + syslog(LOG_DEBUG, "===Start dumping the contents of pool %d ===\n", + pool); + + for (i = 0; i < kvp_file_info[pool].num_records; i++) + syslog(LOG_DEBUG, "pool: %d, %d/%d key=%s val=%s\n", + pool, i + 1, kvp_file_info[pool].num_records, + kvp_file_info[pool].records[i].key, + kvp_file_info[pool].records[i].value); +} + static void kvp_update_mem_state(int pool) { FILE *filep; @@ -271,6 +286,8 @@ static int kvp_file_init(void) return 1; kvp_file_info[i].num_records = 0; kvp_update_mem_state(i); + if (debug) + kvp_dump_initial_pools(i); } return 0; @@ -298,6 +315,9 @@ static int kvp_key_delete(int pool, const __u8 *key, int key_size) * Found a match; just move the remaining * entries up. */ + if (debug) + syslog(LOG_DEBUG, "%s: deleting the KVP: pool=%d key=%s val=%s", + __func__, pool, record[i].key, record[i].value); if (i == (num_records - 1)) { kvp_file_info[pool].num_records--; kvp_update_file(pool); @@ -316,20 +336,36 @@ static int kvp_key_delete(int pool, const __u8 *key, int key_size) kvp_update_file(pool); return 0; } + + if (debug) + syslog(LOG_DEBUG, "%s: could not delete KVP: pool=%d key=%s. Record not found", + __func__, pool, key); + return 1; } static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, const __u8 *value, int value_size) { - int i; - int num_records; struct kvp_record *record; + int num_records; int num_blocks; + int i; + + if (debug) + syslog(LOG_DEBUG, "%s: got a KVP: pool=%d key=%s val=%s", + __func__, pool, key, value); if ((key_size > HV_KVP_EXCHANGE_MAX_KEY_SIZE) || - (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) + (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE)) { + syslog(LOG_ERR, "%s: Too long key or value: key=%s, val=%s", + __func__, key, value); + + if (debug) + syslog(LOG_DEBUG, "%s: Too long key or value: pool=%d, key=%s, val=%s", + __func__, pool, key, value); return 1; + } /* * First update the in-memory state. @@ -349,6 +385,9 @@ static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, */ memcpy(record[i].value, value, value_size); kvp_update_file(pool); + if (debug) + syslog(LOG_DEBUG, "%s: updated: pool=%d key=%s val=%s", + __func__, pool, key, value); return 0; } @@ -360,8 +399,10 @@ static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, record = realloc(record, sizeof(struct kvp_record) * ENTRIES_PER_BLOCK * (num_blocks + 1)); - if (record == NULL) + if (!record) { + syslog(LOG_ERR, "%s: Memory alloc failure", __func__); return 1; + } kvp_file_info[pool].num_blocks++; } @@ -369,6 +410,11 @@ static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, memcpy(record[i].key, key, key_size); kvp_file_info[pool].records = record; kvp_file_info[pool].num_records++; + + if (debug) + syslog(LOG_DEBUG, "%s: added: pool=%d key=%s val=%s", + __func__, pool, key, value); + kvp_update_file(pool); return 0; } @@ -1722,6 +1768,7 @@ void print_usage(char *argv[]) fprintf(stderr, "Usage: %s [options]\n" "Options are:\n" " -n, --no-daemon stay in foreground, don't daemonize\n" + " -d, --debug Enable debug logs(syslog debug by default)\n" " -h, --help print this help\n", argv[0]); } @@ -1743,10 +1790,11 @@ int main(int argc, char *argv[]) static struct option long_options[] = { {"help", no_argument, 0, 'h' }, {"no-daemon", no_argument, 0, 'n' }, + {"debug", no_argument, 0, 'd' }, {0, 0, 0, 0 } }; - while ((opt = getopt_long(argc, argv, "hn", long_options, + while ((opt = getopt_long(argc, argv, "hnd", long_options, &long_index)) != -1) { switch (opt) { case 'n': @@ -1755,6 +1803,9 @@ int main(int argc, char *argv[]) case 'h': print_usage(argv); exit(0); + case 'd': + debug = 1; + break; default: print_usage(argv); exit(EXIT_FAILURE); @@ -1777,6 +1828,9 @@ int main(int argc, char *argv[]) */ kvp_get_domain_name(full_domain_name, sizeof(full_domain_name)); + if (debug) + syslog(LOG_INFO, "Logging debug info in syslog(debug)"); + if (kvp_file_init()) { syslog(LOG_ERR, "Failed to initialize the pools"); exit(EXIT_FAILURE); |