diff --git a/sys/dev/nvmm/files.nvmm b/sys/dev/nvmm/files.nvmm
index 6eeaebd6f655..2319989b85b8 100644
--- a/sys/dev/nvmm/files.nvmm
+++ b/sys/dev/nvmm/files.nvmm
@@ -6,6 +6,7 @@ file	dev/nvmm/nvmm.c			nvmm
 
 ifdef amd64
 file	dev/nvmm/x86/nvmm_x86.c			nvmm
+file	dev/nvmm/x86/nvmm_x86_mtrr.c		nvmm
 file	dev/nvmm/x86/nvmm_x86_svm.c		nvmm
 file	dev/nvmm/x86/nvmm_x86_svmfunc.S		nvmm
 file	dev/nvmm/x86/nvmm_x86_vmx.c		nvmm
diff --git a/sys/dev/nvmm/x86/nvmm_x86.h b/sys/dev/nvmm/x86/nvmm_x86.h
index c653fdde5b07..75258360f813 100644
--- a/sys/dev/nvmm/x86/nvmm_x86.h
+++ b/sys/dev/nvmm/x86/nvmm_x86.h
@@ -323,6 +323,22 @@ extern const struct nvmm_x86_cpuid_mask nvmm_cpuid_80000001;
 extern const struct nvmm_x86_cpuid_mask nvmm_cpuid_80000007;
 extern const struct nvmm_x86_cpuid_mask nvmm_cpuid_80000008;
 bool nvmm_x86_pat_validate(uint64_t);
+
+/*
+ * MTRR (Memory Type Range Register)
+ */
+
+struct nvmm_x86_mtrr {
+	uint64_t var_ranges[16]; /* 8*2, base/mask pairs */
+	uint64_t fixed_64k;
+	uint64_t fixed_16k[2];
+	uint64_t fixed_4k[8];
+	uint64_t deftype;
+};
+
+int nvmm_x86_mtrr_rdmsr(struct nvmm_x86_mtrr *, uint32_t, uint64_t *);
+int nvmm_x86_mtrr_wrmsr(struct nvmm_x86_mtrr *, uint8_t, uint32_t, uint64_t);
+
 #endif
 
 #endif /* ASM_NVMM */
diff --git a/sys/dev/nvmm/x86/nvmm_x86_mtrr.c b/sys/dev/nvmm/x86/nvmm_x86_mtrr.c
new file mode 100644
index 000000000000..a165d46f9753
--- /dev/null
+++ b/sys/dev/nvmm/x86/nvmm_x86_mtrr.c
@@ -0,0 +1,244 @@
+/*	$NetBSD$	*/
+
+/*-
+ * Copyright (c) 2025 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * MTRR (Memory Type Range Register) virtualization support for NVMM.
+ *
+ * Intel 64 and IA-32 Architectures Software Developer's Manual
+ * Combined Volumes 3A, 3B, 3C, and 3D: System Programming Guide, ch. 13.11
+ * https://cdrdv2.intel.com/v1/dl/getContent/671447
+ */
+
+/*
+ * NOTE(review): the header names on the #include lines below were lost when
+ * this patch was extracted.  They are reconstructed from the identifiers the
+ * file uses and must be confirmed against the original submission.
+ */
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+
+#include <x86/specialreg.h>
+
+#include <dev/nvmm/nvmm.h>
+#include <dev/nvmm/nvmm_internal.h>
+#include <dev/nvmm/x86/nvmm_x86.h>
+
+/*
+ * Return true if 'type' is an architecturally defined MTRR memory type
+ * encoding:
+ *
+ *   0 - Uncacheable (UC)
+ *   1 - Write Combining (WC)
+ *   2 - Reserved
+ *   3 - Reserved
+ *   4 - Write Through (WT)
+ *   5 - Write-protected (WP)
+ *   6 - Writeback (WB)
+ *
+ * Every other value, including 7 and above, is rejected.
+ */
+static bool
+nvmm_x86_mtrr_valid_memtype(uint8_t type)
+{
+	const uint8_t valid = __BITS(0,1) | __BITS(4,6);
+
+	if (type > 7)
+		return false;
+
+	return ((1U << type) & valid) != 0;
+}
+
+/*
+ * Return the mask of the reserved bits [63:MAXPHYADDR] of a variable range
+ * register, given the number of physical address bits.
+ */
+static uint64_t
+nvmm_x86_mtrr_physrsvd(uint8_t physbits)
+{
+	/* __BITS(0, physbits - 1) is only defined for 1 <= physbits <= 64 */
+	if (physbits == 0)
+		return ~(uint64_t)0;
+	if (physbits >= 64)
+		return 0;
+
+	return ~__BITS(0, physbits - 1);
+}
+
+/*
+ * Emulate one guest access to an MTRR MSR, backed by the per-VCPU 'mtrr'.
+ *
+ * On a read (write == false) the register value is stored in *val.  On a
+ * write (write == true) *val is validated and, if acceptable, stored.
+ * 'physbits' is the number of physical address bits (MAXPHYADDR); it is only
+ * consulted when a variable range register is written.
+ *
+ * Returns 0 if the access was emulated, ENOENT if 'msr' is not an MTRR MSR,
+ * and EINVAL if a write is invalid (reserved bit set, invalid memory type,
+ * or read-only register).
+ */
+static int
+nvmm_x86_mtrr_getset(struct nvmm_x86_mtrr *mtrr, uint8_t physbits,
+    uint32_t msr, uint64_t *val, bool write)
+{
+	uint64_t *mtrraddr = NULL;
+	int i;
+
+#ifdef NVMM_DEBUG
+	printf("MTRR RECV MSR: %x (%u)\n", msr, write);
+#endif
+
+	switch (msr) {
+	/* 13.11.1 MTRR Feature Identification */
+	case MSR_MTRRcap:
+		/* IA32_MTRRCAP is read-only, do not pretend a write worked */
+		if (write)
+			return EINVAL;
+		/*
+		 * [7:0]: Number of variable range registers (8)
+		 *     8: Fixed range registers supported
+		 *    10: Write-combining memory type supported
+		 */
+		*val = 8 | __BIT(8) | __BIT(10);
+		return 0;
+	/* 13.11.2.1 IA32_MTRR_DEF_TYPE MSR */
+	case MSR_MTRRdefType:
+		mtrraddr = &mtrr->deftype;
+		if (!write)
+			break;
+		/*
+		 * We're writing
+		 *
+		 *   [7:0]: Memory type
+		 *   [9:8]: Reserved
+		 *      10: Fixed-range MTRRs enable/disable
+		 *      11: MTRR enable/disable
+		 * [63:12]: Reserved
+		 */
+		if (*val & (__BITS(8,9) | __BITS(12,63)))
+			return EINVAL;
+		/* validate memory type */
+		if (!nvmm_x86_mtrr_valid_memtype((uint8_t)(*val & 0xff)))
+			return EINVAL;
+		break;
+	/* 13.11.2.2 Fixed Range MTRRs */
+	case MSR_MTRRfix64K_00000:
+		mtrraddr = &mtrr->fixed_64k;
+		/* FALLTHROUGH */
+	case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000:
+		if (mtrraddr == NULL) /* mtrraddr not set by previous case */
+			mtrraddr = &mtrr->fixed_16k[msr - MSR_MTRRfix16K_80000];
+		/* FALLTHROUGH */
+	case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
+		if (mtrraddr == NULL)
+			mtrraddr = &mtrr->fixed_4k[msr - MSR_MTRRfix4K_C0000];
+		if (!write)
+			break;
+		/*
+		 * The fixed memory ranges are mapped with 11 fixed-range
+		 * registers of 64 bits each.
+		 * Each of these registers is divided into 8-bit fields that are
+		 * used to specify the memory type for each of the sub-ranges the
+		 * register controls.
+		 */
+		for (i = 0; i < 8; i++) {
+			uint8_t type = (uint8_t)((*val >> (i * 8)) & 0xff);
+			if (!nvmm_x86_mtrr_valid_memtype(type))
+				return EINVAL;
+		}
+		break;
+	/*
+	 * 13.11.2.3 Variable Range MTRRs
+	 * 8 PhysBase / PhysMask pairs
+	 */
+	case MSR_MTRRphysBase0 ... MSR_MTRRphysMask7:
+		mtrraddr = &mtrr->var_ranges[msr - MSR_MTRRphysBase0];
+		if (!write)
+			break;
+		/* 63:MAXPHYADDR reserved */
+		if (*val & nvmm_x86_mtrr_physrsvd(physbits))
+			return EINVAL;
+		if (msr & 1) { /* odd: phyMask */
+			/* [10:0]: Reserved */
+			if (*val & (__BITS(0,10)))
+				return EINVAL;
+		} else { /* even: phyBase */
+			uint8_t type = (uint8_t)(*val & 0xff);
+
+			/* [11:8]: Reserved */
+			if (*val & (__BITS(8,11)))
+				return EINVAL;
+			/* [7:0]: Memory type, reserved encodings are invalid */
+			if (!nvmm_x86_mtrr_valid_memtype(type))
+				return EINVAL;
+		}
+		break;
+	default:
+		return ENOENT;
+	}
+
+	if (write) { /* write mtrr */
+#ifdef NVMM_DEBUG
+		printf("MTRR: writing 0x%016lx at MSR 0x%x\n", *val, msr);
+#endif
+		*mtrraddr = *val;
+	} else { /* read mtrr */
+#ifdef NVMM_DEBUG
+		printf("MTRR: reading 0x%016lx at MSR %x\n", *mtrraddr, msr);
+#endif
+		*val = *mtrraddr;
+	}
+
+	return 0;
+}
+
+/*
+ * Handle a guest RDMSR: on success (return 0) the value of MTRR register
+ * 'msr' is stored in *val.  Returns ENOENT if 'msr' is not an MTRR MSR.
+ */
+int
+nvmm_x86_mtrr_rdmsr(struct nvmm_x86_mtrr *mtrr, uint32_t msr, uint64_t *val)
+{
+	/* physbits is not consulted on reads */
+	return nvmm_x86_mtrr_getset(mtrr, 0, msr, val, false);
+}
+
+/*
+ * Handle a guest WRMSR of 'val' to MTRR register 'msr'.  Returns 0 if the
+ * value was stored, ENOENT if 'msr' is not an MTRR MSR and EINVAL if the
+ * write is not permitted.
+ */
+int
+nvmm_x86_mtrr_wrmsr(struct nvmm_x86_mtrr *mtrr, uint8_t physbits,
+    uint32_t msr, uint64_t val)
+{
+	return nvmm_x86_mtrr_getset(mtrr, physbits, msr, &val, true);
+}
diff --git a/sys/dev/nvmm/x86/nvmm_x86_svm.c b/sys/dev/nvmm/x86/nvmm_x86_svm.c
index 7a645c4b7994..547861bba263 100644
--- a/sys/dev/nvmm/x86/nvmm_x86_svm.c
+++ b/sys/dev/nvmm/x86/nvmm_x86_svm.c
@@ -503,6 +503,8 @@ static uint32_t svm_ctrl_tlb_flush __read_mostly;
 #define SVM_XCR0_MASK_DEFAULT	(XCR0_X87|XCR0_SSE)
 static uint64_t svm_xcr0_mask __read_mostly;
 
+static uint8_t svm_physbits __read_mostly;
+
 #define SVM_NCPUIDS	32
 
 #define VMCB_NPAGES	1
@@ -603,6 +605,7 @@ struct svm_cpudata {
 	uint64_t drs[NVMM_X64_NDR];
 	uint64_t gtsc;
 	struct xsave_header gfpu __aligned(64);
+	struct nvmm_x86_mtrr mtrr;
 
 	/* VCPU configuration. */
 	bool cpuidpresent[SVM_NCPUIDS];
@@ -1230,7 +1233,7 @@ svm_inkernel_handle_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
 {
 	struct svm_cpudata *cpudata = vcpu->cpudata;
 	struct vmcb *vmcb = cpudata->vmcb;
-	uint64_t val;
+	uint64_t val = 0;
 	size_t i;
 
 	if (exit->reason == NVMM_VCPU_EXIT_RDMSR) {
@@ -1246,6 +1249,12 @@ svm_inkernel_handle_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
 			cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
 			goto handled;
 		}
+		if (nvmm_x86_mtrr_rdmsr(&cpudata->mtrr, exit->u.rdmsr.msr,
+		    &val) == 0) {
+			vmcb->state.rax = (val & 0xFFFFFFFF);
+			cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
+			goto handled;
+		}
 		for (i = 0; i < __arraycount(msr_ignore_list); i++) {
 			if (msr_ignore_list[i] != exit->u.rdmsr.msr)
 				continue;
@@ -1272,6 +1281,10 @@ svm_inkernel_handle_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
 			cpudata->gtsc_want_update = true;
 			goto handled;
 		}
+		if (nvmm_x86_mtrr_wrmsr(&cpudata->mtrr,
+		    svm_physbits, exit->u.wrmsr.msr, exit->u.wrmsr.val) == 0) {
+			goto handled;
+		}
 		for (i = 0; i < __arraycount(msr_ignore_list); i++) {
 			if (msr_ignore_list[i] != exit->u.wrmsr.msr)
 				continue;
@@ -2299,6 +2312,10 @@ svm_vcpu_init(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
 	cpudata->cstar = rdmsr(MSR_CSTAR);
 	cpudata->sfmask = rdmsr(MSR_SFMASK);
 
+	/* Initialize MTRR */
+	memset(&cpudata->mtrr, 0, sizeof(cpudata->mtrr));
+	cpudata->mtrr.deftype = 0x06; /* WB */
+
 	/* Install the RESET state. */
 	memcpy(&vcpu->comm->state, &nvmm_x86_reset_state,
 	    sizeof(nvmm_x86_reset_state));
@@ -2620,6 +2637,9 @@ svm_init(void)
 	x86_cpuid(0x80000000, descs);
 	svm_cpuid_max_extended = uimin(descs[0], SVM_CPUID_MAX_EXTENDED);
 
+	x86_cpuid(0x80000008, descs);
+	svm_physbits = descs[0] & 0xff;
+
 	memset(hsave, 0, sizeof(hsave));
 	for (CPU_INFO_FOREACH(cii, ci)) {
 		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
diff --git a/sys/dev/nvmm/x86/nvmm_x86_vmx.c b/sys/dev/nvmm/x86/nvmm_x86_vmx.c
index 0310aacdfe95..d39d2bee12fe 100644
--- a/sys/dev/nvmm/x86/nvmm_x86_vmx.c
+++ b/sys/dev/nvmm/x86/nvmm_x86_vmx.c
@@ -720,6 +720,8 @@ static kmutex_t vmx_asidlock __cacheline_aligned;
 #define VMX_XCR0_MASK_DEFAULT	(XCR0_X87|XCR0_SSE)
 static uint64_t vmx_xcr0_mask __read_mostly;
 
+static uint8_t vmx_physbits __read_mostly;
+
 #define VMX_NCPUIDS	32
 
 #define VMCS_NPAGES	1
@@ -821,6 +823,7 @@ struct vmx_cpudata {
 	uint64_t drs[NVMM_X64_NDR];
 	uint64_t gtsc;
 	struct xsave_header gfpu __aligned(64);
+	struct nvmm_x86_mtrr mtrr;
 
 	/* VCPU configuration. */
 	bool cpuidpresent[VMX_NCPUIDS];
@@ -1891,6 +1894,12 @@ vmx_inkernel_handle_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
 			cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
 			goto handled;
 		}
+		if (nvmm_x86_mtrr_rdmsr(&cpudata->mtrr, exit->u.rdmsr.msr,
+		    &val) == 0) {
+			cpudata->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF);
+			cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32);
+			goto handled;
+		}
 		for (i = 0; i < __arraycount(msr_ignore_list); i++) {
 			if (msr_ignore_list[i] != exit->u.rdmsr.msr)
 				continue;
@@ -1917,6 +1926,10 @@ vmx_inkernel_handle_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu,
 			/* Don't care. */
 			goto handled;
 		}
+		if (nvmm_x86_mtrr_wrmsr(&cpudata->mtrr,
+		    vmx_physbits, exit->u.wrmsr.msr, exit->u.wrmsr.val) == 0) {
+			goto handled;
+		}
 		for (i = 0; i < __arraycount(msr_ignore_list); i++) {
 			if (msr_ignore_list[i] != exit->u.wrmsr.msr)
 				continue;
@@ -3009,6 +3022,10 @@ vmx_vcpu_init(struct nvmm_machine *mach, struct nvmm_cpu *vcpu)
 	cpudata->cstar = rdmsr(MSR_CSTAR);
 	cpudata->sfmask = rdmsr(MSR_SFMASK);
 
+	/* Initialize MTRR */
+	memset(&cpudata->mtrr, 0, sizeof(cpudata->mtrr));
+	cpudata->mtrr.deftype = 0x06; /* WB */
+
 	/* Install the RESET state. */
 	memcpy(&vcpu->comm->state, &nvmm_x86_reset_state,
 	    sizeof(nvmm_x86_reset_state));
@@ -3589,6 +3606,9 @@ vmx_init(void)
 	x86_cpuid(0x80000000, descs);
 	vmx_cpuid_max_extended = uimin(descs[0], VMX_CPUID_MAX_EXTENDED);
 
+	x86_cpuid(0x80000008, descs);
+	vmx_physbits = descs[0] & 0xff;
+
 	/* Init the TLB flush op, the EPT flush op and the EPTP type. */
 	msr = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
 	if ((msr & IA32_VMX_EPT_VPID_INVVPID_CONTEXT) != 0) {
diff --git a/sys/modules/nvmm/Makefile b/sys/modules/nvmm/Makefile
index d2b6c53a74f9..903ddb4fc138 100644
--- a/sys/modules/nvmm/Makefile
+++ b/sys/modules/nvmm/Makefile
@@ -13,7 +13,7 @@ IOCONF=	nvmm.ioconf
 SRCS=	nvmm.c
 
 .if ${MACHINE_ARCH} == "x86_64"
-SRCS+=	nvmm_x86.c
+SRCS+=	nvmm_x86.c nvmm_x86_mtrr.c
 SRCS+=	nvmm_x86_svm.c nvmm_x86_svmfunc.S
 SRCS+=	nvmm_x86_vmx.c nvmm_x86_vmxfunc.S
 .endif