diff options
author | grehan <grehan@FreeBSD.org> | 2016-05-27 06:22:24 +0000 |
---|---|---|
committer | grehan <grehan@FreeBSD.org> | 2016-05-27 06:22:24 +0000 |
commit | 00d578928eca75be320b36d37543a7e2a4f9fbdb (patch) | |
tree | 0ce32a12a7c06cf5b7b4a892654456db701ccc5d /usr.sbin/bhyve | |
parent | cc4a2417e2fcd8a8b52c17df59c5fc86b94a6978 (diff) | |
download | FreeBSD-src-00d578928eca75be320b36d37543a7e2a4f9fbdb.zip FreeBSD-src-00d578928eca75be320b36d37543a7e2a4f9fbdb.tar.gz |
Create branch for bhyve graphics import.
Diffstat (limited to 'usr.sbin/bhyve')
57 files changed, 0 insertions, 19959 deletions
diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile deleted file mode 100644 index 7d2e2f0..0000000 --- a/usr.sbin/bhyve/Makefile +++ /dev/null @@ -1,55 +0,0 @@ -# -# $FreeBSD$ -# - -PROG= bhyve -PACKAGE= bhyve - -DEBUG_FLAGS= -g -O0 - -MAN= bhyve.8 - -BHYVE_SYSDIR?=${SRCTOP} - -SRCS= \ - atkbdc.c \ - acpi.c \ - bhyverun.c \ - block_if.c \ - bootrom.c \ - consport.c \ - dbgport.c \ - fwctl.c \ - inout.c \ - ioapic.c \ - mem.c \ - mevent.c \ - mptbl.c \ - pci_ahci.c \ - pci_emul.c \ - pci_hostbridge.c \ - pci_irq.c \ - pci_lpc.c \ - pci_passthru.c \ - pci_virtio_block.c \ - pci_virtio_net.c \ - pci_virtio_rnd.c \ - pci_uart.c \ - pm.c \ - post.c \ - rtc.c \ - smbiostbl.c \ - task_switch.c \ - uart_emul.c \ - virtio.c \ - xmsr.c \ - spinup_ap.c - -.PATH: ${BHYVE_SYSDIR}/sys/amd64/vmm -SRCS+= vmm_instruction_emul.c - -LIBADD= vmmapi md pthread - -WARNS?= 2 - -.include <bsd.prog.mk> diff --git a/usr.sbin/bhyve/Makefile.depend b/usr.sbin/bhyve/Makefile.depend deleted file mode 100644 index e12ecb9..0000000 --- a/usr.sbin/bhyve/Makefile.depend +++ /dev/null @@ -1,22 +0,0 @@ -# $FreeBSD$ -# Autogenerated - do NOT edit! - -DIRDEPS = \ - gnu/lib/csu \ - gnu/lib/libgcc \ - include \ - include/xlocale \ - lib/${CSU_DIR} \ - lib/libc \ - lib/libcompiler_rt \ - lib/libmd \ - lib/libthr \ - lib/libutil \ - lib/libvmmapi \ - - -.include <dirdeps.mk> - -.if ${DEP_RELDIR} == ${_DEP_RELDIR} -# local dependencies - needed for -jN in clean tree -.endif diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c deleted file mode 100644 index 57fe783..0000000 --- a/usr.sbin/bhyve/acpi.c +++ /dev/null @@ -1,1012 +0,0 @@ -/*- - * Copyright (c) 2012 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -/* - * bhyve ACPI table generator. - * - * Create the minimal set of ACPI tables required to boot FreeBSD (and - * hopefully other o/s's) by writing out ASL template files for each of - * the tables and the compiling them to AML with the Intel iasl compiler. - * The AML files are then read into guest memory. - * - * The tables are placed in the guest's ROM area just below 1MB physical, - * above the MPTable. 
- * - * Layout - * ------ - * RSDP -> 0xf2400 (36 bytes fixed) - * RSDT -> 0xf2440 (36 bytes + 4*7 table addrs, 4 used) - * XSDT -> 0xf2480 (36 bytes + 8*7 table addrs, 4 used) - * MADT -> 0xf2500 (depends on #CPUs) - * FADT -> 0xf2600 (268 bytes) - * HPET -> 0xf2740 (56 bytes) - * MCFG -> 0xf2780 (60 bytes) - * FACS -> 0xf27C0 (64 bytes) - * DSDT -> 0xf2800 (variable - can go up to 0x100000) - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/errno.h> -#include <sys/stat.h> - -#include <paths.h> -#include <stdarg.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#include <machine/vmm.h> -#include <vmmapi.h> - -#include "bhyverun.h" -#include "acpi.h" -#include "pci_emul.h" - -/* - * Define the base address of the ACPI tables, and the offsets to - * the individual tables - */ -#define BHYVE_ACPI_BASE 0xf2400 -#define RSDT_OFFSET 0x040 -#define XSDT_OFFSET 0x080 -#define MADT_OFFSET 0x100 -#define FADT_OFFSET 0x200 -#define HPET_OFFSET 0x340 -#define MCFG_OFFSET 0x380 -#define FACS_OFFSET 0x3C0 -#define DSDT_OFFSET 0x400 - -#define BHYVE_ASL_TEMPLATE "bhyve.XXXXXXX" -#define BHYVE_ASL_SUFFIX ".aml" -#define BHYVE_ASL_COMPILER "/usr/sbin/iasl" - -static int basl_keep_temps; -static int basl_verbose_iasl; -static int basl_ncpu; -static uint32_t basl_acpi_base = BHYVE_ACPI_BASE; -static uint32_t hpet_capabilities; - -/* - * Contains the full pathname of the template to be passed - * to mkstemp/mktemps(3) - */ -static char basl_template[MAXPATHLEN]; -static char basl_stemplate[MAXPATHLEN]; - -/* - * State for dsdt_line(), dsdt_indent(), and dsdt_unindent(). - */ -static FILE *dsdt_fp; -static int dsdt_indent_level; -static int dsdt_error; - -struct basl_fio { - int fd; - FILE *fp; - char f_name[MAXPATHLEN]; -}; - -#define EFPRINTF(...) 
\ - err = fprintf(__VA_ARGS__); if (err < 0) goto err_exit; - -#define EFFLUSH(x) \ - err = fflush(x); if (err != 0) goto err_exit; - -static int -basl_fwrite_rsdp(FILE *fp) -{ - int err; - - err = 0; - - EFPRINTF(fp, "/*\n"); - EFPRINTF(fp, " * bhyve RSDP template\n"); - EFPRINTF(fp, " */\n"); - EFPRINTF(fp, "[0008]\t\tSignature : \"RSD PTR \"\n"); - EFPRINTF(fp, "[0001]\t\tChecksum : 43\n"); - EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); - EFPRINTF(fp, "[0001]\t\tRevision : 02\n"); - EFPRINTF(fp, "[0004]\t\tRSDT Address : %08X\n", - basl_acpi_base + RSDT_OFFSET); - EFPRINTF(fp, "[0004]\t\tLength : 00000024\n"); - EFPRINTF(fp, "[0008]\t\tXSDT Address : 00000000%08X\n", - basl_acpi_base + XSDT_OFFSET); - EFPRINTF(fp, "[0001]\t\tExtended Checksum : 00\n"); - EFPRINTF(fp, "[0003]\t\tReserved : 000000\n"); - - EFFLUSH(fp); - - return (0); - -err_exit: - return (errno); -} - -static int -basl_fwrite_rsdt(FILE *fp) -{ - int err; - - err = 0; - - EFPRINTF(fp, "/*\n"); - EFPRINTF(fp, " * bhyve RSDT template\n"); - EFPRINTF(fp, " */\n"); - EFPRINTF(fp, "[0004]\t\tSignature : \"RSDT\"\n"); - EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); - EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); - EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); - EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); - EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVRSDT \"\n"); - EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); - /* iasl will fill in the compiler ID/revision fields */ - EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); - EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); - EFPRINTF(fp, "\n"); - - /* Add in pointers to the MADT, FADT and HPET */ - EFPRINTF(fp, "[0004]\t\tACPI Table Address 0 : %08X\n", - basl_acpi_base + MADT_OFFSET); - EFPRINTF(fp, "[0004]\t\tACPI Table Address 1 : %08X\n", - basl_acpi_base + FADT_OFFSET); - EFPRINTF(fp, "[0004]\t\tACPI Table Address 2 : %08X\n", - basl_acpi_base + HPET_OFFSET); - EFPRINTF(fp, "[0004]\t\tACPI Table Address 3 : %08X\n", - 
basl_acpi_base + MCFG_OFFSET); - - EFFLUSH(fp); - - return (0); - -err_exit: - return (errno); -} - -static int -basl_fwrite_xsdt(FILE *fp) -{ - int err; - - err = 0; - - EFPRINTF(fp, "/*\n"); - EFPRINTF(fp, " * bhyve XSDT template\n"); - EFPRINTF(fp, " */\n"); - EFPRINTF(fp, "[0004]\t\tSignature : \"XSDT\"\n"); - EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); - EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); - EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); - EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); - EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVXSDT \"\n"); - EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); - /* iasl will fill in the compiler ID/revision fields */ - EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); - EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); - EFPRINTF(fp, "\n"); - - /* Add in pointers to the MADT, FADT and HPET */ - EFPRINTF(fp, "[0004]\t\tACPI Table Address 0 : 00000000%08X\n", - basl_acpi_base + MADT_OFFSET); - EFPRINTF(fp, "[0004]\t\tACPI Table Address 1 : 00000000%08X\n", - basl_acpi_base + FADT_OFFSET); - EFPRINTF(fp, "[0004]\t\tACPI Table Address 2 : 00000000%08X\n", - basl_acpi_base + HPET_OFFSET); - EFPRINTF(fp, "[0004]\t\tACPI Table Address 3 : 00000000%08X\n", - basl_acpi_base + MCFG_OFFSET); - - EFFLUSH(fp); - - return (0); - -err_exit: - return (errno); -} - -static int -basl_fwrite_madt(FILE *fp) -{ - int err; - int i; - - err = 0; - - EFPRINTF(fp, "/*\n"); - EFPRINTF(fp, " * bhyve MADT template\n"); - EFPRINTF(fp, " */\n"); - EFPRINTF(fp, "[0004]\t\tSignature : \"APIC\"\n"); - EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); - EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); - EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); - EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); - EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVMADT \"\n"); - EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); - - /* iasl will fill in the compiler ID/revision fields */ - EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); - 
EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, "[0004]\t\tLocal Apic Address : FEE00000\n"); - EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n"); - EFPRINTF(fp, "\t\t\tPC-AT Compatibility : 1\n"); - EFPRINTF(fp, "\n"); - - /* Add a Processor Local APIC entry for each CPU */ - for (i = 0; i < basl_ncpu; i++) { - EFPRINTF(fp, "[0001]\t\tSubtable Type : 00\n"); - EFPRINTF(fp, "[0001]\t\tLength : 08\n"); - /* iasl expects hex values for the proc and apic id's */ - EFPRINTF(fp, "[0001]\t\tProcessor ID : %02x\n", i); - EFPRINTF(fp, "[0001]\t\tLocal Apic ID : %02x\n", i); - EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n"); - EFPRINTF(fp, "\t\t\tProcessor Enabled : 1\n"); - EFPRINTF(fp, "\n"); - } - - /* Always a single IOAPIC entry, with ID 0 */ - EFPRINTF(fp, "[0001]\t\tSubtable Type : 01\n"); - EFPRINTF(fp, "[0001]\t\tLength : 0C\n"); - /* iasl expects a hex value for the i/o apic id */ - EFPRINTF(fp, "[0001]\t\tI/O Apic ID : %02x\n", 0); - EFPRINTF(fp, "[0001]\t\tReserved : 00\n"); - EFPRINTF(fp, "[0004]\t\tAddress : fec00000\n"); - EFPRINTF(fp, "[0004]\t\tInterrupt : 00000000\n"); - EFPRINTF(fp, "\n"); - - /* Legacy IRQ0 is connected to pin 2 of the IOAPIC */ - EFPRINTF(fp, "[0001]\t\tSubtable Type : 02\n"); - EFPRINTF(fp, "[0001]\t\tLength : 0A\n"); - EFPRINTF(fp, "[0001]\t\tBus : 00\n"); - EFPRINTF(fp, "[0001]\t\tSource : 00\n"); - EFPRINTF(fp, "[0004]\t\tInterrupt : 00000002\n"); - EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0005\n"); - EFPRINTF(fp, "\t\t\tPolarity : 1\n"); - EFPRINTF(fp, "\t\t\tTrigger Mode : 1\n"); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, "[0001]\t\tSubtable Type : 02\n"); - EFPRINTF(fp, "[0001]\t\tLength : 0A\n"); - EFPRINTF(fp, "[0001]\t\tBus : 00\n"); - EFPRINTF(fp, "[0001]\t\tSource : %02X\n", SCI_INT); - EFPRINTF(fp, "[0004]\t\tInterrupt : %08X\n", SCI_INT); - EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0000\n"); - EFPRINTF(fp, "\t\t\tPolarity : 3\n"); - 
EFPRINTF(fp, "\t\t\tTrigger Mode : 3\n"); - EFPRINTF(fp, "\n"); - - /* Local APIC NMI is connected to LINT 1 on all CPUs */ - EFPRINTF(fp, "[0001]\t\tSubtable Type : 04\n"); - EFPRINTF(fp, "[0001]\t\tLength : 06\n"); - EFPRINTF(fp, "[0001]\t\tProcessorId : FF\n"); - EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0005\n"); - EFPRINTF(fp, "\t\t\tPolarity : 1\n"); - EFPRINTF(fp, "\t\t\tTrigger Mode : 1\n"); - EFPRINTF(fp, "[0001]\t\tInterrupt : 01\n"); - EFPRINTF(fp, "\n"); - - EFFLUSH(fp); - - return (0); - -err_exit: - return (errno); -} - -static int -basl_fwrite_fadt(FILE *fp) -{ - int err; - - err = 0; - - EFPRINTF(fp, "/*\n"); - EFPRINTF(fp, " * bhyve FADT template\n"); - EFPRINTF(fp, " */\n"); - EFPRINTF(fp, "[0004]\t\tSignature : \"FACP\"\n"); - EFPRINTF(fp, "[0004]\t\tTable Length : 0000010C\n"); - EFPRINTF(fp, "[0001]\t\tRevision : 05\n"); - EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); - EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); - EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVFACP \"\n"); - EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); - /* iasl will fill in the compiler ID/revision fields */ - EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); - EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, "[0004]\t\tFACS Address : %08X\n", - basl_acpi_base + FACS_OFFSET); - EFPRINTF(fp, "[0004]\t\tDSDT Address : %08X\n", - basl_acpi_base + DSDT_OFFSET); - EFPRINTF(fp, "[0001]\t\tModel : 01\n"); - EFPRINTF(fp, "[0001]\t\tPM Profile : 00 [Unspecified]\n"); - EFPRINTF(fp, "[0002]\t\tSCI Interrupt : %04X\n", - SCI_INT); - EFPRINTF(fp, "[0004]\t\tSMI Command Port : %08X\n", - SMI_CMD); - EFPRINTF(fp, "[0001]\t\tACPI Enable Value : %02X\n", - BHYVE_ACPI_ENABLE); - EFPRINTF(fp, "[0001]\t\tACPI Disable Value : %02X\n", - BHYVE_ACPI_DISABLE); - EFPRINTF(fp, "[0001]\t\tS4BIOS Command : 00\n"); - EFPRINTF(fp, "[0001]\t\tP-State Control : 00\n"); - EFPRINTF(fp, "[0004]\t\tPM1A Event Block Address : %08X\n", - 
PM1A_EVT_ADDR); - EFPRINTF(fp, "[0004]\t\tPM1B Event Block Address : 00000000\n"); - EFPRINTF(fp, "[0004]\t\tPM1A Control Block Address : %08X\n", - PM1A_CNT_ADDR); - EFPRINTF(fp, "[0004]\t\tPM1B Control Block Address : 00000000\n"); - EFPRINTF(fp, "[0004]\t\tPM2 Control Block Address : 00000000\n"); - EFPRINTF(fp, "[0004]\t\tPM Timer Block Address : %08X\n", - IO_PMTMR); - EFPRINTF(fp, "[0004]\t\tGPE0 Block Address : 00000000\n"); - EFPRINTF(fp, "[0004]\t\tGPE1 Block Address : 00000000\n"); - EFPRINTF(fp, "[0001]\t\tPM1 Event Block Length : 04\n"); - EFPRINTF(fp, "[0001]\t\tPM1 Control Block Length : 02\n"); - EFPRINTF(fp, "[0001]\t\tPM2 Control Block Length : 00\n"); - EFPRINTF(fp, "[0001]\t\tPM Timer Block Length : 04\n"); - EFPRINTF(fp, "[0001]\t\tGPE0 Block Length : 00\n"); - EFPRINTF(fp, "[0001]\t\tGPE1 Block Length : 00\n"); - EFPRINTF(fp, "[0001]\t\tGPE1 Base Offset : 00\n"); - EFPRINTF(fp, "[0001]\t\t_CST Support : 00\n"); - EFPRINTF(fp, "[0002]\t\tC2 Latency : 0000\n"); - EFPRINTF(fp, "[0002]\t\tC3 Latency : 0000\n"); - EFPRINTF(fp, "[0002]\t\tCPU Cache Size : 0000\n"); - EFPRINTF(fp, "[0002]\t\tCache Flush Stride : 0000\n"); - EFPRINTF(fp, "[0001]\t\tDuty Cycle Offset : 00\n"); - EFPRINTF(fp, "[0001]\t\tDuty Cycle Width : 00\n"); - EFPRINTF(fp, "[0001]\t\tRTC Day Alarm Index : 00\n"); - EFPRINTF(fp, "[0001]\t\tRTC Month Alarm Index : 00\n"); - EFPRINTF(fp, "[0001]\t\tRTC Century Index : 32\n"); - EFPRINTF(fp, "[0002]\t\tBoot Flags (decoded below) : 0000\n"); - EFPRINTF(fp, "\t\t\tLegacy Devices Supported (V2) : 0\n"); - EFPRINTF(fp, "\t\t\t8042 Present on ports 60/64 (V2) : 0\n"); - EFPRINTF(fp, "\t\t\tVGA Not Present (V4) : 1\n"); - EFPRINTF(fp, "\t\t\tMSI Not Supported (V4) : 0\n"); - EFPRINTF(fp, "\t\t\tPCIe ASPM Not Supported (V4) : 1\n"); - EFPRINTF(fp, "\t\t\tCMOS RTC Not Present (V5) : 0\n"); - EFPRINTF(fp, "[0001]\t\tReserved : 00\n"); - EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000000\n"); - EFPRINTF(fp, "\t\t\tWBINVD instruction is 
operational (V1) : 1\n"); - EFPRINTF(fp, "\t\t\tWBINVD flushes all caches (V1) : 0\n"); - EFPRINTF(fp, "\t\t\tAll CPUs support C1 (V1) : 1\n"); - EFPRINTF(fp, "\t\t\tC2 works on MP system (V1) : 0\n"); - EFPRINTF(fp, "\t\t\tControl Method Power Button (V1) : 0\n"); - EFPRINTF(fp, "\t\t\tControl Method Sleep Button (V1) : 1\n"); - EFPRINTF(fp, "\t\t\tRTC wake not in fixed reg space (V1) : 0\n"); - EFPRINTF(fp, "\t\t\tRTC can wake system from S4 (V1) : 0\n"); - EFPRINTF(fp, "\t\t\t32-bit PM Timer (V1) : 1\n"); - EFPRINTF(fp, "\t\t\tDocking Supported (V1) : 0\n"); - EFPRINTF(fp, "\t\t\tReset Register Supported (V2) : 1\n"); - EFPRINTF(fp, "\t\t\tSealed Case (V3) : 0\n"); - EFPRINTF(fp, "\t\t\tHeadless - No Video (V3) : 1\n"); - EFPRINTF(fp, "\t\t\tUse native instr after SLP_TYPx (V3) : 0\n"); - EFPRINTF(fp, "\t\t\tPCIEXP_WAK Bits Supported (V4) : 0\n"); - EFPRINTF(fp, "\t\t\tUse Platform Timer (V4) : 0\n"); - EFPRINTF(fp, "\t\t\tRTC_STS valid on S4 wake (V4) : 0\n"); - EFPRINTF(fp, "\t\t\tRemote Power-on capable (V4) : 0\n"); - EFPRINTF(fp, "\t\t\tUse APIC Cluster Model (V4) : 0\n"); - EFPRINTF(fp, "\t\t\tUse APIC Physical Destination Mode (V4) : 1\n"); - EFPRINTF(fp, "\t\t\tHardware Reduced (V5) : 0\n"); - EFPRINTF(fp, "\t\t\tLow Power S0 Idle (V5) : 0\n"); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, - "[0012]\t\tReset Register : [Generic Address Structure]\n"); - EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); - EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); - EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); - EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); - EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000CF9\n"); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, "[0001]\t\tValue to cause reset : 06\n"); - EFPRINTF(fp, "[0002]\t\tARM Flags (decoded below): 0000\n"); - EFPRINTF(fp, "\t\t\tPSCI Compliant : 0\n"); - EFPRINTF(fp, "\t\t\tMust use HVC for PSCI : 0\n"); - EFPRINTF(fp, "[0001]\t\tFADT Minor Revision : 01\n"); - EFPRINTF(fp, "[0008]\t\tFACS Address : 
00000000%08X\n", - basl_acpi_base + FACS_OFFSET); - EFPRINTF(fp, "[0008]\t\tDSDT Address : 00000000%08X\n", - basl_acpi_base + DSDT_OFFSET); - EFPRINTF(fp, - "[0012]\t\tPM1A Event Block : [Generic Address Structure]\n"); - EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); - EFPRINTF(fp, "[0001]\t\tBit Width : 20\n"); - EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); - EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word Access:16]\n"); - EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n", - PM1A_EVT_ADDR); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, - "[0012]\t\tPM1B Event Block : [Generic Address Structure]\n"); - EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); - EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); - EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); - EFPRINTF(fp, - "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); - EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, - "[0012]\t\tPM1A Control Block : [Generic Address Structure]\n"); - EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); - EFPRINTF(fp, "[0001]\t\tBit Width : 10\n"); - EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); - EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 02 [Word Access:16]\n"); - EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n", - PM1A_CNT_ADDR); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, - "[0012]\t\tPM1B Control Block : [Generic Address Structure]\n"); - EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); - EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); - EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); - EFPRINTF(fp, - "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); - EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, - "[0012]\t\tPM2 Control Block : [Generic Address Structure]\n"); - EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); - EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); - EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); - EFPRINTF(fp, - "[0001]\t\tEncoded Access Width : 
00 [Undefined/Legacy]\n"); - EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); - EFPRINTF(fp, "\n"); - - /* Valid for bhyve */ - EFPRINTF(fp, - "[0012]\t\tPM Timer Block : [Generic Address Structure]\n"); - EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); - EFPRINTF(fp, "[0001]\t\tBit Width : 20\n"); - EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); - EFPRINTF(fp, - "[0001]\t\tEncoded Access Width : 03 [DWord Access:32]\n"); - EFPRINTF(fp, "[0008]\t\tAddress : 00000000%08X\n", - IO_PMTMR); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, "[0012]\t\tGPE0 Block : [Generic Address Structure]\n"); - EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); - EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); - EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); - EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); - EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, "[0012]\t\tGPE1 Block : [Generic Address Structure]\n"); - EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); - EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); - EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); - EFPRINTF(fp, - "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); - EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, - "[0012]\t\tSleep Control Register : [Generic Address Structure]\n"); - EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); - EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); - EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); - EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); - EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n"); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, - "[0012]\t\tSleep Status Register : [Generic Address Structure]\n"); - EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n"); - EFPRINTF(fp, "[0001]\t\tBit Width : 08\n"); - EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); - EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n"); - EFPRINTF(fp, 
"[0008]\t\tAddress : 0000000000000000\n"); - - EFFLUSH(fp); - - return (0); - -err_exit: - return (errno); -} - -static int -basl_fwrite_hpet(FILE *fp) -{ - int err; - - err = 0; - - EFPRINTF(fp, "/*\n"); - EFPRINTF(fp, " * bhyve HPET template\n"); - EFPRINTF(fp, " */\n"); - EFPRINTF(fp, "[0004]\t\tSignature : \"HPET\"\n"); - EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); - EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); - EFPRINTF(fp, "[0001]\t\tChecksum : 00\n"); - EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); - EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVHPET \"\n"); - EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); - - /* iasl will fill in the compiler ID/revision fields */ - EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); - EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, "[0004]\t\tTimer Block ID : %08X\n", hpet_capabilities); - EFPRINTF(fp, - "[0012]\t\tTimer Block Register : [Generic Address Structure]\n"); - EFPRINTF(fp, "[0001]\t\tSpace ID : 00 [SystemMemory]\n"); - EFPRINTF(fp, "[0001]\t\tBit Width : 00\n"); - EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n"); - EFPRINTF(fp, - "[0001]\t\tEncoded Access Width : 00 [Undefined/Legacy]\n"); - EFPRINTF(fp, "[0008]\t\tAddress : 00000000FED00000\n"); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, "[0001]\t\tHPET Number : 00\n"); - EFPRINTF(fp, "[0002]\t\tMinimum Clock Ticks : 0000\n"); - EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000001\n"); - EFPRINTF(fp, "\t\t\t4K Page Protect : 1\n"); - EFPRINTF(fp, "\t\t\t64K Page Protect : 0\n"); - EFPRINTF(fp, "\n"); - - EFFLUSH(fp); - - return (0); - -err_exit: - return (errno); -} - -static int -basl_fwrite_mcfg(FILE *fp) -{ - int err = 0; - - EFPRINTF(fp, "/*\n"); - EFPRINTF(fp, " * bhyve MCFG template\n"); - EFPRINTF(fp, " */\n"); - EFPRINTF(fp, "[0004]\t\tSignature : \"MCFG\"\n"); - EFPRINTF(fp, "[0004]\t\tTable Length : 00000000\n"); - EFPRINTF(fp, "[0001]\t\tRevision : 01\n"); - EFPRINTF(fp, 
"[0001]\t\tChecksum : 00\n"); - EFPRINTF(fp, "[0006]\t\tOem ID : \"BHYVE \"\n"); - EFPRINTF(fp, "[0008]\t\tOem Table ID : \"BVMCFG \"\n"); - EFPRINTF(fp, "[0004]\t\tOem Revision : 00000001\n"); - - /* iasl will fill in the compiler ID/revision fields */ - EFPRINTF(fp, "[0004]\t\tAsl Compiler ID : \"xxxx\"\n"); - EFPRINTF(fp, "[0004]\t\tAsl Compiler Revision : 00000000\n"); - EFPRINTF(fp, "[0008]\t\tReserved : 0\n"); - EFPRINTF(fp, "\n"); - - EFPRINTF(fp, "[0008]\t\tBase Address : %016lX\n", pci_ecfg_base()); - EFPRINTF(fp, "[0002]\t\tSegment Group: 0000\n"); - EFPRINTF(fp, "[0001]\t\tStart Bus: 00\n"); - EFPRINTF(fp, "[0001]\t\tEnd Bus: FF\n"); - EFPRINTF(fp, "[0004]\t\tReserved : 0\n"); - EFFLUSH(fp); - return (0); -err_exit: - return (errno); -} - -static int -basl_fwrite_facs(FILE *fp) -{ - int err; - - err = 0; - - EFPRINTF(fp, "/*\n"); - EFPRINTF(fp, " * bhyve FACS template\n"); - EFPRINTF(fp, " */\n"); - EFPRINTF(fp, "[0004]\t\tSignature : \"FACS\"\n"); - EFPRINTF(fp, "[0004]\t\tLength : 00000040\n"); - EFPRINTF(fp, "[0004]\t\tHardware Signature : 00000000\n"); - EFPRINTF(fp, "[0004]\t\t32 Firmware Waking Vector : 00000000\n"); - EFPRINTF(fp, "[0004]\t\tGlobal Lock : 00000000\n"); - EFPRINTF(fp, "[0004]\t\tFlags (decoded below) : 00000000\n"); - EFPRINTF(fp, "\t\t\tS4BIOS Support Present : 0\n"); - EFPRINTF(fp, "\t\t\t64-bit Wake Supported (V2) : 0\n"); - EFPRINTF(fp, - "[0008]\t\t64 Firmware Waking Vector : 0000000000000000\n"); - EFPRINTF(fp, "[0001]\t\tVersion : 02\n"); - EFPRINTF(fp, "[0003]\t\tReserved : 000000\n"); - EFPRINTF(fp, "[0004]\t\tOspmFlags (decoded below) : 00000000\n"); - EFPRINTF(fp, "\t\t\t64-bit Wake Env Required (V2) : 0\n"); - - EFFLUSH(fp); - - return (0); - -err_exit: - return (errno); -} - -/* - * Helper routines for writing to the DSDT from other modules. - */ -void -dsdt_line(const char *fmt, ...) 
-{ - va_list ap; - int err; - - if (dsdt_error != 0) - return; - - if (strcmp(fmt, "") != 0) { - if (dsdt_indent_level != 0) - EFPRINTF(dsdt_fp, "%*c", dsdt_indent_level * 2, ' '); - va_start(ap, fmt); - if (vfprintf(dsdt_fp, fmt, ap) < 0) - goto err_exit; - va_end(ap); - } - EFPRINTF(dsdt_fp, "\n"); - return; - -err_exit: - dsdt_error = errno; -} - -void -dsdt_indent(int levels) -{ - - dsdt_indent_level += levels; - assert(dsdt_indent_level >= 0); -} - -void -dsdt_unindent(int levels) -{ - - assert(dsdt_indent_level >= levels); - dsdt_indent_level -= levels; -} - -void -dsdt_fixed_ioport(uint16_t iobase, uint16_t length) -{ - - dsdt_line("IO (Decode16,"); - dsdt_line(" 0x%04X, // Range Minimum", iobase); - dsdt_line(" 0x%04X, // Range Maximum", iobase); - dsdt_line(" 0x01, // Alignment"); - dsdt_line(" 0x%02X, // Length", length); - dsdt_line(" )"); -} - -void -dsdt_fixed_irq(uint8_t irq) -{ - - dsdt_line("IRQNoFlags ()"); - dsdt_line(" {%d}", irq); -} - -void -dsdt_fixed_mem32(uint32_t base, uint32_t length) -{ - - dsdt_line("Memory32Fixed (ReadWrite,"); - dsdt_line(" 0x%08X, // Address Base", base); - dsdt_line(" 0x%08X, // Address Length", length); - dsdt_line(" )"); -} - -static int -basl_fwrite_dsdt(FILE *fp) -{ - int err; - - err = 0; - dsdt_fp = fp; - dsdt_error = 0; - dsdt_indent_level = 0; - - dsdt_line("/*"); - dsdt_line(" * bhyve DSDT template"); - dsdt_line(" */"); - dsdt_line("DefinitionBlock (\"bhyve_dsdt.aml\", \"DSDT\", 2," - "\"BHYVE \", \"BVDSDT \", 0x00000001)"); - dsdt_line("{"); - dsdt_line(" Name (_S5, Package ()"); - dsdt_line(" {"); - dsdt_line(" 0x05,"); - dsdt_line(" Zero,"); - dsdt_line(" })"); - - pci_write_dsdt(); - - dsdt_line(""); - dsdt_line(" Scope (_SB.PC00)"); - dsdt_line(" {"); - dsdt_line(" Device (HPET)"); - dsdt_line(" {"); - dsdt_line(" Name (_HID, EISAID(\"PNP0103\"))"); - dsdt_line(" Name (_UID, 0)"); - dsdt_line(" Name (_CRS, ResourceTemplate ()"); - dsdt_line(" {"); - dsdt_indent(4); - dsdt_fixed_mem32(0xFED00000, 
0x400); - dsdt_unindent(4); - dsdt_line(" })"); - dsdt_line(" }"); - dsdt_line(" }"); - dsdt_line("}"); - - if (dsdt_error != 0) - return (dsdt_error); - - EFFLUSH(fp); - - return (0); - -err_exit: - return (errno); -} - -static int -basl_open(struct basl_fio *bf, int suffix) -{ - int err; - - err = 0; - - if (suffix) { - strlcpy(bf->f_name, basl_stemplate, MAXPATHLEN); - bf->fd = mkstemps(bf->f_name, strlen(BHYVE_ASL_SUFFIX)); - } else { - strlcpy(bf->f_name, basl_template, MAXPATHLEN); - bf->fd = mkstemp(bf->f_name); - } - - if (bf->fd > 0) { - bf->fp = fdopen(bf->fd, "w+"); - if (bf->fp == NULL) { - unlink(bf->f_name); - close(bf->fd); - } - } else { - err = 1; - } - - return (err); -} - -static void -basl_close(struct basl_fio *bf) -{ - - if (!basl_keep_temps) - unlink(bf->f_name); - fclose(bf->fp); -} - -static int -basl_start(struct basl_fio *in, struct basl_fio *out) -{ - int err; - - err = basl_open(in, 0); - if (!err) { - err = basl_open(out, 1); - if (err) { - basl_close(in); - } - } - - return (err); -} - -static void -basl_end(struct basl_fio *in, struct basl_fio *out) -{ - - basl_close(in); - basl_close(out); -} - -static int -basl_load(struct vmctx *ctx, int fd, uint64_t off) -{ - struct stat sb; - void *gaddr; - - if (fstat(fd, &sb) < 0) - return (errno); - - gaddr = paddr_guest2host(ctx, basl_acpi_base + off, sb.st_size); - if (gaddr == NULL) - return (EFAULT); - - if (read(fd, gaddr, sb.st_size) < 0) - return (errno); - - return (0); -} - -static int -basl_compile(struct vmctx *ctx, int (*fwrite_section)(FILE *), uint64_t offset) -{ - struct basl_fio io[2]; - static char iaslbuf[3*MAXPATHLEN + 10]; - char *fmt; - int err; - - err = basl_start(&io[0], &io[1]); - if (!err) { - err = (*fwrite_section)(io[0].fp); - - if (!err) { - /* - * iasl sends the results of the compilation to - * stdout. 
Shut this down by using the shell to - * redirect stdout to /dev/null, unless the user - * has requested verbose output for debugging - * purposes - */ - fmt = basl_verbose_iasl ? - "%s -p %s %s" : - "/bin/sh -c \"%s -p %s %s\" 1> /dev/null"; - - snprintf(iaslbuf, sizeof(iaslbuf), - fmt, - BHYVE_ASL_COMPILER, - io[1].f_name, io[0].f_name); - err = system(iaslbuf); - - if (!err) { - /* - * Copy the aml output file into guest - * memory at the specified location - */ - err = basl_load(ctx, io[1].fd, offset); - } - } - basl_end(&io[0], &io[1]); - } - - return (err); -} - -static int -basl_make_templates(void) -{ - const char *tmpdir; - int err; - int len; - - err = 0; - - /* - * - */ - if ((tmpdir = getenv("BHYVE_TMPDIR")) == NULL || *tmpdir == '\0' || - (tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') { - tmpdir = _PATH_TMP; - } - - len = strlen(tmpdir); - - if ((len + sizeof(BHYVE_ASL_TEMPLATE) + 1) < MAXPATHLEN) { - strcpy(basl_template, tmpdir); - while (len > 0 && basl_template[len - 1] == '/') - len--; - basl_template[len] = '/'; - strcpy(&basl_template[len + 1], BHYVE_ASL_TEMPLATE); - } else - err = E2BIG; - - if (!err) { - /* - * len has been intialized (and maybe adjusted) above - */ - if ((len + sizeof(BHYVE_ASL_TEMPLATE) + 1 + - sizeof(BHYVE_ASL_SUFFIX)) < MAXPATHLEN) { - strcpy(basl_stemplate, tmpdir); - basl_stemplate[len] = '/'; - strcpy(&basl_stemplate[len + 1], BHYVE_ASL_TEMPLATE); - len = strlen(basl_stemplate); - strcpy(&basl_stemplate[len], BHYVE_ASL_SUFFIX); - } else - err = E2BIG; - } - - return (err); -} - -static struct { - int (*wsect)(FILE *fp); - uint64_t offset; -} basl_ftables[] = -{ - { basl_fwrite_rsdp, 0}, - { basl_fwrite_rsdt, RSDT_OFFSET }, - { basl_fwrite_xsdt, XSDT_OFFSET }, - { basl_fwrite_madt, MADT_OFFSET }, - { basl_fwrite_fadt, FADT_OFFSET }, - { basl_fwrite_hpet, HPET_OFFSET }, - { basl_fwrite_mcfg, MCFG_OFFSET }, - { basl_fwrite_facs, FACS_OFFSET }, - { basl_fwrite_dsdt, DSDT_OFFSET }, - { NULL } -}; - -int 
-acpi_build(struct vmctx *ctx, int ncpu) -{ - int err; - int i; - - basl_ncpu = ncpu; - - err = vm_get_hpet_capabilities(ctx, &hpet_capabilities); - if (err != 0) - return (err); - - /* - * For debug, allow the user to have iasl compiler output sent - * to stdout rather than /dev/null - */ - if (getenv("BHYVE_ACPI_VERBOSE_IASL")) - basl_verbose_iasl = 1; - - /* - * Allow the user to keep the generated ASL files for debugging - * instead of deleting them following use - */ - if (getenv("BHYVE_ACPI_KEEPTMPS")) - basl_keep_temps = 1; - - i = 0; - err = basl_make_templates(); - - /* - * Run through all the ASL files, compiling them and - * copying them into guest memory - */ - while (!err && basl_ftables[i].wsect != NULL) { - err = basl_compile(ctx, basl_ftables[i].wsect, - basl_ftables[i].offset); - i++; - } - - return (err); -} diff --git a/usr.sbin/bhyve/acpi.h b/usr.sbin/bhyve/acpi.h deleted file mode 100644 index 652164a..0000000 --- a/usr.sbin/bhyve/acpi.h +++ /dev/null @@ -1,54 +0,0 @@ -/*- - * Copyright (c) 2012 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _ACPI_H_ -#define _ACPI_H_ - -#define SCI_INT 9 - -#define SMI_CMD 0xb2 -#define BHYVE_ACPI_ENABLE 0xa0 -#define BHYVE_ACPI_DISABLE 0xa1 - -#define PM1A_EVT_ADDR 0x400 -#define PM1A_CNT_ADDR 0x404 - -#define IO_PMTMR 0x408 /* 4-byte i/o port for the timer */ - -struct vmctx; - -int acpi_build(struct vmctx *ctx, int ncpu); -void dsdt_line(const char *fmt, ...); -void dsdt_fixed_ioport(uint16_t iobase, uint16_t length); -void dsdt_fixed_irq(uint8_t irq); -void dsdt_fixed_mem32(uint32_t base, uint32_t length); -void dsdt_indent(int levels); -void dsdt_unindent(int levels); -void sci_init(struct vmctx *ctx); - -#endif /* _ACPI_H_ */ diff --git a/usr.sbin/bhyve/ahci.h b/usr.sbin/bhyve/ahci.h deleted file mode 100644 index 1fd9f20..0000000 --- a/usr.sbin/bhyve/ahci.h +++ /dev/null @@ -1,322 +0,0 @@ -/*- - * Copyright (c) 1998 - 2008 Søren Schmidt <sos@FreeBSD.org> - * Copyright (c) 2009-2012 Alexander Motin <mav@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer, - * without modification, immediately at the beginning of the file. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _AHCI_H_ -#define _AHCI_H_ - -/* ATA register defines */ -#define ATA_DATA 0 /* (RW) data */ - -#define ATA_FEATURE 1 /* (W) feature */ -#define ATA_F_DMA 0x01 /* enable DMA */ -#define ATA_F_OVL 0x02 /* enable overlap */ - -#define ATA_COUNT 2 /* (W) sector count */ - -#define ATA_SECTOR 3 /* (RW) sector # */ -#define ATA_CYL_LSB 4 /* (RW) cylinder# LSB */ -#define ATA_CYL_MSB 5 /* (RW) cylinder# MSB */ -#define ATA_DRIVE 6 /* (W) Sector/Drive/Head */ -#define ATA_D_LBA 0x40 /* use LBA addressing */ -#define ATA_D_IBM 0xa0 /* 512 byte sectors, ECC */ - -#define ATA_COMMAND 7 /* (W) command */ - -#define ATA_ERROR 8 /* (R) error */ -#define ATA_E_ILI 0x01 /* illegal length */ -#define ATA_E_NM 0x02 /* no media */ -#define ATA_E_ABORT 0x04 /* command aborted */ -#define ATA_E_MCR 0x08 /* media change request */ -#define ATA_E_IDNF 0x10 /* ID not found */ -#define ATA_E_MC 0x20 /* media changed */ -#define ATA_E_UNC 0x40 /* uncorrectable data */ -#define ATA_E_ICRC 0x80 /* UDMA crc error */ 
-#define ATA_E_ATAPI_SENSE_MASK 0xf0 /* ATAPI sense key mask */ - -#define ATA_IREASON 9 /* (R) interrupt reason */ -#define ATA_I_CMD 0x01 /* cmd (1) | data (0) */ -#define ATA_I_IN 0x02 /* read (1) | write (0) */ -#define ATA_I_RELEASE 0x04 /* released bus (1) */ -#define ATA_I_TAGMASK 0xf8 /* tag mask */ - -#define ATA_STATUS 10 /* (R) status */ -#define ATA_ALTSTAT 11 /* (R) alternate status */ -#define ATA_S_ERROR 0x01 /* error */ -#define ATA_S_INDEX 0x02 /* index */ -#define ATA_S_CORR 0x04 /* data corrected */ -#define ATA_S_DRQ 0x08 /* data request */ -#define ATA_S_DSC 0x10 /* drive seek completed */ -#define ATA_S_SERVICE 0x10 /* drive needs service */ -#define ATA_S_DWF 0x20 /* drive write fault */ -#define ATA_S_DMA 0x20 /* DMA ready */ -#define ATA_S_READY 0x40 /* drive ready */ -#define ATA_S_BUSY 0x80 /* busy */ - -#define ATA_CONTROL 12 /* (W) control */ -#define ATA_A_IDS 0x02 /* disable interrupts */ -#define ATA_A_RESET 0x04 /* RESET controller */ -#define ATA_A_4BIT 0x08 /* 4 head bits */ -#define ATA_A_HOB 0x80 /* High Order Byte enable */ - -/* SATA register defines */ -#define ATA_SSTATUS 13 -#define ATA_SS_DET_MASK 0x0000000f -#define ATA_SS_DET_NO_DEVICE 0x00000000 -#define ATA_SS_DET_DEV_PRESENT 0x00000001 -#define ATA_SS_DET_PHY_ONLINE 0x00000003 -#define ATA_SS_DET_PHY_OFFLINE 0x00000004 - -#define ATA_SS_SPD_MASK 0x000000f0 -#define ATA_SS_SPD_NO_SPEED 0x00000000 -#define ATA_SS_SPD_GEN1 0x00000010 -#define ATA_SS_SPD_GEN2 0x00000020 -#define ATA_SS_SPD_GEN3 0x00000030 - -#define ATA_SS_IPM_MASK 0x00000f00 -#define ATA_SS_IPM_NO_DEVICE 0x00000000 -#define ATA_SS_IPM_ACTIVE 0x00000100 -#define ATA_SS_IPM_PARTIAL 0x00000200 -#define ATA_SS_IPM_SLUMBER 0x00000600 -#define ATA_SS_IPM_DEVSLEEP 0x00000800 - -#define ATA_SERROR 14 -#define ATA_SE_DATA_CORRECTED 0x00000001 -#define ATA_SE_COMM_CORRECTED 0x00000002 -#define ATA_SE_DATA_ERR 0x00000100 -#define ATA_SE_COMM_ERR 0x00000200 -#define ATA_SE_PROT_ERR 0x00000400 -#define 
ATA_SE_HOST_ERR 0x00000800 -#define ATA_SE_PHY_CHANGED 0x00010000 -#define ATA_SE_PHY_IERROR 0x00020000 -#define ATA_SE_COMM_WAKE 0x00040000 -#define ATA_SE_DECODE_ERR 0x00080000 -#define ATA_SE_PARITY_ERR 0x00100000 -#define ATA_SE_CRC_ERR 0x00200000 -#define ATA_SE_HANDSHAKE_ERR 0x00400000 -#define ATA_SE_LINKSEQ_ERR 0x00800000 -#define ATA_SE_TRANSPORT_ERR 0x01000000 -#define ATA_SE_UNKNOWN_FIS 0x02000000 -#define ATA_SE_EXCHANGED 0x04000000 - -#define ATA_SCONTROL 15 -#define ATA_SC_DET_MASK 0x0000000f -#define ATA_SC_DET_IDLE 0x00000000 -#define ATA_SC_DET_RESET 0x00000001 -#define ATA_SC_DET_DISABLE 0x00000004 - -#define ATA_SC_SPD_MASK 0x000000f0 -#define ATA_SC_SPD_NO_SPEED 0x00000000 -#define ATA_SC_SPD_SPEED_GEN1 0x00000010 -#define ATA_SC_SPD_SPEED_GEN2 0x00000020 -#define ATA_SC_SPD_SPEED_GEN3 0x00000030 - -#define ATA_SC_IPM_MASK 0x00000f00 -#define ATA_SC_IPM_NONE 0x00000000 -#define ATA_SC_IPM_DIS_PARTIAL 0x00000100 -#define ATA_SC_IPM_DIS_SLUMBER 0x00000200 -#define ATA_SC_IPM_DIS_DEVSLEEP 0x00000400 - -#define ATA_SACTIVE 16 - -#define AHCI_MAX_PORTS 32 -#define AHCI_MAX_SLOTS 32 -#define AHCI_MAX_IRQS 16 - -/* SATA AHCI v1.0 register defines */ -#define AHCI_CAP 0x00 -#define AHCI_CAP_NPMASK 0x0000001f -#define AHCI_CAP_SXS 0x00000020 -#define AHCI_CAP_EMS 0x00000040 -#define AHCI_CAP_CCCS 0x00000080 -#define AHCI_CAP_NCS 0x00001F00 -#define AHCI_CAP_NCS_SHIFT 8 -#define AHCI_CAP_PSC 0x00002000 -#define AHCI_CAP_SSC 0x00004000 -#define AHCI_CAP_PMD 0x00008000 -#define AHCI_CAP_FBSS 0x00010000 -#define AHCI_CAP_SPM 0x00020000 -#define AHCI_CAP_SAM 0x00080000 -#define AHCI_CAP_ISS 0x00F00000 -#define AHCI_CAP_ISS_SHIFT 20 -#define AHCI_CAP_SCLO 0x01000000 -#define AHCI_CAP_SAL 0x02000000 -#define AHCI_CAP_SALP 0x04000000 -#define AHCI_CAP_SSS 0x08000000 -#define AHCI_CAP_SMPS 0x10000000 -#define AHCI_CAP_SSNTF 0x20000000 -#define AHCI_CAP_SNCQ 0x40000000 -#define AHCI_CAP_64BIT 0x80000000 - -#define AHCI_GHC 0x04 -#define AHCI_GHC_AE 0x80000000 
-#define AHCI_GHC_MRSM 0x00000004 -#define AHCI_GHC_IE 0x00000002 -#define AHCI_GHC_HR 0x00000001 - -#define AHCI_IS 0x08 -#define AHCI_PI 0x0c -#define AHCI_VS 0x10 - -#define AHCI_CCCC 0x14 -#define AHCI_CCCC_TV_MASK 0xffff0000 -#define AHCI_CCCC_TV_SHIFT 16 -#define AHCI_CCCC_CC_MASK 0x0000ff00 -#define AHCI_CCCC_CC_SHIFT 8 -#define AHCI_CCCC_INT_MASK 0x000000f8 -#define AHCI_CCCC_INT_SHIFT 3 -#define AHCI_CCCC_EN 0x00000001 -#define AHCI_CCCP 0x18 - -#define AHCI_EM_LOC 0x1C -#define AHCI_EM_CTL 0x20 -#define AHCI_EM_MR 0x00000001 -#define AHCI_EM_TM 0x00000100 -#define AHCI_EM_RST 0x00000200 -#define AHCI_EM_LED 0x00010000 -#define AHCI_EM_SAFTE 0x00020000 -#define AHCI_EM_SES2 0x00040000 -#define AHCI_EM_SGPIO 0x00080000 -#define AHCI_EM_SMB 0x01000000 -#define AHCI_EM_XMT 0x02000000 -#define AHCI_EM_ALHD 0x04000000 -#define AHCI_EM_PM 0x08000000 - -#define AHCI_CAP2 0x24 -#define AHCI_CAP2_BOH 0x00000001 -#define AHCI_CAP2_NVMP 0x00000002 -#define AHCI_CAP2_APST 0x00000004 -#define AHCI_CAP2_SDS 0x00000008 -#define AHCI_CAP2_SADM 0x00000010 -#define AHCI_CAP2_DESO 0x00000020 - -#define AHCI_OFFSET 0x100 -#define AHCI_STEP 0x80 - -#define AHCI_P_CLB 0x00 -#define AHCI_P_CLBU 0x04 -#define AHCI_P_FB 0x08 -#define AHCI_P_FBU 0x0c -#define AHCI_P_IS 0x10 -#define AHCI_P_IE 0x14 -#define AHCI_P_IX_DHR 0x00000001 -#define AHCI_P_IX_PS 0x00000002 -#define AHCI_P_IX_DS 0x00000004 -#define AHCI_P_IX_SDB 0x00000008 -#define AHCI_P_IX_UF 0x00000010 -#define AHCI_P_IX_DP 0x00000020 -#define AHCI_P_IX_PC 0x00000040 -#define AHCI_P_IX_MP 0x00000080 - -#define AHCI_P_IX_PRC 0x00400000 -#define AHCI_P_IX_IPM 0x00800000 -#define AHCI_P_IX_OF 0x01000000 -#define AHCI_P_IX_INF 0x04000000 -#define AHCI_P_IX_IF 0x08000000 -#define AHCI_P_IX_HBD 0x10000000 -#define AHCI_P_IX_HBF 0x20000000 -#define AHCI_P_IX_TFE 0x40000000 -#define AHCI_P_IX_CPD 0x80000000 - -#define AHCI_P_CMD 0x18 -#define AHCI_P_CMD_ST 0x00000001 -#define AHCI_P_CMD_SUD 0x00000002 -#define AHCI_P_CMD_POD 
0x00000004 -#define AHCI_P_CMD_CLO 0x00000008 -#define AHCI_P_CMD_FRE 0x00000010 -#define AHCI_P_CMD_CCS_MASK 0x00001f00 -#define AHCI_P_CMD_CCS_SHIFT 8 -#define AHCI_P_CMD_ISS 0x00002000 -#define AHCI_P_CMD_FR 0x00004000 -#define AHCI_P_CMD_CR 0x00008000 -#define AHCI_P_CMD_CPS 0x00010000 -#define AHCI_P_CMD_PMA 0x00020000 -#define AHCI_P_CMD_HPCP 0x00040000 -#define AHCI_P_CMD_MPSP 0x00080000 -#define AHCI_P_CMD_CPD 0x00100000 -#define AHCI_P_CMD_ESP 0x00200000 -#define AHCI_P_CMD_FBSCP 0x00400000 -#define AHCI_P_CMD_APSTE 0x00800000 -#define AHCI_P_CMD_ATAPI 0x01000000 -#define AHCI_P_CMD_DLAE 0x02000000 -#define AHCI_P_CMD_ALPE 0x04000000 -#define AHCI_P_CMD_ASP 0x08000000 -#define AHCI_P_CMD_ICC_MASK 0xf0000000 -#define AHCI_P_CMD_NOOP 0x00000000 -#define AHCI_P_CMD_ACTIVE 0x10000000 -#define AHCI_P_CMD_PARTIAL 0x20000000 -#define AHCI_P_CMD_SLUMBER 0x60000000 -#define AHCI_P_CMD_DEVSLEEP 0x80000000 - -#define AHCI_P_TFD 0x20 -#define AHCI_P_SIG 0x24 -#define AHCI_P_SSTS 0x28 -#define AHCI_P_SCTL 0x2c -#define AHCI_P_SERR 0x30 -#define AHCI_P_SACT 0x34 -#define AHCI_P_CI 0x38 -#define AHCI_P_SNTF 0x3C -#define AHCI_P_FBS 0x40 -#define AHCI_P_FBS_EN 0x00000001 -#define AHCI_P_FBS_DEC 0x00000002 -#define AHCI_P_FBS_SDE 0x00000004 -#define AHCI_P_FBS_DEV 0x00000f00 -#define AHCI_P_FBS_DEV_SHIFT 8 -#define AHCI_P_FBS_ADO 0x0000f000 -#define AHCI_P_FBS_ADO_SHIFT 12 -#define AHCI_P_FBS_DWE 0x000f0000 -#define AHCI_P_FBS_DWE_SHIFT 16 -#define AHCI_P_DEVSLP 0x44 -#define AHCI_P_DEVSLP_ADSE 0x00000001 -#define AHCI_P_DEVSLP_DSP 0x00000002 -#define AHCI_P_DEVSLP_DETO 0x000003fc -#define AHCI_P_DEVSLP_DETO_SHIFT 2 -#define AHCI_P_DEVSLP_MDAT 0x00007c00 -#define AHCI_P_DEVSLP_MDAT_SHIFT 10 -#define AHCI_P_DEVSLP_DITO 0x01ff8000 -#define AHCI_P_DEVSLP_DITO_SHIFT 15 -#define AHCI_P_DEVSLP_DM 0x0e000000 -#define AHCI_P_DEVSLP_DM_SHIFT 25 - -/* Just to be sure, if building as module. 
*/ -#if MAXPHYS < 512 * 1024 -#undef MAXPHYS -#define MAXPHYS 512 * 1024 -#endif -/* Pessimistic prognosis on number of required S/G entries */ -#define AHCI_SG_ENTRIES (roundup(btoc(MAXPHYS) + 1, 8)) -/* Command list. 32 commands. First, 1Kbyte aligned. */ -#define AHCI_CL_OFFSET 0 -#define AHCI_CL_SIZE 32 -/* Command tables. Up to 32 commands, Each, 128byte aligned. */ -#define AHCI_CT_OFFSET (AHCI_CL_OFFSET + AHCI_CL_SIZE * AHCI_MAX_SLOTS) -#define AHCI_CT_SIZE (128 + AHCI_SG_ENTRIES * 16) -/* Total main work area. */ -#define AHCI_WORK_SIZE (AHCI_CT_OFFSET + AHCI_CT_SIZE * ch->numslots) - -#endif /* _AHCI_H_ */ diff --git a/usr.sbin/bhyve/atkbdc.c b/usr.sbin/bhyve/atkbdc.c deleted file mode 100644 index 930b7af..0000000 --- a/usr.sbin/bhyve/atkbdc.c +++ /dev/null @@ -1,90 +0,0 @@ -/*- - * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> - -#include <machine/vmm.h> - -#include <vmmapi.h> - -#include <assert.h> -#include <errno.h> -#include <stdio.h> - -#include "inout.h" -#include "pci_lpc.h" - -#define KBD_DATA_PORT 0x60 - -#define KBD_STS_CTL_PORT 0x64 -#define KBD_SYS_FLAG 0x4 - -#define KBDC_RESET 0xfe - -static int -atkbdc_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - if (bytes != 1) - return (-1); - - *eax = 0; - - return (0); -} - -static int -atkbdc_sts_ctl_handler(struct vmctx *ctx, int vcpu, int in, int port, - int bytes, uint32_t *eax, void *arg) -{ - int error, retval; - - if (bytes != 1) - return (-1); - - retval = 0; - if (in) { - *eax = KBD_SYS_FLAG; /* system passed POST */ - } else { - switch (*eax) { - case KBDC_RESET: /* Pulse "reset" line. */ - error = vm_suspend(ctx, VM_SUSPEND_RESET); - assert(error == 0 || errno == EALREADY); - break; - } - } - - return (retval); -} - -INOUT_PORT(atkdbc, KBD_DATA_PORT, IOPORT_F_INOUT, atkbdc_data_handler); -SYSRES_IO(KBD_DATA_PORT, 1); -INOUT_PORT(atkbdc, KBD_STS_CTL_PORT, IOPORT_F_INOUT, - atkbdc_sts_ctl_handler); -SYSRES_IO(KBD_STS_CTL_PORT, 1); diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8 deleted file mode 100644 index 5bd2092..0000000 --- a/usr.sbin/bhyve/bhyve.8 +++ /dev/null @@ -1,373 +0,0 @@ -.\" Copyright (c) 2013 Peter Grehan -.\" All rights reserved. 
-.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" $FreeBSD$ -.\" -.Dd April 18, 2016 -.Dt BHYVE 8 -.Os -.Sh NAME -.Nm bhyve -.Nd "run a guest operating system inside a virtual machine" -.Sh SYNOPSIS -.Nm -.Op Fl abehuwxACHPSWY -.Op Fl c Ar numcpus -.Op Fl g Ar gdbport -.Op Fl l Ar lpcdev Ns Op , Ns Ar conf -.Op Fl m Ar size Ns Op Ar K|k|M|m|G|g|T|t -.Op Fl p Ar vcpu:hostcpu -.Op Fl s Ar slot,emulation Ns Op , Ns Ar conf -.Op Fl U Ar uuid -.Ar vmname -.Sh DESCRIPTION -.Nm -is a hypervisor that runs guest operating systems inside a -virtual machine. -.Pp -Parameters such as the number of virtual CPUs, amount of guest memory, and -I/O connectivity can be specified with command-line parameters. 
-.Pp -The guest operating system must be loaded with -.Xr bhyveload 8 -or a similar boot loader before running -.Nm . -.Pp -.Nm -runs until the guest operating system reboots or an unhandled hypervisor -exit is detected. -.Sh OPTIONS -.Bl -tag -width 10n -.It Fl a -The guest's local APIC is configured in xAPIC mode. -The xAPIC mode is the default setting so this option is redundant. -It will be deprecated in a future version. -.It Fl A -Generate ACPI tables. -Required for -.Fx Ns /amd64 -guests. -.It Fl b -Enable a low-level console device supported by -.Fx -kernels compiled with -.Cd "device bvmconsole" . -This option will be deprecated in a future version. -.It Fl c Ar numcpus -Number of guest virtual CPUs. -The default is 1 and the maximum is 16. -.It Fl C -Include guest memory in core file. -.It Fl e -Force -.Nm -to exit when a guest issues an access to an I/O port that is not emulated. -This is intended for debug purposes. -.It Fl g Ar gdbport -For -.Fx -kernels compiled with -.Cd "device bvmdebug" , -allow a remote kernel kgdb to be relayed to the guest kernel gdb stub -via a local IPv4 address and this port. -This option will be deprecated in a future version. -.It Fl h -Print help message and exit. -.It Fl H -Yield the virtual CPU thread when a HLT instruction is detected. -If this option is not specified, virtual CPUs will use 100% of a host CPU. -.It Fl l Ar lpcdev Ns Op , Ns Ar conf -Allow devices behind the LPC PCI-ISA bridge to be configured. -The only supported devices are the TTY-class devices -.Ar com1 -and -.Ar com2 -and the boot ROM device -.Ar bootrom . -.It Fl m Ar size Ns Op Ar K|k|M|m|G|g|T|t -Guest physical memory size in bytes. -This must be the same size that was given to -.Xr bhyveload 8 . -.Pp -The size argument may be suffixed with one of K, M, G or T (either upper -or lower case) to indicate a multiple of kilobytes, megabytes, gigabytes, -or terabytes. -If no suffix is given, the value is assumed to be in megabytes. 
-.It Fl p Ar vcpu:hostcpu -Pin guest's virtual CPU -.Em vcpu -to -.Em hostcpu . -.It Fl P -Force the guest virtual CPU to exit when a PAUSE instruction is detected. -.It Fl s Ar slot,emulation Ns Op , Ns Ar conf -Configure a virtual PCI slot and function. -.Pp -.Nm -provides PCI bus emulation and virtual devices that can be attached to -slots on the bus. -There are 32 available slots, with the option of providing up to 8 functions -per slot. -.Bl -tag -width 10n -.It Ar slot -.Ar pcislot[:function] -.Ar bus:pcislot:function -.Pp -The -.Ar pcislot -value is 0 to 31. -The optional -.Ar function -value is 0 to 7. -The optional -.Ar bus -value is 0 to 255. -If not specified, the -.Ar function -value defaults to 0. -If not specified, the -.Ar bus -value defaults to 0. -.It Ar emulation -.Bl -tag -width 10n -.It Li hostbridge | Li amd_hostbridge -.Pp -Provide a simple host bridge. -This is usually configured at slot 0, and is required by most guest -operating systems. -The -.Li amd_hostbridge -emulation is identical but uses a PCI vendor ID of -.Li AMD . -.It Li passthru -PCI pass-through device. -.It Li virtio-net -Virtio network interface. -.It Li virtio-blk -Virtio block storage interface. -.It Li virtio-rnd -Virtio RNG interface. -.It Li ahci-cd -AHCI controller attached to an ATAPI CD/DVD. -.It Li ahci-hd -AHCI controller attached to a SATA hard-drive. -.It Li uart -PCI 16550 serial device. -.It Li lpc -LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports and a boot ROM. -The LPC bridge emulation can only be configured on bus 0. -.El -.It Op Ar conf -This optional parameter describes the backend for device emulations. -If -.Ar conf -is not specified, the device emulation has no backend and can be -considered unconnected. 
-.Pp -Network devices: -.Bl -tag -width 10n -.It Ar tapN Ns Op , Ns Ar mac=xx:xx:xx:xx:xx:xx -.It Ar vmnetN Ns Op , Ns Ar mac=xx:xx:xx:xx:xx:xx -.Pp -If -.Ar mac -is not specified, the MAC address is derived from a fixed OUI and the -remaining bytes from an MD5 hash of the slot and function numbers and -the device name. -.Pp -The MAC address is an ASCII string in -.Xr ethers 5 -format. -.El -.Pp -Block storage devices: -.Bl -tag -width 10n -.It Pa /filename Ns Oo , Ns Ar block-device-options Oc -.It Pa /dev/xxx Ns Oo , Ns Ar block-device-options Oc -.El -.Pp -The -.Ar block-device-options -are: -.Bl -tag -width 8n -.It Li nocache -Open the file with -.Dv O_DIRECT . -.It Li direct -Open the file using -.Dv O_SYNC . -.It Li ro -Force the file to be opened read-only. -.It Li sectorsize= Ns Ar logical Ns Oo / Ns Ar physical Oc -Specify the logical and physical sector sizes of the emulated disk. -The physical sector size is optional and is equal to the logical sector size -if not explicitly specified. -.El -.Pp -TTY devices: -.Bl -tag -width 10n -.It Li stdio -Connect the serial port to the standard input and output of -the -.Nm -process. -.It Pa /dev/xxx -Use the host TTY device for serial port I/O. -.El -.Pp -Boot ROM device: -.Bl -tag -width 10n -.It Pa romfile -Map -.Ar romfile -in the guest address space reserved for boot firmware. -.El -.Pp -Pass-through devices: -.Bl -tag -width 10n -.It Ns Ar slot Ns / Ns Ar bus Ns / Ns Ar function -Connect to a PCI device on the host at the selector described by -.Ar slot , -.Ar bus , -and -.Ar function -numbers. -.El -.Pp -Guest memory must be wired using the -.Fl S -option when a pass-through device is configured. -.Pp -The host device must have been reserved at boot-time using the -.Va pptdev -loader variable as described in -.Xr vmm 4 . -.El -.It Fl S -Wire guest memory. -.It Fl u -RTC keeps UTC time. 
-.It Fl U Ar uuid -Set the universally unique identifier -.Pq UUID -in the guest's System Management BIOS System Information structure. -By default a UUID is generated from the host's hostname and -.Ar vmname . -.It Fl w -Ignore accesses to unimplemented Model Specific Registers (MSRs). -This is intended for debug purposes. -.It Fl W -Force virtio PCI device emulations to use MSI interrupts instead of MSI-X -interrupts. -.It Fl x -The guest's local APIC is configured in x2APIC mode. -.It Fl Y -Disable MPtable generation. -.It Ar vmname -Alphanumeric name of the guest. -This should be the same as that created by -.Xr bhyveload 8 . -.El -.Sh SIGNAL HANDLING -.Nm -deals with the following signals: -.Pp -.Bl -tag -width indent -compact -.It SIGTERM -Trigger ACPI poweroff for a VM -.El -.Sh EXIT STATUS -Exit status indicates how the VM was terminated: -.Pp -.Bl -tag -width indent -compact -.It 0 -rebooted -.It 1 -powered off -.It 2 -halted -.It 3 -triple fault -.El -.Sh EXAMPLES -The guest operating system must have been loaded with -.Xr bhyveload 8 -or a similar boot loader before -.Xr bhyve 4 -can be run. -.Pp -To run a virtual machine with 1GB of memory, two virtual CPUs, a virtio -block device backed by the -.Pa /my/image -filesystem image, and a serial port for the console: -.Bd -literal -offset indent -bhyve -c 2 -s 0,hostbridge -s 1,lpc -s 2,virtio-blk,/my/image \\ - -l com1,stdio -A -H -P -m 1G vm1 -.Ed -.Pp -Run a 24GB single-CPU virtual machine with three network ports, one of which -has a MAC address specified: -.Bd -literal -offset indent -bhyve -s 0,hostbridge -s 1,lpc -s 2:0,virtio-net,tap0 \\ - -s 2:1,virtio-net,tap1 \\ - -s 2:2,virtio-net,tap2,mac=00:be:fa:76:45:00 \\ - -s 3,virtio-blk,/my/image -l com1,stdio \\ - -A -H -P -m 24G bigvm -.Ed -.Pp -Run an 8GB quad-CPU virtual machine with 8 AHCI SATA disks, an AHCI ATAPI -CD-ROM, a single virtio network port, an AMD hostbridge, and the console -port connected to an -.Xr nmdm 4 -null-modem device. 
-.Bd -literal -offset indent -bhyve -c 4 \\ - -s 0,amd_hostbridge -s 1,lpc \\ - -s 1:0,ahci-hd,/images/disk.1 \\ - -s 1:1,ahci-hd,/images/disk.2 \\ - -s 1:2,ahci-hd,/images/disk.3 \\ - -s 1:3,ahci-hd,/images/disk.4 \\ - -s 1:4,ahci-hd,/images/disk.5 \\ - -s 1:5,ahci-hd,/images/disk.6 \\ - -s 1:6,ahci-hd,/images/disk.7 \\ - -s 1:7,ahci-hd,/images/disk.8 \\ - -s 2,ahci-cd,/images/install.iso \\ - -s 3,virtio-net,tap0 \\ - -l com1,/dev/nmdm0A \\ - -A -H -P -m 8G -.Ed -.Sh SEE ALSO -.Xr bhyve 4 , -.Xr nmdm 4 , -.Xr vmm 4 , -.Xr ethers 5 , -.Xr bhyvectl 8 , -.Xr bhyveload 8 -.Sh HISTORY -.Nm -first appeared in -.Fx 10.0 . -.Sh AUTHORS -.An Neel Natu Aq Mt neel@freebsd.org -.An Peter Grehan Aq Mt grehan@freebsd.org diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c deleted file mode 100644 index bfa135b..0000000 --- a/usr.sbin/bhyve/bhyverun.c +++ /dev/null @@ -1,971 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> -#include <sys/mman.h> -#include <sys/time.h> - -#include <machine/atomic.h> -#include <machine/segments.h> - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <err.h> -#include <libgen.h> -#include <unistd.h> -#include <assert.h> -#include <errno.h> -#include <pthread.h> -#include <pthread_np.h> -#include <sysexits.h> -#include <stdbool.h> - -#include <machine/vmm.h> -#include <vmmapi.h> - -#include "bhyverun.h" -#include "acpi.h" -#include "inout.h" -#include "dbgport.h" -#include "fwctl.h" -#include "ioapic.h" -#include "mem.h" -#include "mevent.h" -#include "mptbl.h" -#include "pci_emul.h" -#include "pci_irq.h" -#include "pci_lpc.h" -#include "smbiostbl.h" -#include "xmsr.h" -#include "spinup_ap.h" -#include "rtc.h" - -#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */ - -#define MB (1024UL * 1024) -#define GB (1024UL * MB) - -typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu); -extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu); - -char *vmname; - -int guest_ncpus; -char *guest_uuid_str; - -static int guest_vmexit_on_hlt, guest_vmexit_on_pause; -static int virtio_msix = 1; -static int x2apic_mode = 0; /* default is xAPIC */ - -static int strictio; -static int strictmsr = 1; - -static int acpi; - -static char *progname; -static const int BSP = 0; - -static 
cpuset_t cpumask; - -static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip); - -static struct vm_exit vmexit[VM_MAXCPU]; - -struct bhyvestats { - uint64_t vmexit_bogus; - uint64_t vmexit_reqidle; - uint64_t vmexit_hlt; - uint64_t vmexit_pause; - uint64_t vmexit_mtrap; - uint64_t vmexit_inst_emul; - uint64_t cpu_switch_rotate; - uint64_t cpu_switch_direct; -} stats; - -struct mt_vmm_info { - pthread_t mt_thr; - struct vmctx *mt_ctx; - int mt_vcpu; -} mt_vmm_info[VM_MAXCPU]; - -static cpuset_t *vcpumap[VM_MAXCPU] = { NULL }; - -static void -usage(int code) -{ - - fprintf(stderr, - "Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n" - " %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n" - " -a: local apic is in xAPIC mode (deprecated)\n" - " -A: create ACPI tables\n" - " -c: # cpus (default 1)\n" - " -C: include guest memory in core file\n" - " -e: exit on unhandled I/O access\n" - " -g: gdb port\n" - " -h: help\n" - " -H: vmexit from the guest on hlt\n" - " -l: LPC device configuration\n" - " -m: memory size in MB\n" - " -p: pin 'vcpu' to 'hostcpu'\n" - " -P: vmexit from the guest on pause\n" - " -s: <slot,driver,configinfo> PCI slot config\n" - " -S: guest memory cannot be swapped\n" - " -u: RTC keeps UTC time\n" - " -U: uuid\n" - " -w: ignore unimplemented MSRs\n" - " -W: force virtio to use single-vector MSI\n" - " -x: local apic is in x2APIC mode\n" - " -Y: disable MPtable generation\n", - progname, (int)strlen(progname), ""); - - exit(code); -} - -static int -pincpu_parse(const char *opt) -{ - int vcpu, pcpu; - - if (sscanf(opt, "%d:%d", &vcpu, &pcpu) != 2) { - fprintf(stderr, "invalid format: %s\n", opt); - return (-1); - } - - if (vcpu < 0 || vcpu >= VM_MAXCPU) { - fprintf(stderr, "vcpu '%d' outside valid range from 0 to %d\n", - vcpu, VM_MAXCPU - 1); - return (-1); - } - - if (pcpu < 0 || pcpu >= CPU_SETSIZE) { - fprintf(stderr, "hostcpu '%d' outside valid range from " - "0 to %d\n", pcpu, CPU_SETSIZE - 1); - return (-1); - 
} - - if (vcpumap[vcpu] == NULL) { - if ((vcpumap[vcpu] = malloc(sizeof(cpuset_t))) == NULL) { - perror("malloc"); - return (-1); - } - CPU_ZERO(vcpumap[vcpu]); - } - CPU_SET(pcpu, vcpumap[vcpu]); - return (0); -} - -void -vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid, - int errcode) -{ - struct vmctx *ctx; - int error, restart_instruction; - - ctx = arg; - restart_instruction = 1; - - error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode, - restart_instruction); - assert(error == 0); -} - -void * -paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) -{ - - return (vm_map_gpa(ctx, gaddr, len)); -} - -int -fbsdrun_vmexit_on_pause(void) -{ - - return (guest_vmexit_on_pause); -} - -int -fbsdrun_vmexit_on_hlt(void) -{ - - return (guest_vmexit_on_hlt); -} - -int -fbsdrun_virtio_msix(void) -{ - - return (virtio_msix); -} - -static void * -fbsdrun_start_thread(void *param) -{ - char tname[MAXCOMLEN + 1]; - struct mt_vmm_info *mtp; - int vcpu; - - mtp = param; - vcpu = mtp->mt_vcpu; - - snprintf(tname, sizeof(tname), "vcpu %d", vcpu); - pthread_set_name_np(mtp->mt_thr, tname); - - vm_loop(mtp->mt_ctx, vcpu, vmexit[vcpu].rip); - - /* not reached */ - exit(1); - return (NULL); -} - -void -fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip) -{ - int error; - - assert(fromcpu == BSP); - - /* - * The 'newcpu' must be activated in the context of 'fromcpu'. If - * vm_activate_cpu() is delayed until newcpu's pthread starts running - * then vmm.ko is out-of-sync with bhyve and this can create a race - * with vm_suspend(). 
- */ - error = vm_activate_cpu(ctx, newcpu); - if (error != 0) - err(EX_OSERR, "could not activate CPU %d", newcpu); - - CPU_SET_ATOMIC(newcpu, &cpumask); - - /* - * Set up the vmexit struct to allow execution to start - * at the given RIP - */ - vmexit[newcpu].rip = rip; - vmexit[newcpu].inst_length = 0; - - mt_vmm_info[newcpu].mt_ctx = ctx; - mt_vmm_info[newcpu].mt_vcpu = newcpu; - - error = pthread_create(&mt_vmm_info[newcpu].mt_thr, NULL, - fbsdrun_start_thread, &mt_vmm_info[newcpu]); - assert(error == 0); -} - -static int -fbsdrun_deletecpu(struct vmctx *ctx, int vcpu) -{ - - if (!CPU_ISSET(vcpu, &cpumask)) { - fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu); - exit(1); - } - - CPU_CLR_ATOMIC(vcpu, &cpumask); - return (CPU_EMPTY(&cpumask)); -} - -static int -vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu, - uint32_t eax) -{ -#if BHYVE_DEBUG - /* - * put guest-driven debug here - */ -#endif - return (VMEXIT_CONTINUE); -} - -static int -vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) -{ - int error; - int bytes, port, in, out; - int vcpu; - - vcpu = *pvcpu; - - port = vme->u.inout.port; - bytes = vme->u.inout.bytes; - in = vme->u.inout.in; - out = !in; - - /* Extra-special case of host notifications */ - if (out && port == GUEST_NIO_PORT) { - error = vmexit_handle_notify(ctx, vme, pvcpu, vme->u.inout.eax); - return (error); - } - - error = emulate_inout(ctx, vcpu, vme, strictio); - if (error) { - fprintf(stderr, "Unhandled %s%c 0x%04x at 0x%lx\n", - in ? "in" : "out", - bytes == 1 ? 'b' : (bytes == 2 ? 
'w' : 'l'), - port, vmexit->rip); - return (VMEXIT_ABORT); - } else { - return (VMEXIT_CONTINUE); - } -} - -static int -vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) -{ - uint64_t val; - uint32_t eax, edx; - int error; - - val = 0; - error = emulate_rdmsr(ctx, *pvcpu, vme->u.msr.code, &val); - if (error != 0) { - fprintf(stderr, "rdmsr to register %#x on vcpu %d\n", - vme->u.msr.code, *pvcpu); - if (strictmsr) { - vm_inject_gp(ctx, *pvcpu); - return (VMEXIT_CONTINUE); - } - } - - eax = val; - error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RAX, eax); - assert(error == 0); - - edx = val >> 32; - error = vm_set_register(ctx, *pvcpu, VM_REG_GUEST_RDX, edx); - assert(error == 0); - - return (VMEXIT_CONTINUE); -} - -static int -vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) -{ - int error; - - error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval); - if (error != 0) { - fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n", - vme->u.msr.code, vme->u.msr.wval, *pvcpu); - if (strictmsr) { - vm_inject_gp(ctx, *pvcpu); - return (VMEXIT_CONTINUE); - } - } - return (VMEXIT_CONTINUE); -} - -static int -vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) -{ - int newcpu; - int retval = VMEXIT_CONTINUE; - - newcpu = spinup_ap(ctx, *pvcpu, - vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip); - - return (retval); -} - -#define DEBUG_EPT_MISCONFIG -#ifdef DEBUG_EPT_MISCONFIG -#define EXIT_REASON_EPT_MISCONFIG 49 -#define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 -#define VMCS_IDENT(x) ((x) | 0x80000000) - -static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4]; -static int ept_misconfig_ptenum; -#endif - -static int -vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - - fprintf(stderr, "vm exit[%d]\n", *pvcpu); - fprintf(stderr, "\treason\t\tVMX\n"); - fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); - fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); - fprintf(stderr, 
"\tstatus\t\t%d\n", vmexit->u.vmx.status); - fprintf(stderr, "\texit_reason\t%u\n", vmexit->u.vmx.exit_reason); - fprintf(stderr, "\tqualification\t0x%016lx\n", - vmexit->u.vmx.exit_qualification); - fprintf(stderr, "\tinst_type\t\t%d\n", vmexit->u.vmx.inst_type); - fprintf(stderr, "\tinst_error\t\t%d\n", vmexit->u.vmx.inst_error); -#ifdef DEBUG_EPT_MISCONFIG - if (vmexit->u.vmx.exit_reason == EXIT_REASON_EPT_MISCONFIG) { - vm_get_register(ctx, *pvcpu, - VMCS_IDENT(VMCS_GUEST_PHYSICAL_ADDRESS), - &ept_misconfig_gpa); - vm_get_gpa_pmap(ctx, ept_misconfig_gpa, ept_misconfig_pte, - &ept_misconfig_ptenum); - fprintf(stderr, "\tEPT misconfiguration:\n"); - fprintf(stderr, "\t\tGPA: %#lx\n", ept_misconfig_gpa); - fprintf(stderr, "\t\tPTE(%d): %#lx %#lx %#lx %#lx\n", - ept_misconfig_ptenum, ept_misconfig_pte[0], - ept_misconfig_pte[1], ept_misconfig_pte[2], - ept_misconfig_pte[3]); - } -#endif /* DEBUG_EPT_MISCONFIG */ - return (VMEXIT_ABORT); -} - -static int -vmexit_svm(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - - fprintf(stderr, "vm exit[%d]\n", *pvcpu); - fprintf(stderr, "\treason\t\tSVM\n"); - fprintf(stderr, "\trip\t\t0x%016lx\n", vmexit->rip); - fprintf(stderr, "\tinst_length\t%d\n", vmexit->inst_length); - fprintf(stderr, "\texitcode\t%#lx\n", vmexit->u.svm.exitcode); - fprintf(stderr, "\texitinfo1\t%#lx\n", vmexit->u.svm.exitinfo1); - fprintf(stderr, "\texitinfo2\t%#lx\n", vmexit->u.svm.exitinfo2); - return (VMEXIT_ABORT); -} - -static int -vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - - assert(vmexit->inst_length == 0); - - stats.vmexit_bogus++; - - return (VMEXIT_CONTINUE); -} - -static int -vmexit_reqidle(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - - assert(vmexit->inst_length == 0); - - stats.vmexit_reqidle++; - - return (VMEXIT_CONTINUE); -} - -static int -vmexit_hlt(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - - stats.vmexit_hlt++; - - /* - * Just continue execution with the next 
instruction. We use - * the HLT VM exit as a way to be friendly with the host - * scheduler. - */ - return (VMEXIT_CONTINUE); -} - -static int -vmexit_pause(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - - stats.vmexit_pause++; - - return (VMEXIT_CONTINUE); -} - -static int -vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - - assert(vmexit->inst_length == 0); - - stats.vmexit_mtrap++; - - return (VMEXIT_CONTINUE); -} - -static int -vmexit_inst_emul(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - int err, i; - struct vie *vie; - - stats.vmexit_inst_emul++; - - vie = &vmexit->u.inst_emul.vie; - err = emulate_mem(ctx, *pvcpu, vmexit->u.inst_emul.gpa, - vie, &vmexit->u.inst_emul.paging); - - if (err) { - if (err == ESRCH) { - fprintf(stderr, "Unhandled memory access to 0x%lx\n", - vmexit->u.inst_emul.gpa); - } - - fprintf(stderr, "Failed to emulate instruction ["); - for (i = 0; i < vie->num_valid; i++) { - fprintf(stderr, "0x%02x%s", vie->inst[i], - i != (vie->num_valid - 1) ? 
" " : ""); - } - fprintf(stderr, "] at 0x%lx\n", vmexit->rip); - return (VMEXIT_ABORT); - } - - return (VMEXIT_CONTINUE); -} - -static pthread_mutex_t resetcpu_mtx = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t resetcpu_cond = PTHREAD_COND_INITIALIZER; - -static int -vmexit_suspend(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - enum vm_suspend_how how; - - how = vmexit->u.suspended.how; - - fbsdrun_deletecpu(ctx, *pvcpu); - - if (*pvcpu != BSP) { - pthread_mutex_lock(&resetcpu_mtx); - pthread_cond_signal(&resetcpu_cond); - pthread_mutex_unlock(&resetcpu_mtx); - pthread_exit(NULL); - } - - pthread_mutex_lock(&resetcpu_mtx); - while (!CPU_EMPTY(&cpumask)) { - pthread_cond_wait(&resetcpu_cond, &resetcpu_mtx); - } - pthread_mutex_unlock(&resetcpu_mtx); - - switch (how) { - case VM_SUSPEND_RESET: - exit(0); - case VM_SUSPEND_POWEROFF: - exit(1); - case VM_SUSPEND_HALT: - exit(2); - case VM_SUSPEND_TRIPLEFAULT: - exit(3); - default: - fprintf(stderr, "vmexit_suspend: invalid reason %d\n", how); - exit(100); - } - return (0); /* NOTREACHED */ -} - -static vmexit_handler_t handler[VM_EXITCODE_MAX] = { - [VM_EXITCODE_INOUT] = vmexit_inout, - [VM_EXITCODE_INOUT_STR] = vmexit_inout, - [VM_EXITCODE_VMX] = vmexit_vmx, - [VM_EXITCODE_SVM] = vmexit_svm, - [VM_EXITCODE_BOGUS] = vmexit_bogus, - [VM_EXITCODE_REQIDLE] = vmexit_reqidle, - [VM_EXITCODE_RDMSR] = vmexit_rdmsr, - [VM_EXITCODE_WRMSR] = vmexit_wrmsr, - [VM_EXITCODE_MTRAP] = vmexit_mtrap, - [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, - [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, - [VM_EXITCODE_SUSPENDED] = vmexit_suspend, - [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch, -}; - -static void -vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip) -{ - int error, rc; - enum vm_exitcode exitcode; - cpuset_t active_cpus; - - if (vcpumap[vcpu] != NULL) { - error = pthread_setaffinity_np(pthread_self(), - sizeof(cpuset_t), vcpumap[vcpu]); - assert(error == 0); - } - - error = vm_active_cpus(ctx, &active_cpus); - 
assert(CPU_ISSET(vcpu, &active_cpus)); - - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip); - assert(error == 0); - - while (1) { - error = vm_run(ctx, vcpu, &vmexit[vcpu]); - if (error != 0) - break; - - exitcode = vmexit[vcpu].exitcode; - if (exitcode >= VM_EXITCODE_MAX || handler[exitcode] == NULL) { - fprintf(stderr, "vm_loop: unexpected exitcode 0x%x\n", - exitcode); - exit(1); - } - - rc = (*handler[exitcode])(ctx, &vmexit[vcpu], &vcpu); - - switch (rc) { - case VMEXIT_CONTINUE: - break; - case VMEXIT_ABORT: - abort(); - default: - exit(1); - } - } - fprintf(stderr, "vm_run error %d, errno %d\n", error, errno); -} - -static int -num_vcpus_allowed(struct vmctx *ctx) -{ - int tmp, error; - - error = vm_get_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, &tmp); - - /* - * The guest is allowed to spinup more than one processor only if the - * UNRESTRICTED_GUEST capability is available. - */ - if (error == 0) - return (VM_MAXCPU); - else - return (1); -} - -void -fbsdrun_set_capabilities(struct vmctx *ctx, int cpu) -{ - int err, tmp; - - if (fbsdrun_vmexit_on_hlt()) { - err = vm_get_capability(ctx, cpu, VM_CAP_HALT_EXIT, &tmp); - if (err < 0) { - fprintf(stderr, "VM exit on HLT not supported\n"); - exit(1); - } - vm_set_capability(ctx, cpu, VM_CAP_HALT_EXIT, 1); - if (cpu == BSP) - handler[VM_EXITCODE_HLT] = vmexit_hlt; - } - - if (fbsdrun_vmexit_on_pause()) { - /* - * pause exit support required for this mode - */ - err = vm_get_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, &tmp); - if (err < 0) { - fprintf(stderr, - "SMP mux requested, no pause support\n"); - exit(1); - } - vm_set_capability(ctx, cpu, VM_CAP_PAUSE_EXIT, 1); - if (cpu == BSP) - handler[VM_EXITCODE_PAUSE] = vmexit_pause; - } - - if (x2apic_mode) - err = vm_set_x2apic_state(ctx, cpu, X2APIC_ENABLED); - else - err = vm_set_x2apic_state(ctx, cpu, X2APIC_DISABLED); - - if (err) { - fprintf(stderr, "Unable to set x2apic state (%d)\n", err); - exit(1); - } - - vm_set_capability(ctx, cpu, 
VM_CAP_ENABLE_INVPCID, 1); -} - -static struct vmctx * -do_open(const char *vmname) -{ - struct vmctx *ctx; - int error; - bool reinit, romboot; - - reinit = romboot = false; - - if (lpc_bootrom()) - romboot = true; - - error = vm_create(vmname); - if (error) { - if (errno == EEXIST) { - if (romboot) { - reinit = true; - } else { - /* - * The virtual machine has been setup by the - * userspace bootloader. - */ - } - } else { - perror("vm_create"); - exit(1); - } - } else { - if (!romboot) { - /* - * If the virtual machine was just created then a - * bootrom must be configured to boot it. - */ - fprintf(stderr, "virtual machine cannot be booted\n"); - exit(1); - } - } - - ctx = vm_open(vmname); - if (ctx == NULL) { - perror("vm_open"); - exit(1); - } - - if (reinit) { - error = vm_reinit(ctx); - if (error) { - perror("vm_reinit"); - exit(1); - } - } - return (ctx); -} - -int -main(int argc, char *argv[]) -{ - int c, error, gdb_port, err, bvmcons; - int max_vcpus, mptgen, memflags; - int rtc_localtime; - struct vmctx *ctx; - uint64_t rip; - size_t memsize; - char *optstr; - - bvmcons = 0; - progname = basename(argv[0]); - gdb_port = 0; - guest_ncpus = 1; - memsize = 256 * MB; - mptgen = 1; - rtc_localtime = 1; - memflags = 0; - - optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:"; - while ((c = getopt(argc, argv, optstr)) != -1) { - switch (c) { - case 'a': - x2apic_mode = 0; - break; - case 'A': - acpi = 1; - break; - case 'b': - bvmcons = 1; - break; - case 'p': - if (pincpu_parse(optarg) != 0) { - errx(EX_USAGE, "invalid vcpu pinning " - "configuration '%s'", optarg); - } - break; - case 'c': - guest_ncpus = atoi(optarg); - break; - case 'C': - memflags |= VM_MEM_F_INCORE; - break; - case 'g': - gdb_port = atoi(optarg); - break; - case 'l': - if (lpc_device_parse(optarg) != 0) { - errx(EX_USAGE, "invalid lpc device " - "configuration '%s'", optarg); - } - break; - case 's': - if (pci_parse_slot(optarg) != 0) - exit(1); - else - break; - case 'S': - memflags |= 
VM_MEM_F_WIRED; - break; - case 'm': - error = vm_parse_memsize(optarg, &memsize); - if (error) - errx(EX_USAGE, "invalid memsize '%s'", optarg); - break; - case 'H': - guest_vmexit_on_hlt = 1; - break; - case 'I': - /* - * The "-I" option was used to add an ioapic to the - * virtual machine. - * - * An ioapic is now provided unconditionally for each - * virtual machine and this option is now deprecated. - */ - break; - case 'P': - guest_vmexit_on_pause = 1; - break; - case 'e': - strictio = 1; - break; - case 'u': - rtc_localtime = 0; - break; - case 'U': - guest_uuid_str = optarg; - break; - case 'w': - strictmsr = 0; - break; - case 'W': - virtio_msix = 0; - break; - case 'x': - x2apic_mode = 1; - break; - case 'Y': - mptgen = 0; - break; - case 'h': - usage(0); - default: - usage(1); - } - } - argc -= optind; - argv += optind; - - if (argc != 1) - usage(1); - - vmname = argv[0]; - ctx = do_open(vmname); - - if (guest_ncpus < 1) { - fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); - exit(1); - } - - max_vcpus = num_vcpus_allowed(ctx); - if (guest_ncpus > max_vcpus) { - fprintf(stderr, "%d vCPUs requested but only %d available\n", - guest_ncpus, max_vcpus); - exit(1); - } - - fbsdrun_set_capabilities(ctx, BSP); - - vm_set_memflags(ctx, memflags); - err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); - if (err) { - fprintf(stderr, "Unable to setup memory (%d)\n", errno); - exit(1); - } - - error = init_msr(); - if (error) { - fprintf(stderr, "init_msr error %d", error); - exit(1); - } - - init_mem(); - init_inout(); - pci_irq_init(ctx); - ioapic_init(ctx); - - rtc_init(ctx, rtc_localtime); - sci_init(ctx); - - /* - * Exit if a device emulation finds an error in it's initilization - */ - if (init_pci(ctx) != 0) - exit(1); - - if (gdb_port != 0) - init_dbgport(gdb_port); - - if (bvmcons) - init_bvmcons(); - - if (lpc_bootrom()) { - if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) { - fprintf(stderr, "ROM boot failed: unrestricted guest " - 
"capability not available\n"); - exit(1); - } - error = vcpu_reset(ctx, BSP); - assert(error == 0); - } - - error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); - assert(error == 0); - - /* - * build the guest tables, MP etc. - */ - if (mptgen) { - error = mptable_build(ctx, guest_ncpus); - if (error) - exit(1); - } - - error = smbios_build(ctx); - assert(error == 0); - - if (acpi) { - error = acpi_build(ctx, guest_ncpus); - assert(error == 0); - } - - if (lpc_bootrom()) - fwctl_init(); - - /* - * Change the proc title to include the VM name. - */ - setproctitle("%s", vmname); - - /* - * Add CPU 0 - */ - fbsdrun_addcpu(ctx, BSP, BSP, rip); - - /* - * Head off to the main event dispatch loop - */ - mevent_dispatch(); - - exit(1); -} diff --git a/usr.sbin/bhyve/bhyverun.h b/usr.sbin/bhyve/bhyverun.h deleted file mode 100644 index c51bf48..0000000 --- a/usr.sbin/bhyve/bhyverun.h +++ /dev/null @@ -1,55 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _FBSDRUN_H_ -#define _FBSDRUN_H_ - -#ifndef CTASSERT /* Allow lint to override */ -#define CTASSERT(x) _CTASSERT(x, __LINE__) -#define _CTASSERT(x, y) __CTASSERT(x, y) -#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1] -#endif - -#define VMEXIT_CONTINUE (0) -#define VMEXIT_ABORT (-1) - -struct vmctx; -extern int guest_ncpus; -extern char *guest_uuid_str; -extern char *vmname; - -void *paddr_guest2host(struct vmctx *ctx, uintptr_t addr, size_t len); - -void fbsdrun_set_capabilities(struct vmctx *ctx, int cpu); -void fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip); -int fbsdrun_muxed(void); -int fbsdrun_vmexit_on_hlt(void); -int fbsdrun_vmexit_on_pause(void); -int fbsdrun_disable_x2apic(void); -int fbsdrun_virtio_msix(void); -#endif diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c deleted file mode 100644 index b6cb409..0000000 --- a/usr.sbin/bhyve/block_if.c +++ /dev/null @@ -1,820 +0,0 @@ -/*- - * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/queue.h> -#include <sys/errno.h> -#include <sys/stat.h> -#include <sys/ioctl.h> -#include <sys/disk.h> - -#include <assert.h> -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <pthread.h> -#include <pthread_np.h> -#include <signal.h> -#include <unistd.h> - -#include <machine/atomic.h> - -#include "bhyverun.h" -#include "mevent.h" -#include "block_if.h" - -#define BLOCKIF_SIG 0xb109b109 - -#define BLOCKIF_NUMTHR 8 -#define BLOCKIF_MAXREQ (64 + BLOCKIF_NUMTHR) - -enum blockop { - BOP_READ, - BOP_WRITE, - BOP_FLUSH, - BOP_DELETE -}; - -enum blockstat { - BST_FREE, - BST_BLOCK, - BST_PEND, - BST_BUSY, - BST_DONE -}; - -struct blockif_elem { - TAILQ_ENTRY(blockif_elem) be_link; - struct blockif_req *be_req; - enum blockop be_op; - enum blockstat be_status; - pthread_t be_tid; - off_t be_block; -}; - -struct blockif_ctxt { - int bc_magic; - int bc_fd; - int 
bc_ischr; - int bc_isgeom; - int bc_candelete; - int bc_rdonly; - off_t bc_size; - int bc_sectsz; - int bc_psectsz; - int bc_psectoff; - int bc_closing; - pthread_t bc_btid[BLOCKIF_NUMTHR]; - pthread_mutex_t bc_mtx; - pthread_cond_t bc_cond; - - /* Request elements and free/pending/busy queues */ - TAILQ_HEAD(, blockif_elem) bc_freeq; - TAILQ_HEAD(, blockif_elem) bc_pendq; - TAILQ_HEAD(, blockif_elem) bc_busyq; - struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; -}; - -static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; - -struct blockif_sig_elem { - pthread_mutex_t bse_mtx; - pthread_cond_t bse_cond; - int bse_pending; - struct blockif_sig_elem *bse_next; -}; - -static struct blockif_sig_elem *blockif_bse_head; - -static int -blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, - enum blockop op) -{ - struct blockif_elem *be, *tbe; - off_t off; - int i; - - be = TAILQ_FIRST(&bc->bc_freeq); - assert(be != NULL); - assert(be->be_status == BST_FREE); - TAILQ_REMOVE(&bc->bc_freeq, be, be_link); - be->be_req = breq; - be->be_op = op; - switch (op) { - case BOP_READ: - case BOP_WRITE: - case BOP_DELETE: - off = breq->br_offset; - for (i = 0; i < breq->br_iovcnt; i++) - off += breq->br_iov[i].iov_len; - break; - default: - off = OFF_MAX; - } - be->be_block = off; - TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { - if (tbe->be_block == breq->br_offset) - break; - } - if (tbe == NULL) { - TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { - if (tbe->be_block == breq->br_offset) - break; - } - } - if (tbe == NULL) - be->be_status = BST_PEND; - else - be->be_status = BST_BLOCK; - TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); - return (be->be_status == BST_PEND); -} - -static int -blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) -{ - struct blockif_elem *be; - - TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { - if (be->be_status == BST_PEND) - break; - assert(be->be_status == BST_BLOCK); - } - if (be == NULL) - return (0); - 
TAILQ_REMOVE(&bc->bc_pendq, be, be_link); - be->be_status = BST_BUSY; - be->be_tid = t; - TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); - *bep = be; - return (1); -} - -static void -blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) -{ - struct blockif_elem *tbe; - - if (be->be_status == BST_DONE || be->be_status == BST_BUSY) - TAILQ_REMOVE(&bc->bc_busyq, be, be_link); - else - TAILQ_REMOVE(&bc->bc_pendq, be, be_link); - TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { - if (tbe->be_req->br_offset == be->be_block) - tbe->be_status = BST_PEND; - } - be->be_tid = 0; - be->be_status = BST_FREE; - be->be_req = NULL; - TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); -} - -static void -blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) -{ - struct blockif_req *br; - off_t arg[2]; - ssize_t clen, len, off, boff, voff; - int i, err; - - br = be->be_req; - if (br->br_iovcnt <= 1) - buf = NULL; - err = 0; - switch (be->be_op) { - case BOP_READ: - if (buf == NULL) { - if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, - br->br_offset)) < 0) - err = errno; - else - br->br_resid -= len; - break; - } - i = 0; - off = voff = 0; - while (br->br_resid > 0) { - len = MIN(br->br_resid, MAXPHYS); - if (pread(bc->bc_fd, buf, len, br->br_offset + - off) < 0) { - err = errno; - break; - } - boff = 0; - do { - clen = MIN(len - boff, br->br_iov[i].iov_len - - voff); - memcpy(br->br_iov[i].iov_base + voff, - buf + boff, clen); - if (clen < br->br_iov[i].iov_len - voff) - voff += clen; - else { - i++; - voff = 0; - } - boff += clen; - } while (boff < len); - off += len; - br->br_resid -= len; - } - break; - case BOP_WRITE: - if (bc->bc_rdonly) { - err = EROFS; - break; - } - if (buf == NULL) { - if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, - br->br_offset)) < 0) - err = errno; - else - br->br_resid -= len; - break; - } - i = 0; - off = voff = 0; - while (br->br_resid > 0) { - len = MIN(br->br_resid, MAXPHYS); - boff = 0; - do { - clen = 
MIN(len - boff, br->br_iov[i].iov_len - - voff); - memcpy(buf + boff, - br->br_iov[i].iov_base + voff, clen); - if (clen < br->br_iov[i].iov_len - voff) - voff += clen; - else { - i++; - voff = 0; - } - boff += clen; - } while (boff < len); - if (pwrite(bc->bc_fd, buf, len, br->br_offset + - off) < 0) { - err = errno; - break; - } - off += len; - br->br_resid -= len; - } - break; - case BOP_FLUSH: - if (bc->bc_ischr) { - if (ioctl(bc->bc_fd, DIOCGFLUSH)) - err = errno; - } else if (fsync(bc->bc_fd)) - err = errno; - break; - case BOP_DELETE: - if (!bc->bc_candelete) - err = EOPNOTSUPP; - else if (bc->bc_rdonly) - err = EROFS; - else if (bc->bc_ischr) { - arg[0] = br->br_offset; - arg[1] = br->br_resid; - if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) - err = errno; - else - br->br_resid = 0; - } else - err = EOPNOTSUPP; - break; - default: - err = EINVAL; - break; - } - - be->be_status = BST_DONE; - - (*br->br_callback)(br, err); -} - -static void * -blockif_thr(void *arg) -{ - struct blockif_ctxt *bc; - struct blockif_elem *be; - pthread_t t; - uint8_t *buf; - - bc = arg; - if (bc->bc_isgeom) - buf = malloc(MAXPHYS); - else - buf = NULL; - t = pthread_self(); - - pthread_mutex_lock(&bc->bc_mtx); - for (;;) { - while (blockif_dequeue(bc, t, &be)) { - pthread_mutex_unlock(&bc->bc_mtx); - blockif_proc(bc, be, buf); - pthread_mutex_lock(&bc->bc_mtx); - blockif_complete(bc, be); - } - /* Check ctxt status here to see if exit requested */ - if (bc->bc_closing) - break; - pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); - } - pthread_mutex_unlock(&bc->bc_mtx); - - if (buf) - free(buf); - pthread_exit(NULL); - return (NULL); -} - -static void -blockif_sigcont_handler(int signal, enum ev_type type, void *arg) -{ - struct blockif_sig_elem *bse; - - for (;;) { - /* - * Process the entire list even if not intended for - * this thread. 
- */ - do { - bse = blockif_bse_head; - if (bse == NULL) - return; - } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, - (uintptr_t)bse, - (uintptr_t)bse->bse_next)); - - pthread_mutex_lock(&bse->bse_mtx); - bse->bse_pending = 0; - pthread_cond_signal(&bse->bse_cond); - pthread_mutex_unlock(&bse->bse_mtx); - } -} - -static void -blockif_init(void) -{ - mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); - (void) signal(SIGCONT, SIG_IGN); -} - -struct blockif_ctxt * -blockif_open(const char *optstr, const char *ident) -{ - char tname[MAXCOMLEN + 1]; - char name[MAXPATHLEN]; - char *nopt, *xopts, *cp; - struct blockif_ctxt *bc; - struct stat sbuf; - struct diocgattr_arg arg; - off_t size, psectsz, psectoff; - int extra, fd, i, sectsz; - int nocache, sync, ro, candelete, geom, ssopt, pssopt; - - pthread_once(&blockif_once, blockif_init); - - fd = -1; - ssopt = 0; - nocache = 0; - sync = 0; - ro = 0; - - /* - * The first element in the optstring is always a pathname. - * Optional elements follow - */ - nopt = xopts = strdup(optstr); - while (xopts != NULL) { - cp = strsep(&xopts, ","); - if (cp == nopt) /* file or device pathname */ - continue; - else if (!strcmp(cp, "nocache")) - nocache = 1; - else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) - sync = 1; - else if (!strcmp(cp, "ro")) - ro = 1; - else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2) - ; - else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) - pssopt = ssopt; - else { - fprintf(stderr, "Invalid device option \"%s\"\n", cp); - goto err; - } - } - - extra = 0; - if (nocache) - extra |= O_DIRECT; - if (sync) - extra |= O_SYNC; - - fd = open(nopt, (ro ? 
O_RDONLY : O_RDWR) | extra); - if (fd < 0 && !ro) { - /* Attempt a r/w fail with a r/o open */ - fd = open(nopt, O_RDONLY | extra); - ro = 1; - } - - if (fd < 0) { - perror("Could not open backing file"); - goto err; - } - - if (fstat(fd, &sbuf) < 0) { - perror("Could not stat backing file"); - goto err; - } - - /* - * Deal with raw devices - */ - size = sbuf.st_size; - sectsz = DEV_BSIZE; - psectsz = psectoff = 0; - candelete = geom = 0; - if (S_ISCHR(sbuf.st_mode)) { - if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || - ioctl(fd, DIOCGSECTORSIZE, &sectsz)) { - perror("Could not fetch dev blk/sector size"); - goto err; - } - assert(size != 0); - assert(sectsz != 0); - if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) - ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); - strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); - arg.len = sizeof(arg.value.i); - if (ioctl(fd, DIOCGATTR, &arg) == 0) - candelete = arg.value.i; - if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0) - geom = 1; - } else - psectsz = sbuf.st_blksize; - - if (ssopt != 0) { - if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 || - ssopt > pssopt) { - fprintf(stderr, "Invalid sector size %d/%d\n", - ssopt, pssopt); - goto err; - } - - /* - * Some backend drivers (e.g. cd0, ada0) require that the I/O - * size be a multiple of the device's sector size. - * - * Validate that the emulated sector size complies with this - * requirement. 
- */ - if (S_ISCHR(sbuf.st_mode)) { - if (ssopt < sectsz || (ssopt % sectsz) != 0) { - fprintf(stderr, "Sector size %d incompatible " - "with underlying device sector size %d\n", - ssopt, sectsz); - goto err; - } - } - - sectsz = ssopt; - psectsz = pssopt; - psectoff = 0; - } - - bc = calloc(1, sizeof(struct blockif_ctxt)); - if (bc == NULL) { - perror("calloc"); - goto err; - } - - bc->bc_magic = BLOCKIF_SIG; - bc->bc_fd = fd; - bc->bc_ischr = S_ISCHR(sbuf.st_mode); - bc->bc_isgeom = geom; - bc->bc_candelete = candelete; - bc->bc_rdonly = ro; - bc->bc_size = size; - bc->bc_sectsz = sectsz; - bc->bc_psectsz = psectsz; - bc->bc_psectoff = psectoff; - pthread_mutex_init(&bc->bc_mtx, NULL); - pthread_cond_init(&bc->bc_cond, NULL); - TAILQ_INIT(&bc->bc_freeq); - TAILQ_INIT(&bc->bc_pendq); - TAILQ_INIT(&bc->bc_busyq); - for (i = 0; i < BLOCKIF_MAXREQ; i++) { - bc->bc_reqs[i].be_status = BST_FREE; - TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); - } - - for (i = 0; i < BLOCKIF_NUMTHR; i++) { - pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); - snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); - pthread_set_name_np(bc->bc_btid[i], tname); - } - - return (bc); -err: - if (fd >= 0) - close(fd); - return (NULL); -} - -static int -blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, - enum blockop op) -{ - int err; - - err = 0; - - pthread_mutex_lock(&bc->bc_mtx); - if (!TAILQ_EMPTY(&bc->bc_freeq)) { - /* - * Enqueue and inform the block i/o thread - * that there is work available - */ - if (blockif_enqueue(bc, breq, op)) - pthread_cond_signal(&bc->bc_cond); - } else { - /* - * Callers are not allowed to enqueue more than - * the specified blockif queue limit. Return an - * error to indicate that the queue length has been - * exceeded. 
- */ - err = E2BIG; - } - pthread_mutex_unlock(&bc->bc_mtx); - - return (err); -} - -int -blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) -{ - - assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_READ)); -} - -int -blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) -{ - - assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_WRITE)); -} - -int -blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) -{ - - assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_FLUSH)); -} - -int -blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) -{ - - assert(bc->bc_magic == BLOCKIF_SIG); - return (blockif_request(bc, breq, BOP_DELETE)); -} - -int -blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) -{ - struct blockif_elem *be; - - assert(bc->bc_magic == BLOCKIF_SIG); - - pthread_mutex_lock(&bc->bc_mtx); - /* - * Check pending requests. - */ - TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { - if (be->be_req == breq) - break; - } - if (be != NULL) { - /* - * Found it. - */ - blockif_complete(bc, be); - pthread_mutex_unlock(&bc->bc_mtx); - - return (0); - } - - /* - * Check in-flight requests. - */ - TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { - if (be->be_req == breq) - break; - } - if (be == NULL) { - /* - * Didn't find it. - */ - pthread_mutex_unlock(&bc->bc_mtx); - return (EINVAL); - } - - /* - * Interrupt the processing thread to force it return - * prematurely via it's normal callback path. 
- */ - while (be->be_status == BST_BUSY) { - struct blockif_sig_elem bse, *old_head; - - pthread_mutex_init(&bse.bse_mtx, NULL); - pthread_cond_init(&bse.bse_cond, NULL); - - bse.bse_pending = 1; - - do { - old_head = blockif_bse_head; - bse.bse_next = old_head; - } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, - (uintptr_t)old_head, - (uintptr_t)&bse)); - - pthread_kill(be->be_tid, SIGCONT); - - pthread_mutex_lock(&bse.bse_mtx); - while (bse.bse_pending) - pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); - pthread_mutex_unlock(&bse.bse_mtx); - } - - pthread_mutex_unlock(&bc->bc_mtx); - - /* - * The processing thread has been interrupted. Since it's not - * clear if the callback has been invoked yet, return EBUSY. - */ - return (EBUSY); -} - -int -blockif_close(struct blockif_ctxt *bc) -{ - void *jval; - int i; - - assert(bc->bc_magic == BLOCKIF_SIG); - - /* - * Stop the block i/o thread - */ - pthread_mutex_lock(&bc->bc_mtx); - bc->bc_closing = 1; - pthread_mutex_unlock(&bc->bc_mtx); - pthread_cond_broadcast(&bc->bc_cond); - for (i = 0; i < BLOCKIF_NUMTHR; i++) - pthread_join(bc->bc_btid[i], &jval); - - /* XXX Cancel queued i/o's ??? */ - - /* - * Release resources - */ - bc->bc_magic = 0; - close(bc->bc_fd); - free(bc); - - return (0); -} - -/* - * Return virtual C/H/S values for a given block. Use the algorithm - * outlined in the VHD specification to calculate values. 
- */ -void -blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) -{ - off_t sectors; /* total sectors of the block dev */ - off_t hcyl; /* cylinders times heads */ - uint16_t secpt; /* sectors per track */ - uint8_t heads; - - assert(bc->bc_magic == BLOCKIF_SIG); - - sectors = bc->bc_size / bc->bc_sectsz; - - /* Clamp the size to the largest possible with CHS */ - if (sectors > 65535UL*16*255) - sectors = 65535UL*16*255; - - if (sectors >= 65536UL*16*63) { - secpt = 255; - heads = 16; - hcyl = sectors / secpt; - } else { - secpt = 17; - hcyl = sectors / secpt; - heads = (hcyl + 1023) / 1024; - - if (heads < 4) - heads = 4; - - if (hcyl >= (heads * 1024) || heads > 16) { - secpt = 31; - heads = 16; - hcyl = sectors / secpt; - } - if (hcyl >= (heads * 1024)) { - secpt = 63; - heads = 16; - hcyl = sectors / secpt; - } - } - - *c = hcyl / heads; - *h = heads; - *s = secpt; -} - -/* - * Accessors - */ -off_t -blockif_size(struct blockif_ctxt *bc) -{ - - assert(bc->bc_magic == BLOCKIF_SIG); - return (bc->bc_size); -} - -int -blockif_sectsz(struct blockif_ctxt *bc) -{ - - assert(bc->bc_magic == BLOCKIF_SIG); - return (bc->bc_sectsz); -} - -void -blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) -{ - - assert(bc->bc_magic == BLOCKIF_SIG); - *size = bc->bc_psectsz; - *off = bc->bc_psectoff; -} - -int -blockif_queuesz(struct blockif_ctxt *bc) -{ - - assert(bc->bc_magic == BLOCKIF_SIG); - return (BLOCKIF_MAXREQ - 1); -} - -int -blockif_is_ro(struct blockif_ctxt *bc) -{ - - assert(bc->bc_magic == BLOCKIF_SIG); - return (bc->bc_rdonly); -} - -int -blockif_candelete(struct blockif_ctxt *bc) -{ - - assert(bc->bc_magic == BLOCKIF_SIG); - return (bc->bc_candelete); -} diff --git a/usr.sbin/bhyve/block_if.h b/usr.sbin/bhyve/block_if.h deleted file mode 100644 index 8e63407..0000000 --- a/usr.sbin/bhyve/block_if.h +++ /dev/null @@ -1,70 +0,0 @@ -/*- - * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -/* - * The block API to be used by bhyve block-device emulations. The routines - * are thread safe, with no assumptions about the context of the completion - * callback - it may occur in the caller's context, or asynchronously in - * another thread. 
- */ - -#ifndef _BLOCK_IF_H_ -#define _BLOCK_IF_H_ - -#include <sys/uio.h> -#include <sys/unistd.h> - -#define BLOCKIF_IOV_MAX 33 /* not practical to be IOV_MAX */ - -struct blockif_req { - struct iovec br_iov[BLOCKIF_IOV_MAX]; - int br_iovcnt; - off_t br_offset; - ssize_t br_resid; - void (*br_callback)(struct blockif_req *req, int err); - void *br_param; -}; - -struct blockif_ctxt; -struct blockif_ctxt *blockif_open(const char *optstr, const char *ident); -off_t blockif_size(struct blockif_ctxt *bc); -void blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, - uint8_t *s); -int blockif_sectsz(struct blockif_ctxt *bc); -void blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off); -int blockif_queuesz(struct blockif_ctxt *bc); -int blockif_is_ro(struct blockif_ctxt *bc); -int blockif_candelete(struct blockif_ctxt *bc); -int blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq); -int blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq); -int blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq); -int blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq); -int blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq); -int blockif_close(struct blockif_ctxt *bc); - -#endif /* _BLOCK_IF_H_ */ diff --git a/usr.sbin/bhyve/bootrom.c b/usr.sbin/bhyve/bootrom.c deleted file mode 100644 index 5e4e0e9..0000000 --- a/usr.sbin/bhyve/bootrom.c +++ /dev/null @@ -1,111 +0,0 @@ -/*- - * Copyright (c) 2015 Neel Natu <neel@freebsd.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/param.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> -#include <sys/mman.h> -#include <sys/stat.h> - -#include <machine/vmm.h> - -#include <errno.h> -#include <fcntl.h> -#include <stdio.h> -#include <string.h> -#include <unistd.h> -#include <stdbool.h> - -#include <vmmapi.h> -#include "bhyverun.h" -#include "bootrom.h" - -#define MAX_BOOTROM_SIZE (16 * 1024 * 1024) /* 16 MB */ - -int -bootrom_init(struct vmctx *ctx, const char *romfile) -{ - struct stat sbuf; - vm_paddr_t gpa; - ssize_t rlen; - char *ptr; - int fd, i, rv, prot; - - rv = -1; - fd = open(romfile, O_RDONLY); - if (fd < 0) { - fprintf(stderr, "Error opening bootrom \"%s\": %s\n", - romfile, strerror(errno)); - goto done; - } - - if (fstat(fd, &sbuf) < 0) { - fprintf(stderr, "Could not fstat bootrom file \"%s\": %s\n", - romfile, strerror(errno)); - goto done; - } - - /* - * Limit bootrom size to 16MB so it doesn't encroach into reserved - * MMIO space (e.g. APIC, HPET, MSI). 
- */ - if (sbuf.st_size > MAX_BOOTROM_SIZE || sbuf.st_size < PAGE_SIZE) { - fprintf(stderr, "Invalid bootrom size %ld\n", sbuf.st_size); - goto done; - } - - if (sbuf.st_size & PAGE_MASK) { - fprintf(stderr, "Bootrom size %ld is not a multiple of the " - "page size\n", sbuf.st_size); - goto done; - } - - ptr = vm_create_devmem(ctx, VM_BOOTROM, "bootrom", sbuf.st_size); - if (ptr == MAP_FAILED) - goto done; - - /* Map the bootrom into the guest address space */ - prot = PROT_READ | PROT_EXEC; - gpa = (1ULL << 32) - sbuf.st_size; - if (vm_mmap_memseg(ctx, gpa, VM_BOOTROM, 0, sbuf.st_size, prot) != 0) - goto done; - - /* Read 'romfile' into the guest address space */ - for (i = 0; i < sbuf.st_size / PAGE_SIZE; i++) { - rlen = read(fd, ptr + i * PAGE_SIZE, PAGE_SIZE); - if (rlen != PAGE_SIZE) { - fprintf(stderr, "Incomplete read of page %d of bootrom " - "file %s: %ld bytes\n", i, romfile, rlen); - goto done; - } - } - rv = 0; -done: - if (fd >= 0) - close(fd); - return (rv); -} diff --git a/usr.sbin/bhyve/bootrom.h b/usr.sbin/bhyve/bootrom.h deleted file mode 100644 index af150d3..0000000 --- a/usr.sbin/bhyve/bootrom.h +++ /dev/null @@ -1,38 +0,0 @@ -/*- - * Copyright (c) 2015 Neel Natu <neel@freebsd.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _BOOTROM_H_ -#define _BOOTROM_H_ - -#include <stdbool.h> - -struct vmctx; - -int bootrom_init(struct vmctx *ctx, const char *romfile); - -#endif diff --git a/usr.sbin/bhyve/consport.c b/usr.sbin/bhyve/consport.c deleted file mode 100644 index 4074e95..0000000 --- a/usr.sbin/bhyve/consport.c +++ /dev/null @@ -1,153 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> -#include <sys/select.h> - -#include <stdio.h> -#include <stdlib.h> -#include <termios.h> -#include <unistd.h> -#include <stdbool.h> - -#include "inout.h" -#include "pci_lpc.h" - -#define BVM_CONSOLE_PORT 0x220 -#define BVM_CONS_SIG ('b' << 8 | 'v') - -static struct termios tio_orig, tio_new; - -static void -ttyclose(void) -{ - tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig); -} - -static void -ttyopen(void) -{ - tcgetattr(STDIN_FILENO, &tio_orig); - - cfmakeraw(&tio_new); - tcsetattr(STDIN_FILENO, TCSANOW, &tio_new); - - atexit(ttyclose); -} - -static bool -tty_char_available(void) -{ - fd_set rfds; - struct timeval tv; - - FD_ZERO(&rfds); - FD_SET(STDIN_FILENO, &rfds); - tv.tv_sec = 0; - tv.tv_usec = 0; - if (select(STDIN_FILENO + 1, &rfds, NULL, NULL, &tv) > 0) { - return (true); - } else { - return (false); - } -} - -static int -ttyread(void) -{ - char rb; - - if (tty_char_available()) { - read(STDIN_FILENO, &rb, 1); - return (rb & 0xff); - } else { - return (-1); - } -} - -static void -ttywrite(unsigned char wb) -{ - (void) write(STDOUT_FILENO, &wb, 1); -} - -static int -console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - static int opened; - - if (bytes == 2 && in) { - *eax = BVM_CONS_SIG; - return (0); - } - - /* - * Guests might probe this port to look for old ISA devices - * using 
single-byte reads. Return 0xff for those. - */ - if (bytes == 1 && in) { - *eax = 0xff; - return (0); - } - - if (bytes != 4) - return (-1); - - if (!opened) { - ttyopen(); - opened = 1; - } - - if (in) - *eax = ttyread(); - else - ttywrite(*eax); - - return (0); -} - -SYSRES_IO(BVM_CONSOLE_PORT, 4); - -static struct inout_port consport = { - "bvmcons", - BVM_CONSOLE_PORT, - 1, - IOPORT_F_INOUT, - console_handler -}; - -void -init_bvmcons(void) -{ - - register_inout(&consport); -} diff --git a/usr.sbin/bhyve/dbgport.c b/usr.sbin/bhyve/dbgport.c deleted file mode 100644 index 5be0ceb..0000000 --- a/usr.sbin/bhyve/dbgport.c +++ /dev/null @@ -1,151 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> -#include <sys/socket.h> -#include <netinet/in.h> -#include <sys/uio.h> - -#include <stdio.h> -#include <stdlib.h> -#include <fcntl.h> -#include <unistd.h> -#include <errno.h> - -#include "inout.h" -#include "dbgport.h" -#include "pci_lpc.h" - -#define BVM_DBG_PORT 0x224 -#define BVM_DBG_SIG ('B' << 8 | 'V') - -static int listen_fd, conn_fd; - -static struct sockaddr_in sin; - -static int -dbg_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - char ch; - int nwritten, nread, printonce; - - if (bytes == 2 && in) { - *eax = BVM_DBG_SIG; - return (0); - } - - if (bytes != 4) - return (-1); - -again: - printonce = 0; - while (conn_fd < 0) { - if (!printonce) { - printf("Waiting for connection from gdb\r\n"); - printonce = 1; - } - conn_fd = accept(listen_fd, NULL, NULL); - if (conn_fd >= 0) - fcntl(conn_fd, F_SETFL, O_NONBLOCK); - else if (errno != EINTR) - perror("accept"); - } - - if (in) { - nread = read(conn_fd, &ch, 1); - if (nread == -1 && errno == EAGAIN) - *eax = -1; - else if (nread == 1) - *eax = ch; - else { - close(conn_fd); - conn_fd = -1; - goto again; - } - } else { - ch = *eax; - nwritten = write(conn_fd, &ch, 1); - if (nwritten != 1) { - close(conn_fd); - conn_fd = -1; - goto again; - } - } - return (0); -} - -static struct inout_port dbgport = { - "bvmdbg", - BVM_DBG_PORT, - 1, - 
IOPORT_F_INOUT, - dbg_handler -}; - -SYSRES_IO(BVM_DBG_PORT, 4); - -void -init_dbgport(int sport) -{ - int reuse; - - conn_fd = -1; - - if ((listen_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) { - perror("socket"); - exit(1); - } - - sin.sin_len = sizeof(sin); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = htonl(INADDR_ANY); - sin.sin_port = htons(sport); - - reuse = 1; - if (setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &reuse, - sizeof(reuse)) < 0) { - perror("setsockopt"); - exit(1); - } - - if (bind(listen_fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) { - perror("bind"); - exit(1); - } - - if (listen(listen_fd, 1) < 0) { - perror("listen"); - exit(1); - } - - register_inout(&dbgport); -} diff --git a/usr.sbin/bhyve/dbgport.h b/usr.sbin/bhyve/dbgport.h deleted file mode 100644 index 2ddcbf8..0000000 --- a/usr.sbin/bhyve/dbgport.h +++ /dev/null @@ -1,34 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _DBGPORT_H_ -#define _DBGPORT_H_ - -void init_dbgport(int port); - -#endif diff --git a/usr.sbin/bhyve/fwctl.c b/usr.sbin/bhyve/fwctl.c deleted file mode 100644 index 9e90c1a..0000000 --- a/usr.sbin/bhyve/fwctl.c +++ /dev/null @@ -1,549 +0,0 @@ -/*- - * Copyright (c) 2015 Peter Grehan <grehan@freebsd.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -/* - * Guest firmware interface. Uses i/o ports x510/x511 as Qemu does, - * but with a request/response messaging protocol. - */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/types.h> -#include <sys/errno.h> -#include <sys/uio.h> - -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "bhyverun.h" -#include "inout.h" -#include "fwctl.h" - -/* - * Messaging protocol base operations - */ -#define OP_NULL 1 -#define OP_ECHO 2 -#define OP_GET 3 -#define OP_GET_LEN 4 -#define OP_SET 5 -#define OP_MAX OP_SET - -/* I/O ports */ -#define FWCTL_OUT 0x510 -#define FWCTL_IN 0x511 - -/* - * Back-end state-machine - */ -enum state { - DORMANT, - IDENT_WAIT, - IDENT_SEND, - REQ, - RESP -} be_state = DORMANT; - -static uint8_t sig[] = { 'B', 'H', 'Y', 'V' }; -static u_int ident_idx; - -struct op_info { - int op; - int (*op_start)(int len); - void (*op_data)(uint32_t data, int len); - int (*op_result)(struct iovec **data); - void (*op_done)(struct iovec *data); -}; -static struct op_info *ops[OP_MAX+1]; - -/* Return 0-padded uint32_t */ -static uint32_t -fwctl_send_rest(uint32_t *data, size_t len) -{ - union { - uint8_t c[4]; - uint32_t w; - } u; - uint8_t *cdata; - int i; - - cdata = (uint8_t *) data; - u.w = 0; - - for (i = 0, u.w = 0; i < len; i++) - u.c[i] = *cdata++; - - return (u.w); -} - -/* - * error op dummy proto - drop all data sent and 
return an error -*/ -static int errop_code; - -static void -errop_set(int err) -{ - - errop_code = err; -} - -static int -errop_start(int len) -{ - errop_code = ENOENT; - - /* accept any length */ - return (errop_code); -} - -static void -errop_data(uint32_t data, int len) -{ - - /* ignore */ -} - -static int -errop_result(struct iovec **data) -{ - - /* no data to send back; always successful */ - *data = NULL; - return (errop_code); -} - -static void -errop_done(struct iovec *data) -{ - - /* assert data is NULL */ -} - -static struct op_info errop_info = { - .op_start = errop_start, - .op_data = errop_data, - .op_result = errop_result, - .op_done = errop_done -}; - -/* OID search */ -SET_DECLARE(ctl_set, struct ctl); - -CTL_NODE("hw.ncpu", &guest_ncpus, sizeof(guest_ncpus)); - -static struct ctl * -ctl_locate(const char *str, int maxlen) -{ - struct ctl *cp, **cpp; - - SET_FOREACH(cpp, ctl_set) { - cp = *cpp; - if (!strncmp(str, cp->c_oid, maxlen)) - return (cp); - } - - return (NULL); -} - -/* uefi-sysctl get-len */ -#define FGET_STRSZ 80 -static struct iovec fget_biov[2]; -static char fget_str[FGET_STRSZ]; -static struct { - size_t f_sz; - uint32_t f_data[1024]; -} fget_buf; -static int fget_cnt; -static size_t fget_size; - -static int -fget_start(int len) -{ - - if (len > FGET_STRSZ) - return(E2BIG); - - fget_cnt = 0; - - return (0); -} - -static void -fget_data(uint32_t data, int len) -{ - - *((uint32_t *) &fget_str[fget_cnt]) = data; - fget_cnt += sizeof(uint32_t); -} - -static int -fget_result(struct iovec **data, int val) -{ - struct ctl *cp; - int err; - - err = 0; - - /* Locate the OID */ - cp = ctl_locate(fget_str, fget_cnt); - if (cp == NULL) { - *data = NULL; - err = ENOENT; - } else { - if (val) { - /* For now, copy the len/data into a buffer */ - memset(&fget_buf, 0, sizeof(fget_buf)); - fget_buf.f_sz = cp->c_len; - memcpy(fget_buf.f_data, cp->c_data, cp->c_len); - fget_biov[0].iov_base = (char *)&fget_buf; - fget_biov[0].iov_len = 
sizeof(fget_buf.f_sz) + - cp->c_len; - } else { - fget_size = cp->c_len; - fget_biov[0].iov_base = (char *)&fget_size; - fget_biov[0].iov_len = sizeof(fget_size); - } - - fget_biov[1].iov_base = NULL; - fget_biov[1].iov_len = 0; - *data = fget_biov; - } - - return (err); -} - -static void -fget_done(struct iovec *data) -{ - - /* nothing needs to be freed */ -} - -static int -fget_len_result(struct iovec **data) -{ - return (fget_result(data, 0)); -} - -static int -fget_val_result(struct iovec **data) -{ - return (fget_result(data, 1)); -} - -static struct op_info fgetlen_info = { - .op_start = fget_start, - .op_data = fget_data, - .op_result = fget_len_result, - .op_done = fget_done -}; - -static struct op_info fgetval_info = { - .op_start = fget_start, - .op_data = fget_data, - .op_result = fget_val_result, - .op_done = fget_done -}; - -static struct req_info { - int req_error; - u_int req_count; - uint32_t req_size; - uint32_t req_type; - uint32_t req_txid; - struct op_info *req_op; - int resp_error; - int resp_count; - int resp_size; - int resp_off; - struct iovec *resp_biov; -} rinfo; - -static void -fwctl_response_done(void) -{ - - (*rinfo.req_op->op_done)(rinfo.resp_biov); - - /* reinit the req data struct */ - memset(&rinfo, 0, sizeof(rinfo)); -} - -static void -fwctl_request_done(void) -{ - - rinfo.resp_error = (*rinfo.req_op->op_result)(&rinfo.resp_biov); - - /* XXX only a single vector supported at the moment */ - rinfo.resp_off = 0; - if (rinfo.resp_biov == NULL) { - rinfo.resp_size = 0; - } else { - rinfo.resp_size = rinfo.resp_biov[0].iov_len; - } -} - -static int -fwctl_request_start(void) -{ - int err; - - /* Data size doesn't include header */ - rinfo.req_size -= 12; - - rinfo.req_op = &errop_info; - if (rinfo.req_type <= OP_MAX && ops[rinfo.req_type] != NULL) - rinfo.req_op = ops[rinfo.req_type]; - - err = (*rinfo.req_op->op_start)(rinfo.req_size); - - if (err) { - errop_set(err); - rinfo.req_op = &errop_info; - } - - /* Catch case of zero-length 
message here */ - if (rinfo.req_size == 0) { - fwctl_request_done(); - return (1); - } - - return (0); -} - -static int -fwctl_request_data(uint32_t value) -{ - int remlen; - - /* Make sure remaining size is >= 0 */ - rinfo.req_size -= sizeof(uint32_t); - remlen = MAX(rinfo.req_size, 0); - - (*rinfo.req_op->op_data)(value, remlen); - - if (rinfo.req_size < sizeof(uint32_t)) { - fwctl_request_done(); - return (1); - } - - return (0); -} - -static int -fwctl_request(uint32_t value) -{ - - int ret; - - ret = 0; - - switch (rinfo.req_count) { - case 0: - /* Verify size */ - if (value < 12) { - printf("msg size error"); - exit(1); - } - rinfo.req_size = value; - rinfo.req_count = 1; - break; - case 1: - rinfo.req_type = value; - rinfo.req_count++; - break; - case 2: - rinfo.req_txid = value; - rinfo.req_count++; - ret = fwctl_request_start(); - break; - default: - ret = fwctl_request_data(value); - break; - } - - return (ret); -} - -static int -fwctl_response(uint32_t *retval) -{ - uint32_t *dp; - int remlen; - - switch(rinfo.resp_count) { - case 0: - /* 4 x u32 header len + data */ - *retval = 4*sizeof(uint32_t) + - roundup(rinfo.resp_size, sizeof(uint32_t)); - rinfo.resp_count++; - break; - case 1: - *retval = rinfo.req_type; - rinfo.resp_count++; - break; - case 2: - *retval = rinfo.req_txid; - rinfo.resp_count++; - break; - case 3: - *retval = rinfo.resp_error; - rinfo.resp_count++; - break; - default: - remlen = rinfo.resp_size - rinfo.resp_off; - dp = (uint32_t *) - ((uint8_t *)rinfo.resp_biov->iov_base + rinfo.resp_off); - if (remlen >= sizeof(uint32_t)) { - *retval = *dp; - } else if (remlen > 0) { - *retval = fwctl_send_rest(dp, remlen); - } - rinfo.resp_off += sizeof(uint32_t); - break; - } - - if (rinfo.resp_count > 3 && - rinfo.resp_size - rinfo.resp_off <= 0) { - fwctl_response_done(); - return (1); - } - - return (0); -} - - -/* - * i/o port handling. 
- */ -static uint8_t -fwctl_inb(void) -{ - uint8_t retval; - - retval = 0xff; - - switch (be_state) { - case IDENT_SEND: - retval = sig[ident_idx++]; - if (ident_idx >= sizeof(sig)) - be_state = REQ; - break; - default: - break; - } - - return (retval); -} - -static void -fwctl_outw(uint16_t val) -{ - switch (be_state) { - case IDENT_WAIT: - if (val == 0) { - be_state = IDENT_SEND; - ident_idx = 0; - } - break; - default: - /* ignore */ - break; - } -} - -static uint32_t -fwctl_inl(void) -{ - uint32_t retval; - - switch (be_state) { - case RESP: - if (fwctl_response(&retval)) - be_state = REQ; - break; - default: - retval = 0xffffffff; - break; - } - - return (retval); -} - -static void -fwctl_outl(uint32_t val) -{ - - switch (be_state) { - case REQ: - if (fwctl_request(val)) - be_state = RESP; - default: - break; - } - -} - -static int -fwctl_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - - if (in) { - if (bytes == 1) - *eax = fwctl_inb(); - else if (bytes == 4) - *eax = fwctl_inl(); - else - *eax = 0xffff; - } else { - if (bytes == 2) - fwctl_outw(*eax); - else if (bytes == 4) - fwctl_outl(*eax); - } - - return (0); -} -INOUT_PORT(fwctl_wreg, FWCTL_OUT, IOPORT_F_INOUT, fwctl_handler); -INOUT_PORT(fwctl_rreg, FWCTL_IN, IOPORT_F_IN, fwctl_handler); - -void -fwctl_init(void) -{ - - ops[OP_GET_LEN] = &fgetlen_info; - ops[OP_GET] = &fgetval_info; - - be_state = IDENT_WAIT; -} diff --git a/usr.sbin/bhyve/fwctl.h b/usr.sbin/bhyve/fwctl.h deleted file mode 100644 index f5f8d13..0000000 --- a/usr.sbin/bhyve/fwctl.h +++ /dev/null @@ -1,54 +0,0 @@ -/*- - * Copyright (c) 2015 Peter Grehan <grehan@freebsd.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _FWCTL_H_ -#define _FWCTL_H_ - -#include <sys/linker_set.h> - -/* - * Linker set api for export of information to guest firmware via - * a sysctl-like OID interface - */ -struct ctl { - const char *c_oid; - const void *c_data; - const int c_len; -}; - -#define CTL_NODE(oid, data, len) \ - static struct ctl __CONCAT(__ctl, __LINE__) = { \ - oid, \ - (data), \ - (len), \ - }; \ - DATA_SET(ctl_set, __CONCAT(__ctl, __LINE__)) - -void fwctl_init(void); - -#endif /* _FWCTL_H_ */ diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c deleted file mode 100644 index 929bb3c..0000000 --- a/usr.sbin/bhyve/inout.c +++ /dev/null @@ -1,297 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/linker_set.h> -#include <sys/_iovec.h> -#include <sys/mman.h> - -#include <x86/psl.h> -#include <x86/segments.h> - -#include <machine/vmm.h> -#include <machine/vmm_instruction_emul.h> -#include <vmmapi.h> - -#include <stdio.h> -#include <string.h> -#include <assert.h> - -#include "bhyverun.h" -#include "inout.h" - -SET_DECLARE(inout_port_set, struct inout_port); - -#define MAX_IOPORTS (1 << 16) - -#define VERIFY_IOPORT(port, size) \ - assert((port) >= 0 && (size) > 0 && ((port) + (size)) <= MAX_IOPORTS) - -static struct { - const char *name; - int flags; - inout_func_t handler; - void *arg; -} inout_handlers[MAX_IOPORTS]; - -static int -default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - if (in) { - switch (bytes) { - case 4: - *eax = 0xffffffff; - break; - case 2: - *eax = 0xffff; - break; - case 1: - *eax = 0xff; - break; - } - } - - return (0); -} - -static void -register_default_iohandler(int start, int size) -{ - struct inout_port iop; - - VERIFY_IOPORT(start, size); - - bzero(&iop, sizeof(iop)); - iop.name = "default"; - iop.port = start; - iop.size = size; - iop.flags = IOPORT_F_INOUT | IOPORT_F_DEFAULT; - iop.handler = default_inout; - - register_inout(&iop); -} - -int -emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) -{ - int addrsize, bytes, flags, in, port, prot, rep; - uint32_t eax, val; - inout_func_t handler; - void *arg; - int error, fault, retval; - enum vm_reg_name idxreg; - uint64_t gla, index, iterations, count; - struct vm_inout_str *vis; - struct iovec iov[2]; - - bytes = vmexit->u.inout.bytes; - in = vmexit->u.inout.in; - port = vmexit->u.inout.port; - - assert(port < MAX_IOPORTS); - assert(bytes == 1 || bytes == 2 || bytes == 4); - - handler = inout_handlers[port].handler; - - if (strict && handler == default_inout) - return (-1); - - flags = 
inout_handlers[port].flags; - arg = inout_handlers[port].arg; - - if (in) { - if (!(flags & IOPORT_F_IN)) - return (-1); - } else { - if (!(flags & IOPORT_F_OUT)) - return (-1); - } - - retval = 0; - if (vmexit->u.inout.string) { - vis = &vmexit->u.inout_str; - rep = vis->inout.rep; - addrsize = vis->addrsize; - prot = in ? PROT_WRITE : PROT_READ; - assert(addrsize == 2 || addrsize == 4 || addrsize == 8); - - /* Index register */ - idxreg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI; - index = vis->index & vie_size2mask(addrsize); - - /* Count register */ - count = vis->count & vie_size2mask(addrsize); - - /* Limit number of back-to-back in/out emulations to 16 */ - iterations = MIN(count, 16); - while (iterations > 0) { - assert(retval == 0); - if (vie_calculate_gla(vis->paging.cpu_mode, - vis->seg_name, &vis->seg_desc, index, bytes, - addrsize, prot, &gla)) { - vm_inject_gp(ctx, vcpu); - break; - } - - error = vm_copy_setup(ctx, vcpu, &vis->paging, gla, - bytes, prot, iov, nitems(iov), &fault); - if (error) { - retval = -1; /* Unrecoverable error */ - break; - } else if (fault) { - retval = 0; /* Resume guest to handle fault */ - break; - } - - if (vie_alignment_check(vis->paging.cpl, bytes, - vis->cr0, vis->rflags, gla)) { - vm_inject_ac(ctx, vcpu, 0); - break; - } - - val = 0; - if (!in) - vm_copyin(ctx, vcpu, iov, &val, bytes); - - retval = handler(ctx, vcpu, in, port, bytes, &val, arg); - if (retval != 0) - break; - - if (in) - vm_copyout(ctx, vcpu, &val, iov, bytes); - - /* Update index */ - if (vis->rflags & PSL_D) - index -= bytes; - else - index += bytes; - - count--; - iterations--; - } - - /* Update index register */ - error = vie_update_register(ctx, vcpu, idxreg, index, addrsize); - assert(error == 0); - - /* - * Update count register only if the instruction had a repeat - * prefix. 
- */ - if (rep) { - error = vie_update_register(ctx, vcpu, VM_REG_GUEST_RCX, - count, addrsize); - assert(error == 0); - } - - /* Restart the instruction if more iterations remain */ - if (retval == 0 && count != 0) { - error = vm_restart_instruction(ctx, vcpu); - assert(error == 0); - } - } else { - eax = vmexit->u.inout.eax; - val = eax & vie_size2mask(bytes); - retval = handler(ctx, vcpu, in, port, bytes, &val, arg); - if (retval == 0 && in) { - eax &= ~vie_size2mask(bytes); - eax |= val & vie_size2mask(bytes); - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX, - eax); - assert(error == 0); - } - } - return (retval); -} - -void -init_inout(void) -{ - struct inout_port **iopp, *iop; - - /* - * Set up the default handler for all ports - */ - register_default_iohandler(0, MAX_IOPORTS); - - /* - * Overwrite with specified handlers - */ - SET_FOREACH(iopp, inout_port_set) { - iop = *iopp; - assert(iop->port < MAX_IOPORTS); - inout_handlers[iop->port].name = iop->name; - inout_handlers[iop->port].flags = iop->flags; - inout_handlers[iop->port].handler = iop->handler; - inout_handlers[iop->port].arg = NULL; - } -} - -int -register_inout(struct inout_port *iop) -{ - int i; - - VERIFY_IOPORT(iop->port, iop->size); - - /* - * Verify that the new registration is not overwriting an already - * allocated i/o range. 
- */ - if ((iop->flags & IOPORT_F_DEFAULT) == 0) { - for (i = iop->port; i < iop->port + iop->size; i++) { - if ((inout_handlers[i].flags & IOPORT_F_DEFAULT) == 0) - return (-1); - } - } - - for (i = iop->port; i < iop->port + iop->size; i++) { - inout_handlers[i].name = iop->name; - inout_handlers[i].flags = iop->flags; - inout_handlers[i].handler = iop->handler; - inout_handlers[i].arg = iop->arg; - } - - return (0); -} - -int -unregister_inout(struct inout_port *iop) -{ - - VERIFY_IOPORT(iop->port, iop->size); - assert(inout_handlers[iop->port].name == iop->name); - - register_default_iohandler(iop->port, iop->size); - - return (0); -} diff --git a/usr.sbin/bhyve/inout.h b/usr.sbin/bhyve/inout.h deleted file mode 100644 index 7f39095..0000000 --- a/usr.sbin/bhyve/inout.h +++ /dev/null @@ -1,79 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _INOUT_H_ -#define _INOUT_H_ - -#include <sys/linker_set.h> - -struct vmctx; -struct vm_exit; - -/* - * inout emulation handlers return 0 on success and -1 on failure. - */ -typedef int (*inout_func_t)(struct vmctx *ctx, int vcpu, int in, int port, - int bytes, uint32_t *eax, void *arg); - -struct inout_port { - const char *name; - int port; - int size; - int flags; - inout_func_t handler; - void *arg; -}; -#define IOPORT_F_IN 0x1 -#define IOPORT_F_OUT 0x2 -#define IOPORT_F_INOUT (IOPORT_F_IN | IOPORT_F_OUT) - -/* - * The following flags are used internally and must not be used by - * device models. 
- */ -#define IOPORT_F_DEFAULT 0x80000000 /* claimed by default handler */ - -#define INOUT_PORT(name, port, flags, handler) \ - static struct inout_port __CONCAT(__inout_port, __LINE__) = { \ - #name, \ - (port), \ - 1, \ - (flags), \ - (handler), \ - 0 \ - }; \ - DATA_SET(inout_port_set, __CONCAT(__inout_port, __LINE__)) - -void init_inout(void); -int emulate_inout(struct vmctx *, int vcpu, struct vm_exit *vmexit, - int strict); -int register_inout(struct inout_port *iop); -int unregister_inout(struct inout_port *iop); -void init_bvmcons(void); - -#endif /* _INOUT_H_ */ diff --git a/usr.sbin/bhyve/ioapic.c b/usr.sbin/bhyve/ioapic.c deleted file mode 100644 index 0ad69d9..0000000 --- a/usr.sbin/bhyve/ioapic.c +++ /dev/null @@ -1,74 +0,0 @@ -/*- - * Copyright (c) 2014 Hudson River Trading LLC - * Written by: John H. Baldwin <jhb@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> - -#include <machine/vmm.h> -#include <vmmapi.h> - -#include "ioapic.h" - -/* - * Assign PCI INTx interrupts to I/O APIC pins in a round-robin - * fashion. Note that we have no idea what the HPET is using, but the - * HPET is also programmable whereas this is intended for hardwired - * PCI interrupts. - * - * This assumes a single I/O APIC where pins >= 16 are permitted for - * PCI devices. - */ -static int pci_pins; - -void -ioapic_init(struct vmctx *ctx) -{ - - if (vm_ioapic_pincount(ctx, &pci_pins) < 0) { - pci_pins = 0; - return; - } - - /* Ignore the first 16 pins. */ - if (pci_pins <= 16) { - pci_pins = 0; - return; - } - pci_pins -= 16; -} - -int -ioapic_pci_alloc_irq(void) -{ - static int last_pin; - - if (pci_pins == 0) - return (-1); - return (16 + (last_pin++ % pci_pins)); -} diff --git a/usr.sbin/bhyve/ioapic.h b/usr.sbin/bhyve/ioapic.h deleted file mode 100644 index efdd3c6..0000000 --- a/usr.sbin/bhyve/ioapic.h +++ /dev/null @@ -1,39 +0,0 @@ -/*- - * Copyright (c) 2014 Hudson River Trading LLC - * Written by: John H. Baldwin <jhb@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _IOAPIC_H_ -#define _IOAPIC_H_ - -/* - * Allocate a PCI IRQ from the I/O APIC. - */ -void ioapic_init(struct vmctx *ctx); -int ioapic_pci_alloc_irq(void); - -#endif diff --git a/usr.sbin/bhyve/mem.c b/usr.sbin/bhyve/mem.c deleted file mode 100644 index 2a9f430..0000000 --- a/usr.sbin/bhyve/mem.c +++ /dev/null @@ -1,291 +0,0 @@ -/*- - * Copyright (c) 2012 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -/* - * Memory ranges are represented with an RB tree. On insertion, the range - * is checked for overlaps. On lookup, the key has the same base and limit - * so it can be searched within the range. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> -#include <sys/tree.h> -#include <sys/errno.h> -#include <machine/vmm.h> -#include <machine/vmm_instruction_emul.h> - -#include <stdio.h> -#include <stdlib.h> -#include <assert.h> -#include <pthread.h> - -#include "mem.h" - -struct mmio_rb_range { - RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */ - struct mem_range mr_param; - uint64_t mr_base; - uint64_t mr_end; -}; - -struct mmio_rb_tree; -RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); - -RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback; - -/* - * Per-vCPU cache. Since most accesses from a vCPU will be to - * consecutive addresses in a range, it makes sense to cache the - * result of a lookup. 
- */ -static struct mmio_rb_range *mmio_hint[VM_MAXCPU]; - -static pthread_rwlock_t mmio_rwlock; - -static int -mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b) -{ - if (a->mr_end < b->mr_base) - return (-1); - else if (a->mr_base > b->mr_end) - return (1); - return (0); -} - -static int -mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr, - struct mmio_rb_range **entry) -{ - struct mmio_rb_range find, *res; - - find.mr_base = find.mr_end = addr; - - res = RB_FIND(mmio_rb_tree, rbt, &find); - - if (res != NULL) { - *entry = res; - return (0); - } - - return (ENOENT); -} - -static int -mmio_rb_add(struct mmio_rb_tree *rbt, struct mmio_rb_range *new) -{ - struct mmio_rb_range *overlap; - - overlap = RB_INSERT(mmio_rb_tree, rbt, new); - - if (overlap != NULL) { -#ifdef RB_DEBUG - printf("overlap detected: new %lx:%lx, tree %lx:%lx\n", - new->mr_base, new->mr_end, - overlap->mr_base, overlap->mr_end); -#endif - - return (EEXIST); - } - - return (0); -} - -#if 0 -static void -mmio_rb_dump(struct mmio_rb_tree *rbt) -{ - struct mmio_rb_range *np; - - pthread_rwlock_rdlock(&mmio_rwlock); - RB_FOREACH(np, mmio_rb_tree, rbt) { - printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end, - np->mr_param.name); - } - pthread_rwlock_unlock(&mmio_rwlock); -} -#endif - -RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare); - -static int -mem_read(void *ctx, int vcpu, uint64_t gpa, uint64_t *rval, int size, void *arg) -{ - int error; - struct mem_range *mr = arg; - - error = (*mr->handler)(ctx, vcpu, MEM_F_READ, gpa, size, - rval, mr->arg1, mr->arg2); - return (error); -} - -static int -mem_write(void *ctx, int vcpu, uint64_t gpa, uint64_t wval, int size, void *arg) -{ - int error; - struct mem_range *mr = arg; - - error = (*mr->handler)(ctx, vcpu, MEM_F_WRITE, gpa, size, - &wval, mr->arg1, mr->arg2); - return (error); -} - -int -emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie, - struct vm_guest_paging *paging) - -{ - 
struct mmio_rb_range *entry; - int err, immutable; - - pthread_rwlock_rdlock(&mmio_rwlock); - /* - * First check the per-vCPU cache - */ - if (mmio_hint[vcpu] && - paddr >= mmio_hint[vcpu]->mr_base && - paddr <= mmio_hint[vcpu]->mr_end) { - entry = mmio_hint[vcpu]; - } else - entry = NULL; - - if (entry == NULL) { - if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) { - /* Update the per-vCPU cache */ - mmio_hint[vcpu] = entry; - } else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) { - pthread_rwlock_unlock(&mmio_rwlock); - return (ESRCH); - } - } - - assert(entry != NULL); - - /* - * An 'immutable' memory range is guaranteed to be never removed - * so there is no need to hold 'mmio_rwlock' while calling the - * handler. - * - * XXX writes to the PCIR_COMMAND register can cause register_mem() - * to be called. If the guest is using PCI extended config space - * to modify the PCIR_COMMAND register then register_mem() can - * deadlock on 'mmio_rwlock'. However by registering the extended - * config space window as 'immutable' the deadlock can be avoided. 
- */ - immutable = (entry->mr_param.flags & MEM_F_IMMUTABLE); - if (immutable) - pthread_rwlock_unlock(&mmio_rwlock); - - err = vmm_emulate_instruction(ctx, vcpu, paddr, vie, paging, - mem_read, mem_write, &entry->mr_param); - - if (!immutable) - pthread_rwlock_unlock(&mmio_rwlock); - - return (err); -} - -static int -register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp) -{ - struct mmio_rb_range *entry, *mrp; - int err; - - err = 0; - - mrp = malloc(sizeof(struct mmio_rb_range)); - - if (mrp != NULL) { - mrp->mr_param = *memp; - mrp->mr_base = memp->base; - mrp->mr_end = memp->base + memp->size - 1; - pthread_rwlock_wrlock(&mmio_rwlock); - if (mmio_rb_lookup(rbt, memp->base, &entry) != 0) - err = mmio_rb_add(rbt, mrp); - pthread_rwlock_unlock(&mmio_rwlock); - if (err) - free(mrp); - } else - err = ENOMEM; - - return (err); -} - -int -register_mem(struct mem_range *memp) -{ - - return (register_mem_int(&mmio_rb_root, memp)); -} - -int -register_mem_fallback(struct mem_range *memp) -{ - - return (register_mem_int(&mmio_rb_fallback, memp)); -} - -int -unregister_mem(struct mem_range *memp) -{ - struct mem_range *mr; - struct mmio_rb_range *entry = NULL; - int err, i; - - pthread_rwlock_wrlock(&mmio_rwlock); - err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry); - if (err == 0) { - mr = &entry->mr_param; - assert(mr->name == memp->name); - assert(mr->base == memp->base && mr->size == memp->size); - assert((mr->flags & MEM_F_IMMUTABLE) == 0); - RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry); - - /* flush Per-vCPU cache */ - for (i=0; i < VM_MAXCPU; i++) { - if (mmio_hint[i] == entry) - mmio_hint[i] = NULL; - } - } - pthread_rwlock_unlock(&mmio_rwlock); - - if (entry) - free(entry); - - return (err); -} - -void -init_mem(void) -{ - - RB_INIT(&mmio_rb_root); - RB_INIT(&mmio_rb_fallback); - pthread_rwlock_init(&mmio_rwlock, NULL); -} diff --git a/usr.sbin/bhyve/mem.h b/usr.sbin/bhyve/mem.h deleted file mode 100644 index f671eae..0000000 --- 
a/usr.sbin/bhyve/mem.h +++ /dev/null @@ -1,61 +0,0 @@ -/*- - * Copyright (c) 2012 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _MEM_H_ -#define _MEM_H_ - -#include <sys/linker_set.h> - -struct vmctx; - -typedef int (*mem_func_t)(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, - int size, uint64_t *val, void *arg1, long arg2); - -struct mem_range { - const char *name; - int flags; - mem_func_t handler; - void *arg1; - long arg2; - uint64_t base; - uint64_t size; -}; -#define MEM_F_READ 0x1 -#define MEM_F_WRITE 0x2 -#define MEM_F_RW 0x3 -#define MEM_F_IMMUTABLE 0x4 /* mem_range cannot be unregistered */ - -void init_mem(void); -int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie, - struct vm_guest_paging *paging); - -int register_mem(struct mem_range *memp); -int register_mem_fallback(struct mem_range *memp); -int unregister_mem(struct mem_range *memp); - -#endif /* _MEM_H_ */ diff --git a/usr.sbin/bhyve/mevent.c b/usr.sbin/bhyve/mevent.c deleted file mode 100644 index 07d3baf..0000000 --- a/usr.sbin/bhyve/mevent.c +++ /dev/null @@ -1,456 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -/* - * Micro event library for FreeBSD, designed for a single i/o thread - * using kqueue, and having events be persistent by default. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <assert.h> -#include <errno.h> -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <unistd.h> - -#include <sys/types.h> -#include <sys/event.h> -#include <sys/time.h> - -#include <pthread.h> -#include <pthread_np.h> - -#include "mevent.h" - -#define MEVENT_MAX 64 - -#define MEV_ADD 1 -#define MEV_ENABLE 2 -#define MEV_DISABLE 3 -#define MEV_DEL_PENDING 4 - -extern char *vmname; - -static pthread_t mevent_tid; -static int mevent_timid = 43; -static int mevent_pipefd[2]; -static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER; - -struct mevent { - void (*me_func)(int, enum ev_type, void *); -#define me_msecs me_fd - int me_fd; - int me_timid; - enum ev_type me_type; - void *me_param; - int me_cq; - int me_state; - int me_closefd; - LIST_ENTRY(mevent) me_list; -}; - -static LIST_HEAD(listhead, mevent) global_head, change_head; - -static void -mevent_qlock(void) -{ - pthread_mutex_lock(&mevent_lmutex); -} - -static void -mevent_qunlock(void) -{ - pthread_mutex_unlock(&mevent_lmutex); -} - -static void -mevent_pipe_read(int fd, enum ev_type type, void *param) -{ - char buf[MEVENT_MAX]; - int status; - - /* - * Drain the pipe read side. The fd is non-blocking so this is - * safe to do. 
- */ - do { - status = read(fd, buf, sizeof(buf)); - } while (status == MEVENT_MAX); -} - -static void -mevent_notify(void) -{ - char c; - - /* - * If calling from outside the i/o thread, write a byte on the - * pipe to force the i/o thread to exit the blocking kevent call. - */ - if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) { - write(mevent_pipefd[1], &c, 1); - } -} - -static int -mevent_kq_filter(struct mevent *mevp) -{ - int retval; - - retval = 0; - - if (mevp->me_type == EVF_READ) - retval = EVFILT_READ; - - if (mevp->me_type == EVF_WRITE) - retval = EVFILT_WRITE; - - if (mevp->me_type == EVF_TIMER) - retval = EVFILT_TIMER; - - if (mevp->me_type == EVF_SIGNAL) - retval = EVFILT_SIGNAL; - - return (retval); -} - -static int -mevent_kq_flags(struct mevent *mevp) -{ - int ret; - - switch (mevp->me_state) { - case MEV_ADD: - ret = EV_ADD; /* implicitly enabled */ - break; - case MEV_ENABLE: - ret = EV_ENABLE; - break; - case MEV_DISABLE: - ret = EV_DISABLE; - break; - case MEV_DEL_PENDING: - ret = EV_DELETE; - break; - default: - assert(0); - break; - } - - return (ret); -} - -static int -mevent_kq_fflags(struct mevent *mevp) -{ - /* XXX nothing yet, perhaps EV_EOF for reads ? 
*/ - return (0); -} - -static int -mevent_build(int mfd, struct kevent *kev) -{ - struct mevent *mevp, *tmpp; - int i; - - i = 0; - - mevent_qlock(); - - LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) { - if (mevp->me_closefd) { - /* - * A close of the file descriptor will remove the - * event - */ - close(mevp->me_fd); - } else { - if (mevp->me_type == EVF_TIMER) { - kev[i].ident = mevp->me_timid; - kev[i].data = mevp->me_msecs; - } else { - kev[i].ident = mevp->me_fd; - kev[i].data = 0; - } - kev[i].filter = mevent_kq_filter(mevp); - kev[i].flags = mevent_kq_flags(mevp); - kev[i].fflags = mevent_kq_fflags(mevp); - kev[i].udata = mevp; - i++; - } - - mevp->me_cq = 0; - LIST_REMOVE(mevp, me_list); - - if (mevp->me_state == MEV_DEL_PENDING) { - free(mevp); - } else { - LIST_INSERT_HEAD(&global_head, mevp, me_list); - } - - assert(i < MEVENT_MAX); - } - - mevent_qunlock(); - - return (i); -} - -static void -mevent_handle(struct kevent *kev, int numev) -{ - struct mevent *mevp; - int i; - - for (i = 0; i < numev; i++) { - mevp = kev[i].udata; - - /* XXX check for EV_ERROR ? */ - - (*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param); - } -} - -struct mevent * -mevent_add(int tfd, enum ev_type type, - void (*func)(int, enum ev_type, void *), void *param) -{ - struct mevent *lp, *mevp; - - if (tfd < 0 || func == NULL) { - return (NULL); - } - - mevp = NULL; - - mevent_qlock(); - - /* - * Verify that the fd/type tuple is not present in any list - */ - LIST_FOREACH(lp, &global_head, me_list) { - if (type != EVF_TIMER && lp->me_fd == tfd && - lp->me_type == type) { - goto exit; - } - } - - LIST_FOREACH(lp, &change_head, me_list) { - if (type != EVF_TIMER && lp->me_fd == tfd && - lp->me_type == type) { - goto exit; - } - } - - /* - * Allocate an entry, populate it, and add it to the change list. 
- */ - mevp = calloc(1, sizeof(struct mevent)); - if (mevp == NULL) { - goto exit; - } - - if (type == EVF_TIMER) { - mevp->me_msecs = tfd; - mevp->me_timid = mevent_timid++; - } else - mevp->me_fd = tfd; - mevp->me_type = type; - mevp->me_func = func; - mevp->me_param = param; - - LIST_INSERT_HEAD(&change_head, mevp, me_list); - mevp->me_cq = 1; - mevp->me_state = MEV_ADD; - mevent_notify(); - -exit: - mevent_qunlock(); - - return (mevp); -} - -static int -mevent_update(struct mevent *evp, int newstate) -{ - /* - * It's not possible to enable/disable a deleted event - */ - if (evp->me_state == MEV_DEL_PENDING) - return (EINVAL); - - /* - * No update needed if state isn't changing - */ - if (evp->me_state == newstate) - return (0); - - mevent_qlock(); - - evp->me_state = newstate; - - /* - * Place the entry onto the changed list if not already there. - */ - if (evp->me_cq == 0) { - evp->me_cq = 1; - LIST_REMOVE(evp, me_list); - LIST_INSERT_HEAD(&change_head, evp, me_list); - mevent_notify(); - } - - mevent_qunlock(); - - return (0); -} - -int -mevent_enable(struct mevent *evp) -{ - - return (mevent_update(evp, MEV_ENABLE)); -} - -int -mevent_disable(struct mevent *evp) -{ - - return (mevent_update(evp, MEV_DISABLE)); -} - -static int -mevent_delete_event(struct mevent *evp, int closefd) -{ - mevent_qlock(); - - /* - * Place the entry onto the changed list if not already there, and - * mark as to be deleted. 
- */ - if (evp->me_cq == 0) { - evp->me_cq = 1; - LIST_REMOVE(evp, me_list); - LIST_INSERT_HEAD(&change_head, evp, me_list); - mevent_notify(); - } - evp->me_state = MEV_DEL_PENDING; - - if (closefd) - evp->me_closefd = 1; - - mevent_qunlock(); - - return (0); -} - -int -mevent_delete(struct mevent *evp) -{ - - return (mevent_delete_event(evp, 0)); -} - -int -mevent_delete_close(struct mevent *evp) -{ - - return (mevent_delete_event(evp, 1)); -} - -static void -mevent_set_name(void) -{ - - pthread_set_name_np(mevent_tid, "mevent"); -} - -void -mevent_dispatch(void) -{ - struct kevent changelist[MEVENT_MAX]; - struct kevent eventlist[MEVENT_MAX]; - struct mevent *pipev; - int mfd; - int numev; - int ret; - - mevent_tid = pthread_self(); - mevent_set_name(); - - mfd = kqueue(); - assert(mfd > 0); - - /* - * Open the pipe that will be used for other threads to force - * the blocking kqueue call to exit by writing to it. Set the - * descriptor to non-blocking. - */ - ret = pipe(mevent_pipefd); - if (ret < 0) { - perror("pipe"); - exit(0); - } - - /* - * Add internal event handler for the pipe write fd - */ - pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL); - assert(pipev != NULL); - - for (;;) { - /* - * Build changelist if required. - * XXX the changelist can be put into the blocking call - * to eliminate the extra syscall. Currently better for - * debug. 
- */ - numev = mevent_build(mfd, changelist); - if (numev) { - ret = kevent(mfd, changelist, numev, NULL, 0, NULL); - if (ret == -1) { - perror("Error return from kevent change"); - } - } - - /* - * Block awaiting events - */ - ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL); - if (ret == -1 && errno != EINTR) { - perror("Error return from kevent monitor"); - } - - /* - * Handle reported events - */ - mevent_handle(eventlist, ret); - } -} diff --git a/usr.sbin/bhyve/mevent.h b/usr.sbin/bhyve/mevent.h deleted file mode 100644 index d6a59c6..0000000 --- a/usr.sbin/bhyve/mevent.h +++ /dev/null @@ -1,51 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _MEVENT_H_ -#define _MEVENT_H_ - -enum ev_type { - EVF_READ, - EVF_WRITE, - EVF_TIMER, - EVF_SIGNAL -}; - -struct mevent; - -struct mevent *mevent_add(int fd, enum ev_type type, - void (*func)(int, enum ev_type, void *), - void *param); -int mevent_enable(struct mevent *evp); -int mevent_disable(struct mevent *evp); -int mevent_delete(struct mevent *evp); -int mevent_delete_close(struct mevent *evp); - -void mevent_dispatch(void); - -#endif /* _MEVENT_H_ */ diff --git a/usr.sbin/bhyve/mevent_test.c b/usr.sbin/bhyve/mevent_test.c deleted file mode 100644 index 9c68ff7..0000000 --- a/usr.sbin/bhyve/mevent_test.c +++ /dev/null @@ -1,256 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -/* - * Test program for the micro event library. Set up a simple TCP echo - * service. - * - * cc mevent_test.c mevent.c -lpthread - */ - -#include <sys/types.h> -#include <sys/stdint.h> -#include <sys/sysctl.h> -#include <sys/socket.h> -#include <netinet/in.h> -#include <machine/cpufunc.h> - -#include <stdio.h> -#include <stdlib.h> -#include <pthread.h> -#include <unistd.h> - -#include "mevent.h" - -#define TEST_PORT 4321 - -static pthread_mutex_t accept_mutex = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t accept_condvar = PTHREAD_COND_INITIALIZER; - -static struct mevent *tevp; - -char *vmname = "test vm"; - - -#define MEVENT_ECHO - -/* Number of timer events to capture */ -#define TEVSZ 4096 -uint64_t tevbuf[TEVSZ]; - -static void -timer_print(void) -{ - uint64_t min, max, diff, sum, tsc_freq; - size_t len; - int j; - - min = UINT64_MAX; - max = 0; - sum = 0; - - len = sizeof(tsc_freq); - sysctlbyname("machdep.tsc_freq", &tsc_freq, &len, NULL, 0); - - for (j = 1; j < TEVSZ; j++) { - /* Convert a tsc diff into microseconds */ - diff = (tevbuf[j] - tevbuf[j-1]) * 1000000 / tsc_freq; - sum += diff; - if (min > diff) - min = diff; - if (max < diff) - max = diff; - } - - printf("timers done: usecs, min %ld, max %ld, mean %ld\n", min, max, - sum/(TEVSZ - 1)); -} - -static void -timer_callback(int fd, enum ev_type type, void *param) -{ - static int i; - - if (i >= TEVSZ) - abort(); - - tevbuf[i++] = rdtsc(); - - if 
(i == TEVSZ) { - mevent_delete(tevp); - timer_print(); - } -} - - -#ifdef MEVENT_ECHO -struct esync { - pthread_mutex_t e_mt; - pthread_cond_t e_cond; -}; - -static void -echoer_callback(int fd, enum ev_type type, void *param) -{ - struct esync *sync = param; - - pthread_mutex_lock(&sync->e_mt); - pthread_cond_signal(&sync->e_cond); - pthread_mutex_unlock(&sync->e_mt); -} - -static void * -echoer(void *param) -{ - struct esync sync; - struct mevent *mev; - char buf[128]; - int fd = (int)(uintptr_t) param; - int len; - - pthread_mutex_init(&sync.e_mt, NULL); - pthread_cond_init(&sync.e_cond, NULL); - - pthread_mutex_lock(&sync.e_mt); - - mev = mevent_add(fd, EVF_READ, echoer_callback, &sync); - if (mev == NULL) { - printf("Could not allocate echoer event\n"); - exit(1); - } - - while (!pthread_cond_wait(&sync.e_cond, &sync.e_mt)) { - len = read(fd, buf, sizeof(buf)); - if (len > 0) { - write(fd, buf, len); - write(0, buf, len); - } else { - break; - } - } - - mevent_delete_close(mev); - - pthread_mutex_unlock(&sync.e_mt); - pthread_mutex_destroy(&sync.e_mt); - pthread_cond_destroy(&sync.e_cond); - - return (NULL); -} - -#else - -static void * -echoer(void *param) -{ - char buf[128]; - int fd = (int)(uintptr_t) param; - int len; - - while ((len = read(fd, buf, sizeof(buf))) > 0) { - write(1, buf, len); - } - - return (NULL); -} -#endif /* MEVENT_ECHO */ - -static void -acceptor_callback(int fd, enum ev_type type, void *param) -{ - pthread_mutex_lock(&accept_mutex); - pthread_cond_signal(&accept_condvar); - pthread_mutex_unlock(&accept_mutex); -} - -static void * -acceptor(void *param) -{ - struct sockaddr_in sin; - pthread_t tid; - int news; - int s; - static int first; - - if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) { - perror("socket"); - exit(1); - } - - sin.sin_len = sizeof(sin); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = htonl(INADDR_ANY); - sin.sin_port = htons(TEST_PORT); - - if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) { - 
perror("bind"); - exit(1); - } - - if (listen(s, 1) < 0) { - perror("listen"); - exit(1); - } - - (void) mevent_add(s, EVF_READ, acceptor_callback, NULL); - - pthread_mutex_lock(&accept_mutex); - - while (!pthread_cond_wait(&accept_condvar, &accept_mutex)) { - news = accept(s, NULL, NULL); - if (news < 0) { - perror("accept error"); - } else { - static int first = 1; - - if (first) { - /* - * Start a timer - */ - first = 0; - tevp = mevent_add(1, EVF_TIMER, timer_callback, - NULL); - } - - printf("incoming connection, spawning thread\n"); - pthread_create(&tid, NULL, echoer, - (void *)(uintptr_t)news); - } - } - - return (NULL); -} - -main() -{ - pthread_t tid; - - pthread_create(&tid, NULL, acceptor, NULL); - - mevent_dispatch(); -} diff --git a/usr.sbin/bhyve/mptbl.c b/usr.sbin/bhyve/mptbl.c deleted file mode 100644 index 904d103..0000000 --- a/usr.sbin/bhyve/mptbl.c +++ /dev/null @@ -1,377 +0,0 @@ -/*- - * Copyright (c) 2012 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> -#include <sys/errno.h> -#include <x86/mptable.h> - -#include <stdio.h> -#include <string.h> - -#include "acpi.h" -#include "bhyverun.h" -#include "mptbl.h" -#include "pci_emul.h" - -#define MPTABLE_BASE 0xF0000 - -/* floating pointer length + maximum length of configuration table */ -#define MPTABLE_MAX_LENGTH (65536 + 16) - -#define LAPIC_PADDR 0xFEE00000 -#define LAPIC_VERSION 16 - -#define IOAPIC_PADDR 0xFEC00000 -#define IOAPIC_VERSION 0x11 - -#define MP_SPECREV 4 -#define MPFP_SIG "_MP_" - -/* Configuration header defines */ -#define MPCH_SIG "PCMP" -#define MPCH_OEMID "BHyVe " -#define MPCH_OEMID_LEN 8 -#define MPCH_PRODID "Hypervisor " -#define MPCH_PRODID_LEN 12 - -/* Processor entry defines */ -#define MPEP_SIG_FAMILY 6 /* XXX bhyve should supply this */ -#define MPEP_SIG_MODEL 26 -#define MPEP_SIG_STEPPING 5 -#define MPEP_SIG \ - ((MPEP_SIG_FAMILY << 8) | \ - (MPEP_SIG_MODEL << 4) | \ - (MPEP_SIG_STEPPING)) - -#define MPEP_FEATURES (0xBFEBFBFF) /* XXX Intel i7 */ - -/* Number of local intr entries */ -#define MPEII_NUM_LOCAL_IRQ 2 - -/* Bus entry defines */ -#define MPE_NUM_BUSES 2 -#define MPE_BUSNAME_LEN 6 -#define MPE_BUSNAME_ISA "ISA " -#define MPE_BUSNAME_PCI "PCI " - -static void *oem_tbl_start; -static int oem_tbl_size; - -static uint8_t -mpt_compute_checksum(void *base, size_t len) -{ - uint8_t *bytes; - uint8_t sum; - - 
for(bytes = base, sum = 0; len > 0; len--) { - sum += *bytes++; - } - - return (256 - sum); -} - -static void -mpt_build_mpfp(mpfps_t mpfp, vm_paddr_t gpa) -{ - - memset(mpfp, 0, sizeof(*mpfp)); - memcpy(mpfp->signature, MPFP_SIG, 4); - mpfp->pap = gpa + sizeof(*mpfp); - mpfp->length = 1; - mpfp->spec_rev = MP_SPECREV; - mpfp->checksum = mpt_compute_checksum(mpfp, sizeof(*mpfp)); -} - -static void -mpt_build_mpch(mpcth_t mpch) -{ - - memset(mpch, 0, sizeof(*mpch)); - memcpy(mpch->signature, MPCH_SIG, 4); - mpch->spec_rev = MP_SPECREV; - memcpy(mpch->oem_id, MPCH_OEMID, MPCH_OEMID_LEN); - memcpy(mpch->product_id, MPCH_PRODID, MPCH_PRODID_LEN); - mpch->apic_address = LAPIC_PADDR; -} - -static void -mpt_build_proc_entries(proc_entry_ptr mpep, int ncpu) -{ - int i; - - for (i = 0; i < ncpu; i++) { - memset(mpep, 0, sizeof(*mpep)); - mpep->type = MPCT_ENTRY_PROCESSOR; - mpep->apic_id = i; // XXX - mpep->apic_version = LAPIC_VERSION; - mpep->cpu_flags = PROCENTRY_FLAG_EN; - if (i == 0) - mpep->cpu_flags |= PROCENTRY_FLAG_BP; - mpep->cpu_signature = MPEP_SIG; - mpep->feature_flags = MPEP_FEATURES; - mpep++; - } -} - -static void -mpt_build_localint_entries(int_entry_ptr mpie) -{ - - /* Hardcode LINT0 as ExtINT on all CPUs. */ - memset(mpie, 0, sizeof(*mpie)); - mpie->type = MPCT_ENTRY_LOCAL_INT; - mpie->int_type = INTENTRY_TYPE_EXTINT; - mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM | - INTENTRY_FLAGS_TRIGGER_CONFORM; - mpie->dst_apic_id = 0xff; - mpie->dst_apic_int = 0; - mpie++; - - /* Hardcode LINT1 as NMI on all CPUs. 
*/ - memset(mpie, 0, sizeof(*mpie)); - mpie->type = MPCT_ENTRY_LOCAL_INT; - mpie->int_type = INTENTRY_TYPE_NMI; - mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM | - INTENTRY_FLAGS_TRIGGER_CONFORM; - mpie->dst_apic_id = 0xff; - mpie->dst_apic_int = 1; -} - -static void -mpt_build_bus_entries(bus_entry_ptr mpeb) -{ - - memset(mpeb, 0, sizeof(*mpeb)); - mpeb->type = MPCT_ENTRY_BUS; - mpeb->bus_id = 0; - memcpy(mpeb->bus_type, MPE_BUSNAME_PCI, MPE_BUSNAME_LEN); - mpeb++; - - memset(mpeb, 0, sizeof(*mpeb)); - mpeb->type = MPCT_ENTRY_BUS; - mpeb->bus_id = 1; - memcpy(mpeb->bus_type, MPE_BUSNAME_ISA, MPE_BUSNAME_LEN); -} - -static void -mpt_build_ioapic_entries(io_apic_entry_ptr mpei, int id) -{ - - memset(mpei, 0, sizeof(*mpei)); - mpei->type = MPCT_ENTRY_IOAPIC; - mpei->apic_id = id; - mpei->apic_version = IOAPIC_VERSION; - mpei->apic_flags = IOAPICENTRY_FLAG_EN; - mpei->apic_address = IOAPIC_PADDR; -} - -static int -mpt_count_ioint_entries(void) -{ - int bus, count; - - count = 0; - for (bus = 0; bus <= PCI_BUSMAX; bus++) - count += pci_count_lintr(bus); - - /* - * Always include entries for the first 16 pins along with a entry - * for each active PCI INTx pin. - */ - return (16 + count); -} - -static void -mpt_generate_pci_int(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, - void *arg) -{ - int_entry_ptr *mpiep, mpie; - - mpiep = arg; - mpie = *mpiep; - memset(mpie, 0, sizeof(*mpie)); - - /* - * This is always after another I/O interrupt entry, so cheat - * and fetch the I/O APIC ID from the prior entry. 
- */ - mpie->type = MPCT_ENTRY_INT; - mpie->int_type = INTENTRY_TYPE_INT; - mpie->src_bus_id = bus; - mpie->src_bus_irq = slot << 2 | (pin - 1); - mpie->dst_apic_id = mpie[-1].dst_apic_id; - mpie->dst_apic_int = ioapic_irq; - - *mpiep = mpie + 1; -} - -static void -mpt_build_ioint_entries(int_entry_ptr mpie, int id) -{ - int pin, bus; - - /* - * The following config is taken from kernel mptable.c - * mptable_parse_default_config_ints(...), for now - * just use the default config, tweek later if needed. - */ - - /* First, generate the first 16 pins. */ - for (pin = 0; pin < 16; pin++) { - memset(mpie, 0, sizeof(*mpie)); - mpie->type = MPCT_ENTRY_INT; - mpie->src_bus_id = 1; - mpie->dst_apic_id = id; - - /* - * All default configs route IRQs from bus 0 to the first 16 - * pins of the first I/O APIC with an APIC ID of 2. - */ - mpie->dst_apic_int = pin; - switch (pin) { - case 0: - /* Pin 0 is an ExtINT pin. */ - mpie->int_type = INTENTRY_TYPE_EXTINT; - break; - case 2: - /* IRQ 0 is routed to pin 2. */ - mpie->int_type = INTENTRY_TYPE_INT; - mpie->src_bus_irq = 0; - break; - case SCI_INT: - /* ACPI SCI is level triggered and active-lo. */ - mpie->int_flags = INTENTRY_FLAGS_POLARITY_ACTIVELO | - INTENTRY_FLAGS_TRIGGER_LEVEL; - mpie->int_type = INTENTRY_TYPE_INT; - mpie->src_bus_irq = SCI_INT; - break; - default: - /* All other pins are identity mapped. */ - mpie->int_type = INTENTRY_TYPE_INT; - mpie->src_bus_irq = pin; - break; - } - mpie++; - } - - /* Next, generate entries for any PCI INTx interrupts. 
*/ - for (bus = 0; bus <= PCI_BUSMAX; bus++) - pci_walk_lintr(bus, mpt_generate_pci_int, &mpie); -} - -void -mptable_add_oemtbl(void *tbl, int tblsz) -{ - - oem_tbl_start = tbl; - oem_tbl_size = tblsz; -} - -int -mptable_build(struct vmctx *ctx, int ncpu) -{ - mpcth_t mpch; - bus_entry_ptr mpeb; - io_apic_entry_ptr mpei; - proc_entry_ptr mpep; - mpfps_t mpfp; - int_entry_ptr mpie; - int ioints, bus; - char *curraddr; - char *startaddr; - - startaddr = paddr_guest2host(ctx, MPTABLE_BASE, MPTABLE_MAX_LENGTH); - if (startaddr == NULL) { - fprintf(stderr, "mptable requires mapped mem\n"); - return (ENOMEM); - } - - /* - * There is no way to advertise multiple PCI hierarchies via MPtable - * so require that there is no PCI hierarchy with a non-zero bus - * number. - */ - for (bus = 1; bus <= PCI_BUSMAX; bus++) { - if (pci_bus_configured(bus)) { - fprintf(stderr, "MPtable is incompatible with " - "multiple PCI hierarchies.\r\n"); - fprintf(stderr, "MPtable generation can be disabled " - "by passing the -Y option to bhyve(8).\r\n"); - return (EINVAL); - } - } - - curraddr = startaddr; - mpfp = (mpfps_t)curraddr; - mpt_build_mpfp(mpfp, MPTABLE_BASE); - curraddr += sizeof(*mpfp); - - mpch = (mpcth_t)curraddr; - mpt_build_mpch(mpch); - curraddr += sizeof(*mpch); - - mpep = (proc_entry_ptr)curraddr; - mpt_build_proc_entries(mpep, ncpu); - curraddr += sizeof(*mpep) * ncpu; - mpch->entry_count += ncpu; - - mpeb = (bus_entry_ptr) curraddr; - mpt_build_bus_entries(mpeb); - curraddr += sizeof(*mpeb) * MPE_NUM_BUSES; - mpch->entry_count += MPE_NUM_BUSES; - - mpei = (io_apic_entry_ptr)curraddr; - mpt_build_ioapic_entries(mpei, 0); - curraddr += sizeof(*mpei); - mpch->entry_count++; - - mpie = (int_entry_ptr) curraddr; - ioints = mpt_count_ioint_entries(); - mpt_build_ioint_entries(mpie, 0); - curraddr += sizeof(*mpie) * ioints; - mpch->entry_count += ioints; - - mpie = (int_entry_ptr)curraddr; - mpt_build_localint_entries(mpie); - curraddr += sizeof(*mpie) * MPEII_NUM_LOCAL_IRQ; - 
mpch->entry_count += MPEII_NUM_LOCAL_IRQ; - - if (oem_tbl_start) { - mpch->oem_table_pointer = curraddr - startaddr + MPTABLE_BASE; - mpch->oem_table_size = oem_tbl_size; - memcpy(curraddr, oem_tbl_start, oem_tbl_size); - } - - mpch->base_table_length = curraddr - (char *)mpch; - mpch->checksum = mpt_compute_checksum(mpch, mpch->base_table_length); - - return (0); -} diff --git a/usr.sbin/bhyve/mptbl.h b/usr.sbin/bhyve/mptbl.h deleted file mode 100644 index e9e1c42..0000000 --- a/usr.sbin/bhyve/mptbl.h +++ /dev/null @@ -1,35 +0,0 @@ -/*- - * Copyright (c) 2012 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _MPTBL_H_ -#define _MPTBL_H_ - -int mptable_build(struct vmctx *ctx, int ncpu); -void mptable_add_oemtbl(void *tbl, int tblsz); - -#endif /* _MPTBL_H_ */ diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c deleted file mode 100644 index 9acb0da..0000000 --- a/usr.sbin/bhyve/pci_ahci.c +++ /dev/null @@ -1,2347 +0,0 @@ -/*- - * Copyright (c) 2013 Zhixiang Yu <zcore@freebsd.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/linker_set.h> -#include <sys/stat.h> -#include <sys/uio.h> -#include <sys/ioctl.h> -#include <sys/disk.h> -#include <sys/ata.h> -#include <sys/endian.h> - -#include <errno.h> -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdint.h> -#include <string.h> -#include <strings.h> -#include <unistd.h> -#include <assert.h> -#include <pthread.h> -#include <pthread_np.h> -#include <inttypes.h> -#include <md5.h> - -#include "bhyverun.h" -#include "pci_emul.h" -#include "ahci.h" -#include "block_if.h" - -#define MAX_PORTS 6 /* Intel ICH8 AHCI supports 6 ports */ - -#define PxSIG_ATA 0x00000101 /* ATA drive */ -#define PxSIG_ATAPI 0xeb140101 /* ATAPI drive */ - -enum sata_fis_type { - FIS_TYPE_REGH2D = 0x27, /* Register FIS - host to device */ - FIS_TYPE_REGD2H = 0x34, /* Register FIS - device to host */ - FIS_TYPE_DMAACT = 0x39, /* DMA activate FIS - device to host */ - FIS_TYPE_DMASETUP = 0x41, /* DMA setup FIS - bidirectional */ - FIS_TYPE_DATA = 0x46, /* Data FIS - bidirectional */ - FIS_TYPE_BIST = 0x58, /* BIST activate FIS - bidirectional */ - FIS_TYPE_PIOSETUP = 0x5F, /* PIO setup FIS - device to host */ - FIS_TYPE_SETDEVBITS = 0xA1, /* Set dev bits FIS - device to host */ -}; - -/* - * SCSI opcodes - */ -#define TEST_UNIT_READY 0x00 -#define REQUEST_SENSE 0x03 -#define INQUIRY 0x12 -#define START_STOP_UNIT 0x1B -#define PREVENT_ALLOW 0x1E -#define READ_CAPACITY 0x25 -#define READ_10 0x28 -#define POSITION_TO_ELEMENT 0x2B -#define READ_TOC 0x43 -#define GET_EVENT_STATUS_NOTIFICATION 0x4A -#define MODE_SENSE_10 0x5A -#define REPORT_LUNS 0xA0 -#define READ_12 0xA8 -#define READ_CD 0xBE - -/* - * SCSI mode page codes - */ -#define MODEPAGE_RW_ERROR_RECOVERY 0x01 -#define MODEPAGE_CD_CAPABILITIES 0x2A - -/* - * ATA commands - */ -#define ATA_SF_ENAB_SATA_SF 0x10 -#define ATA_SATA_SF_AN 0x05 -#define ATA_SF_DIS_SATA_SF 0x90 - -/* - * 
Debug printf - */ -#ifdef AHCI_DEBUG -static FILE *dbg; -#define DPRINTF(format, arg...) do{fprintf(dbg, format, ##arg);fflush(dbg);}while(0) -#else -#define DPRINTF(format, arg...) -#endif -#define WPRINTF(format, arg...) printf(format, ##arg) - -struct ahci_ioreq { - struct blockif_req io_req; - struct ahci_port *io_pr; - STAILQ_ENTRY(ahci_ioreq) io_flist; - TAILQ_ENTRY(ahci_ioreq) io_blist; - uint8_t *cfis; - uint32_t len; - uint32_t done; - int slot; - int more; -}; - -struct ahci_port { - struct blockif_ctxt *bctx; - struct pci_ahci_softc *pr_sc; - uint8_t *cmd_lst; - uint8_t *rfis; - char ident[20 + 1]; - int atapi; - int reset; - int waitforclear; - int mult_sectors; - uint8_t xfermode; - uint8_t err_cfis[20]; - uint8_t sense_key; - uint8_t asc; - u_int ccs; - uint32_t pending; - - uint32_t clb; - uint32_t clbu; - uint32_t fb; - uint32_t fbu; - uint32_t is; - uint32_t ie; - uint32_t cmd; - uint32_t unused0; - uint32_t tfd; - uint32_t sig; - uint32_t ssts; - uint32_t sctl; - uint32_t serr; - uint32_t sact; - uint32_t ci; - uint32_t sntf; - uint32_t fbs; - - /* - * i/o request info - */ - struct ahci_ioreq *ioreq; - int ioqsz; - STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd; - TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd; -}; - -struct ahci_cmd_hdr { - uint16_t flags; - uint16_t prdtl; - uint32_t prdbc; - uint64_t ctba; - uint32_t reserved[4]; -}; - -struct ahci_prdt_entry { - uint64_t dba; - uint32_t reserved; -#define DBCMASK 0x3fffff - uint32_t dbc; -}; - -struct pci_ahci_softc { - struct pci_devinst *asc_pi; - pthread_mutex_t mtx; - int ports; - uint32_t cap; - uint32_t ghc; - uint32_t is; - uint32_t pi; - uint32_t vs; - uint32_t ccc_ctl; - uint32_t ccc_pts; - uint32_t em_loc; - uint32_t em_ctl; - uint32_t cap2; - uint32_t bohc; - uint32_t lintr; - struct ahci_port port[MAX_PORTS]; -}; -#define ahci_ctx(sc) ((sc)->asc_pi->pi_vmctx) - -static void ahci_handle_port(struct ahci_port *p); - -static inline void lba_to_msf(uint8_t *buf, int lba) -{ - lba += 150; - 
buf[0] = (lba / 75) / 60; - buf[1] = (lba / 75) % 60; - buf[2] = lba % 75; -} - -/* - * generate HBA intr depending on whether or not ports within - * the controller have an interrupt pending. - */ -static void -ahci_generate_intr(struct pci_ahci_softc *sc) -{ - struct pci_devinst *pi; - int i; - - pi = sc->asc_pi; - - for (i = 0; i < sc->ports; i++) { - struct ahci_port *pr; - pr = &sc->port[i]; - if (pr->is & pr->ie) - sc->is |= (1 << i); - } - - DPRINTF("%s %x\n", __func__, sc->is); - - if (sc->is && (sc->ghc & AHCI_GHC_IE)) { - if (pci_msi_enabled(pi)) { - /* - * Generate an MSI interrupt on every edge - */ - pci_generate_msi(pi, 0); - } else if (!sc->lintr) { - /* - * Only generate a pin-based interrupt if one wasn't - * in progress - */ - sc->lintr = 1; - pci_lintr_assert(pi); - } - } else if (sc->lintr) { - /* - * No interrupts: deassert pin-based signal if it had - * been asserted - */ - pci_lintr_deassert(pi); - sc->lintr = 0; - } -} - -static void -ahci_write_fis(struct ahci_port *p, enum sata_fis_type ft, uint8_t *fis) -{ - int offset, len, irq; - - if (p->rfis == NULL || !(p->cmd & AHCI_P_CMD_FRE)) - return; - - switch (ft) { - case FIS_TYPE_REGD2H: - offset = 0x40; - len = 20; - irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_DHR : 0; - break; - case FIS_TYPE_SETDEVBITS: - offset = 0x58; - len = 8; - irq = (fis[1] & (1 << 6)) ? AHCI_P_IX_SDB : 0; - break; - case FIS_TYPE_PIOSETUP: - offset = 0x20; - len = 20; - irq = (fis[1] & (1 << 6)) ? 
AHCI_P_IX_PS : 0; - break; - default: - WPRINTF("unsupported fis type %d\n", ft); - return; - } - if (fis[2] & ATA_S_ERROR) { - p->waitforclear = 1; - irq |= AHCI_P_IX_TFE; - } - memcpy(p->rfis + offset, fis, len); - if (irq) { - p->is |= irq; - ahci_generate_intr(p->pr_sc); - } -} - -static void -ahci_write_fis_piosetup(struct ahci_port *p) -{ - uint8_t fis[20]; - - memset(fis, 0, sizeof(fis)); - fis[0] = FIS_TYPE_PIOSETUP; - ahci_write_fis(p, FIS_TYPE_PIOSETUP, fis); -} - -static void -ahci_write_fis_sdb(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd) -{ - uint8_t fis[8]; - uint8_t error; - - error = (tfd >> 8) & 0xff; - tfd &= 0x77; - memset(fis, 0, sizeof(fis)); - fis[0] = FIS_TYPE_SETDEVBITS; - fis[1] = (1 << 6); - fis[2] = tfd; - fis[3] = error; - if (fis[2] & ATA_S_ERROR) { - p->err_cfis[0] = slot; - p->err_cfis[2] = tfd; - p->err_cfis[3] = error; - memcpy(&p->err_cfis[4], cfis + 4, 16); - } else { - *(uint32_t *)(fis + 4) = (1 << slot); - p->sact &= ~(1 << slot); - } - p->tfd &= ~0x77; - p->tfd |= tfd; - ahci_write_fis(p, FIS_TYPE_SETDEVBITS, fis); -} - -static void -ahci_write_fis_d2h(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t tfd) -{ - uint8_t fis[20]; - uint8_t error; - - error = (tfd >> 8) & 0xff; - memset(fis, 0, sizeof(fis)); - fis[0] = FIS_TYPE_REGD2H; - fis[1] = (1 << 6); - fis[2] = tfd & 0xff; - fis[3] = error; - fis[4] = cfis[4]; - fis[5] = cfis[5]; - fis[6] = cfis[6]; - fis[7] = cfis[7]; - fis[8] = cfis[8]; - fis[9] = cfis[9]; - fis[10] = cfis[10]; - fis[11] = cfis[11]; - fis[12] = cfis[12]; - fis[13] = cfis[13]; - if (fis[2] & ATA_S_ERROR) { - p->err_cfis[0] = 0x80; - p->err_cfis[2] = tfd & 0xff; - p->err_cfis[3] = error; - memcpy(&p->err_cfis[4], cfis + 4, 16); - } else - p->ci &= ~(1 << slot); - p->tfd = tfd; - ahci_write_fis(p, FIS_TYPE_REGD2H, fis); -} - -static void -ahci_write_fis_d2h_ncq(struct ahci_port *p, int slot) -{ - uint8_t fis[20]; - - p->tfd = ATA_S_READY | ATA_S_DSC; - memset(fis, 0, sizeof(fis)); - 
fis[0] = FIS_TYPE_REGD2H; - fis[1] = 0; /* No interrupt */ - fis[2] = p->tfd; /* Status */ - fis[3] = 0; /* No error */ - p->ci &= ~(1 << slot); - ahci_write_fis(p, FIS_TYPE_REGD2H, fis); -} - -static void -ahci_write_reset_fis_d2h(struct ahci_port *p) -{ - uint8_t fis[20]; - - memset(fis, 0, sizeof(fis)); - fis[0] = FIS_TYPE_REGD2H; - fis[3] = 1; - fis[4] = 1; - if (p->atapi) { - fis[5] = 0x14; - fis[6] = 0xeb; - } - fis[12] = 1; - ahci_write_fis(p, FIS_TYPE_REGD2H, fis); -} - -static void -ahci_check_stopped(struct ahci_port *p) -{ - /* - * If we are no longer processing the command list and nothing - * is in-flight, clear the running bit, the current command - * slot, the command issue and active bits. - */ - if (!(p->cmd & AHCI_P_CMD_ST)) { - if (p->pending == 0) { - p->ccs = 0; - p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK); - p->ci = 0; - p->sact = 0; - p->waitforclear = 0; - } - } -} - -static void -ahci_port_stop(struct ahci_port *p) -{ - struct ahci_ioreq *aior; - uint8_t *cfis; - int slot; - int error; - - assert(pthread_mutex_isowned_np(&p->pr_sc->mtx)); - - TAILQ_FOREACH(aior, &p->iobhd, io_blist) { - /* - * Try to cancel the outstanding blockif request. - */ - error = blockif_cancel(p->bctx, &aior->io_req); - if (error != 0) - continue; - - slot = aior->slot; - cfis = aior->cfis; - if (cfis[2] == ATA_WRITE_FPDMA_QUEUED || - cfis[2] == ATA_READ_FPDMA_QUEUED || - cfis[2] == ATA_SEND_FPDMA_QUEUED) - p->sact &= ~(1 << slot); /* NCQ */ - else - p->ci &= ~(1 << slot); - - /* - * This command is now done. 
- */ - p->pending &= ~(1 << slot); - - /* - * Delete the blockif request from the busy list - */ - TAILQ_REMOVE(&p->iobhd, aior, io_blist); - - /* - * Move the blockif request back to the free list - */ - STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist); - } - - ahci_check_stopped(p); -} - -static void -ahci_port_reset(struct ahci_port *pr) -{ - pr->serr = 0; - pr->sact = 0; - pr->xfermode = ATA_UDMA6; - pr->mult_sectors = 128; - - if (!pr->bctx) { - pr->ssts = ATA_SS_DET_NO_DEVICE; - pr->sig = 0xFFFFFFFF; - pr->tfd = 0x7F; - return; - } - pr->ssts = ATA_SS_DET_PHY_ONLINE | ATA_SS_IPM_ACTIVE; - if (pr->sctl & ATA_SC_SPD_MASK) - pr->ssts |= (pr->sctl & ATA_SC_SPD_MASK); - else - pr->ssts |= ATA_SS_SPD_GEN3; - pr->tfd = (1 << 8) | ATA_S_DSC | ATA_S_DMA; - if (!pr->atapi) { - pr->sig = PxSIG_ATA; - pr->tfd |= ATA_S_READY; - } else - pr->sig = PxSIG_ATAPI; - ahci_write_reset_fis_d2h(pr); -} - -static void -ahci_reset(struct pci_ahci_softc *sc) -{ - int i; - - sc->ghc = AHCI_GHC_AE; - sc->is = 0; - - if (sc->lintr) { - pci_lintr_deassert(sc->asc_pi); - sc->lintr = 0; - } - - for (i = 0; i < sc->ports; i++) { - sc->port[i].ie = 0; - sc->port[i].is = 0; - sc->port[i].cmd = (AHCI_P_CMD_SUD | AHCI_P_CMD_POD); - if (sc->port[i].bctx) - sc->port[i].cmd |= AHCI_P_CMD_CPS; - sc->port[i].sctl = 0; - ahci_port_reset(&sc->port[i]); - } -} - -static void -ata_string(uint8_t *dest, const char *src, int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (*src) - dest[i ^ 1] = *src++; - else - dest[i ^ 1] = ' '; - } -} - -static void -atapi_string(uint8_t *dest, const char *src, int len) -{ - int i; - - for (i = 0; i < len; i++) { - if (*src) - dest[i] = *src++; - else - dest[i] = ' '; - } -} - -/* - * Build up the iovec based on the PRDT, 'done' and 'len'. 
- */ -static void -ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior, - struct ahci_prdt_entry *prdt, uint16_t prdtl) -{ - struct blockif_req *breq = &aior->io_req; - int i, j, skip, todo, left, extra; - uint32_t dbcsz; - - /* Copy part of PRDT between 'done' and 'len' bytes into the iov. */ - skip = aior->done; - left = aior->len - aior->done; - todo = 0; - for (i = 0, j = 0; i < prdtl && j < BLOCKIF_IOV_MAX && left > 0; - i++, prdt++) { - dbcsz = (prdt->dbc & DBCMASK) + 1; - /* Skip already done part of the PRDT */ - if (dbcsz <= skip) { - skip -= dbcsz; - continue; - } - dbcsz -= skip; - if (dbcsz > left) - dbcsz = left; - breq->br_iov[j].iov_base = paddr_guest2host(ahci_ctx(p->pr_sc), - prdt->dba + skip, dbcsz); - breq->br_iov[j].iov_len = dbcsz; - todo += dbcsz; - left -= dbcsz; - skip = 0; - j++; - } - - /* If we got limited by IOV length, round I/O down to sector size. */ - if (j == BLOCKIF_IOV_MAX) { - extra = todo % blockif_sectsz(p->bctx); - todo -= extra; - assert(todo > 0); - while (extra > 0) { - if (breq->br_iov[j - 1].iov_len > extra) { - breq->br_iov[j - 1].iov_len -= extra; - break; - } - extra -= breq->br_iov[j - 1].iov_len; - j--; - } - } - - breq->br_iovcnt = j; - breq->br_resid = todo; - aior->done += todo; - aior->more = (aior->done < aior->len && i < prdtl); -} - -static void -ahci_handle_rw(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done) -{ - struct ahci_ioreq *aior; - struct blockif_req *breq; - struct ahci_prdt_entry *prdt; - struct ahci_cmd_hdr *hdr; - uint64_t lba; - uint32_t len; - int err, first, ncq, readop; - - prdt = (struct ahci_prdt_entry *)(cfis + 0x80); - hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); - ncq = 0; - readop = 1; - first = (done == 0); - - if (cfis[2] == ATA_WRITE || cfis[2] == ATA_WRITE48 || - cfis[2] == ATA_WRITE_MUL || cfis[2] == ATA_WRITE_MUL48 || - cfis[2] == ATA_WRITE_DMA || cfis[2] == ATA_WRITE_DMA48 || - cfis[2] == ATA_WRITE_FPDMA_QUEUED) - readop = 0; - - if (cfis[2] 
== ATA_WRITE_FPDMA_QUEUED || - cfis[2] == ATA_READ_FPDMA_QUEUED) { - lba = ((uint64_t)cfis[10] << 40) | - ((uint64_t)cfis[9] << 32) | - ((uint64_t)cfis[8] << 24) | - ((uint64_t)cfis[6] << 16) | - ((uint64_t)cfis[5] << 8) | - cfis[4]; - len = cfis[11] << 8 | cfis[3]; - if (!len) - len = 65536; - ncq = 1; - } else if (cfis[2] == ATA_READ48 || cfis[2] == ATA_WRITE48 || - cfis[2] == ATA_READ_MUL48 || cfis[2] == ATA_WRITE_MUL48 || - cfis[2] == ATA_READ_DMA48 || cfis[2] == ATA_WRITE_DMA48) { - lba = ((uint64_t)cfis[10] << 40) | - ((uint64_t)cfis[9] << 32) | - ((uint64_t)cfis[8] << 24) | - ((uint64_t)cfis[6] << 16) | - ((uint64_t)cfis[5] << 8) | - cfis[4]; - len = cfis[13] << 8 | cfis[12]; - if (!len) - len = 65536; - } else { - lba = ((cfis[7] & 0xf) << 24) | (cfis[6] << 16) | - (cfis[5] << 8) | cfis[4]; - len = cfis[12]; - if (!len) - len = 256; - } - lba *= blockif_sectsz(p->bctx); - len *= blockif_sectsz(p->bctx); - - /* Pull request off free list */ - aior = STAILQ_FIRST(&p->iofhd); - assert(aior != NULL); - STAILQ_REMOVE_HEAD(&p->iofhd, io_flist); - - aior->cfis = cfis; - aior->slot = slot; - aior->len = len; - aior->done = done; - breq = &aior->io_req; - breq->br_offset = lba + done; - ahci_build_iov(p, aior, prdt, hdr->prdtl); - - /* Mark this command in-flight. */ - p->pending |= 1 << slot; - - /* Stuff request onto busy list. 
*/ - TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); - - if (ncq && first) - ahci_write_fis_d2h_ncq(p, slot); - - if (readop) - err = blockif_read(p->bctx, breq); - else - err = blockif_write(p->bctx, breq); - assert(err == 0); -} - -static void -ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis) -{ - struct ahci_ioreq *aior; - struct blockif_req *breq; - int err; - - /* - * Pull request off free list - */ - aior = STAILQ_FIRST(&p->iofhd); - assert(aior != NULL); - STAILQ_REMOVE_HEAD(&p->iofhd, io_flist); - aior->cfis = cfis; - aior->slot = slot; - aior->len = 0; - aior->done = 0; - aior->more = 0; - breq = &aior->io_req; - - /* - * Mark this command in-flight. - */ - p->pending |= 1 << slot; - - /* - * Stuff request onto busy list - */ - TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); - - err = blockif_flush(p->bctx, breq); - assert(err == 0); -} - -static inline void -read_prdt(struct ahci_port *p, int slot, uint8_t *cfis, - void *buf, int size) -{ - struct ahci_cmd_hdr *hdr; - struct ahci_prdt_entry *prdt; - void *to; - int i, len; - - hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); - len = size; - to = buf; - prdt = (struct ahci_prdt_entry *)(cfis + 0x80); - for (i = 0; i < hdr->prdtl && len; i++) { - uint8_t *ptr; - uint32_t dbcsz; - int sublen; - - dbcsz = (prdt->dbc & DBCMASK) + 1; - ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz); - sublen = MIN(len, dbcsz); - memcpy(to, ptr, sublen); - len -= sublen; - to += sublen; - prdt++; - } -} - -static void -ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done) -{ - struct ahci_ioreq *aior; - struct blockif_req *breq; - uint8_t *entry; - uint64_t elba; - uint32_t len, elen; - int err, first, ncq; - uint8_t buf[512]; - - first = (done == 0); - if (cfis[2] == ATA_DATA_SET_MANAGEMENT) { - len = (uint16_t)cfis[13] << 8 | cfis[12]; - len *= 512; - ncq = 0; - } else { /* ATA_SEND_FPDMA_QUEUED */ - len = (uint16_t)cfis[11] << 8 | cfis[3]; - len *= 512; - 
ncq = 1; - } - read_prdt(p, slot, cfis, buf, sizeof(buf)); - -next: - entry = &buf[done]; - elba = ((uint64_t)entry[5] << 40) | - ((uint64_t)entry[4] << 32) | - ((uint64_t)entry[3] << 24) | - ((uint64_t)entry[2] << 16) | - ((uint64_t)entry[1] << 8) | - entry[0]; - elen = (uint16_t)entry[7] << 8 | entry[6]; - done += 8; - if (elen == 0) { - if (done >= len) { - ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); - p->pending &= ~(1 << slot); - ahci_check_stopped(p); - if (!first) - ahci_handle_port(p); - return; - } - goto next; - } - - /* - * Pull request off free list - */ - aior = STAILQ_FIRST(&p->iofhd); - assert(aior != NULL); - STAILQ_REMOVE_HEAD(&p->iofhd, io_flist); - aior->cfis = cfis; - aior->slot = slot; - aior->len = len; - aior->done = done; - aior->more = (len != done); - - breq = &aior->io_req; - breq->br_offset = elba * blockif_sectsz(p->bctx); - breq->br_resid = elen * blockif_sectsz(p->bctx); - - /* - * Mark this command in-flight. - */ - p->pending |= 1 << slot; - - /* - * Stuff request onto busy list - */ - TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); - - if (ncq && first) - ahci_write_fis_d2h_ncq(p, slot); - - err = blockif_delete(p->bctx, breq); - assert(err == 0); -} - -static inline void -write_prdt(struct ahci_port *p, int slot, uint8_t *cfis, - void *buf, int size) -{ - struct ahci_cmd_hdr *hdr; - struct ahci_prdt_entry *prdt; - void *from; - int i, len; - - hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); - len = size; - from = buf; - prdt = (struct ahci_prdt_entry *)(cfis + 0x80); - for (i = 0; i < hdr->prdtl && len; i++) { - uint8_t *ptr; - uint32_t dbcsz; - int sublen; - - dbcsz = (prdt->dbc & DBCMASK) + 1; - ptr = paddr_guest2host(ahci_ctx(p->pr_sc), prdt->dba, dbcsz); - sublen = MIN(len, dbcsz); - memcpy(ptr, from, sublen); - len -= sublen; - from += sublen; - prdt++; - } - hdr->prdbc = size - len; -} - -static void -ahci_checksum(uint8_t *buf, int size) -{ - int i; - uint8_t sum = 0; - - for (i = 0; i < size 
- 1; i++) - sum += buf[i]; - buf[size - 1] = 0x100 - sum; -} - -static void -ahci_handle_read_log(struct ahci_port *p, int slot, uint8_t *cfis) -{ - struct ahci_cmd_hdr *hdr; - uint8_t buf[512]; - - hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); - if (p->atapi || hdr->prdtl == 0 || cfis[4] != 0x10 || - cfis[5] != 0 || cfis[9] != 0 || cfis[12] != 1 || cfis[13] != 0) { - ahci_write_fis_d2h(p, slot, cfis, - (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); - return; - } - - memset(buf, 0, sizeof(buf)); - memcpy(buf, p->err_cfis, sizeof(p->err_cfis)); - ahci_checksum(buf, sizeof(buf)); - - if (cfis[2] == ATA_READ_LOG_EXT) - ahci_write_fis_piosetup(p); - write_prdt(p, slot, cfis, (void *)buf, sizeof(buf)); - ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY); -} - -static void -handle_identify(struct ahci_port *p, int slot, uint8_t *cfis) -{ - struct ahci_cmd_hdr *hdr; - - hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); - if (p->atapi || hdr->prdtl == 0) { - ahci_write_fis_d2h(p, slot, cfis, - (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); - } else { - uint16_t buf[256]; - uint64_t sectors; - int sectsz, psectsz, psectoff, candelete, ro; - uint16_t cyl; - uint8_t sech, heads; - - ro = blockif_is_ro(p->bctx); - candelete = blockif_candelete(p->bctx); - sectsz = blockif_sectsz(p->bctx); - sectors = blockif_size(p->bctx) / sectsz; - blockif_chs(p->bctx, &cyl, &heads, &sech); - blockif_psectsz(p->bctx, &psectsz, &psectoff); - memset(buf, 0, sizeof(buf)); - buf[0] = 0x0040; - buf[1] = cyl; - buf[3] = heads; - buf[6] = sech; - ata_string((uint8_t *)(buf+10), p->ident, 20); - ata_string((uint8_t *)(buf+23), "001", 8); - ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40); - buf[47] = (0x8000 | 128); - buf[48] = 0; - buf[49] = (1 << 8 | 1 << 9 | 1 << 11); - buf[50] = (1 << 14); - buf[53] = (1 << 1 | 1 << 2); - if (p->mult_sectors) - buf[59] = (0x100 | p->mult_sectors); - if (sectors <= 0x0fffffff) { - buf[60] = sectors; - buf[61] = 
(sectors >> 16); - } else { - buf[60] = 0xffff; - buf[61] = 0x0fff; - } - buf[63] = 0x7; - if (p->xfermode & ATA_WDMA0) - buf[63] |= (1 << ((p->xfermode & 7) + 8)); - buf[64] = 0x3; - buf[65] = 120; - buf[66] = 120; - buf[67] = 120; - buf[68] = 120; - buf[69] = 0; - buf[75] = 31; - buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3 | - ATA_SUPPORT_NCQ); - buf[77] = (ATA_SUPPORT_RCVSND_FPDMA_QUEUED | - (p->ssts & ATA_SS_SPD_MASK) >> 3); - buf[80] = 0x3f0; - buf[81] = 0x28; - buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE| - ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP); - buf[83] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE | - ATA_SUPPORT_FLUSHCACHE48 | 1 << 14); - buf[84] = (1 << 14); - buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_WRITECACHE| - ATA_SUPPORT_LOOKAHEAD | ATA_SUPPORT_NOP); - buf[86] = (ATA_SUPPORT_ADDRESS48 | ATA_SUPPORT_FLUSHCACHE | - ATA_SUPPORT_FLUSHCACHE48 | 1 << 15); - buf[87] = (1 << 14); - buf[88] = 0x7f; - if (p->xfermode & ATA_UDMA0) - buf[88] |= (1 << ((p->xfermode & 7) + 8)); - buf[100] = sectors; - buf[101] = (sectors >> 16); - buf[102] = (sectors >> 32); - buf[103] = (sectors >> 48); - if (candelete && !ro) { - buf[69] |= ATA_SUPPORT_RZAT | ATA_SUPPORT_DRAT; - buf[105] = 1; - buf[169] = ATA_SUPPORT_DSM_TRIM; - } - buf[106] = 0x4000; - buf[209] = 0x4000; - if (psectsz > sectsz) { - buf[106] |= 0x2000; - buf[106] |= ffsl(psectsz / sectsz) - 1; - buf[209] |= (psectoff / sectsz); - } - if (sectsz > 512) { - buf[106] |= 0x1000; - buf[117] = sectsz / 2; - buf[118] = ((sectsz / 2) >> 16); - } - buf[119] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14); - buf[120] = (ATA_SUPPORT_RWLOGDMAEXT | 1 << 14); - buf[222] = 0x1020; - buf[255] = 0x00a5; - ahci_checksum((uint8_t *)buf, sizeof(buf)); - ahci_write_fis_piosetup(p); - write_prdt(p, slot, cfis, (void *)buf, sizeof(buf)); - ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY); - } -} - -static void -handle_atapi_identify(struct ahci_port *p, int slot, uint8_t *cfis) -{ - if 
(!p->atapi) { - ahci_write_fis_d2h(p, slot, cfis, - (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); - } else { - uint16_t buf[256]; - - memset(buf, 0, sizeof(buf)); - buf[0] = (2 << 14 | 5 << 8 | 1 << 7 | 2 << 5); - ata_string((uint8_t *)(buf+10), p->ident, 20); - ata_string((uint8_t *)(buf+23), "001", 8); - ata_string((uint8_t *)(buf+27), "BHYVE SATA DVD ROM", 40); - buf[49] = (1 << 9 | 1 << 8); - buf[50] = (1 << 14 | 1); - buf[53] = (1 << 2 | 1 << 1); - buf[62] = 0x3f; - buf[63] = 7; - if (p->xfermode & ATA_WDMA0) - buf[63] |= (1 << ((p->xfermode & 7) + 8)); - buf[64] = 3; - buf[65] = 120; - buf[66] = 120; - buf[67] = 120; - buf[68] = 120; - buf[76] = (ATA_SATA_GEN1 | ATA_SATA_GEN2 | ATA_SATA_GEN3); - buf[77] = ((p->ssts & ATA_SS_SPD_MASK) >> 3); - buf[78] = (1 << 5); - buf[80] = 0x3f0; - buf[82] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET | - ATA_SUPPORT_RESET | ATA_SUPPORT_NOP); - buf[83] = (1 << 14); - buf[84] = (1 << 14); - buf[85] = (ATA_SUPPORT_POWERMGT | ATA_SUPPORT_PACKET | - ATA_SUPPORT_RESET | ATA_SUPPORT_NOP); - buf[87] = (1 << 14); - buf[88] = 0x7f; - if (p->xfermode & ATA_UDMA0) - buf[88] |= (1 << ((p->xfermode & 7) + 8)); - buf[222] = 0x1020; - buf[255] = 0x00a5; - ahci_checksum((uint8_t *)buf, sizeof(buf)); - ahci_write_fis_piosetup(p); - write_prdt(p, slot, cfis, (void *)buf, sizeof(buf)); - ahci_write_fis_d2h(p, slot, cfis, ATA_S_DSC | ATA_S_READY); - } -} - -static void -atapi_inquiry(struct ahci_port *p, int slot, uint8_t *cfis) -{ - uint8_t buf[36]; - uint8_t *acmd; - int len; - uint32_t tfd; - - acmd = cfis + 0x40; - - if (acmd[1] & 1) { /* VPD */ - if (acmd[2] == 0) { /* Supported VPD pages */ - buf[0] = 0x05; - buf[1] = 0; - buf[2] = 0; - buf[3] = 1; - buf[4] = 0; - len = 4 + buf[3]; - } else { - p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; - p->asc = 0x24; - tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - ahci_write_fis_d2h(p, slot, cfis, tfd); - return; - } - } else { - buf[0] = 
0x05; - buf[1] = 0x80; - buf[2] = 0x00; - buf[3] = 0x21; - buf[4] = 31; - buf[5] = 0; - buf[6] = 0; - buf[7] = 0; - atapi_string(buf + 8, "BHYVE", 8); - atapi_string(buf + 16, "BHYVE DVD-ROM", 16); - atapi_string(buf + 32, "001", 4); - len = sizeof(buf); - } - - if (len > acmd[4]) - len = acmd[4]; - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - write_prdt(p, slot, cfis, buf, len); - ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); -} - -static void -atapi_read_capacity(struct ahci_port *p, int slot, uint8_t *cfis) -{ - uint8_t buf[8]; - uint64_t sectors; - - sectors = blockif_size(p->bctx) / 2048; - be32enc(buf, sectors - 1); - be32enc(buf + 4, 2048); - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - write_prdt(p, slot, cfis, buf, sizeof(buf)); - ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); -} - -static void -atapi_read_toc(struct ahci_port *p, int slot, uint8_t *cfis) -{ - uint8_t *acmd; - uint8_t format; - int len; - - acmd = cfis + 0x40; - - len = be16dec(acmd + 7); - format = acmd[9] >> 6; - switch (format) { - case 0: - { - int msf, size; - uint64_t sectors; - uint8_t start_track, buf[20], *bp; - - msf = (acmd[1] >> 1) & 1; - start_track = acmd[6]; - if (start_track > 1 && start_track != 0xaa) { - uint32_t tfd; - p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; - p->asc = 0x24; - tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - ahci_write_fis_d2h(p, slot, cfis, tfd); - return; - } - bp = buf + 2; - *bp++ = 1; - *bp++ = 1; - if (start_track <= 1) { - *bp++ = 0; - *bp++ = 0x14; - *bp++ = 1; - *bp++ = 0; - if (msf) { - *bp++ = 0; - lba_to_msf(bp, 0); - bp += 3; - } else { - *bp++ = 0; - *bp++ = 0; - *bp++ = 0; - *bp++ = 0; - } - } - *bp++ = 0; - *bp++ = 0x14; - *bp++ = 0xaa; - *bp++ = 0; - sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx); - sectors >>= 2; - if (msf) { - *bp++ = 0; - lba_to_msf(bp, sectors); - bp += 3; - } else { - be32enc(bp, sectors); - bp += 4; - } - 
size = bp - buf; - be16enc(buf, size - 2); - if (len > size) - len = size; - write_prdt(p, slot, cfis, buf, len); - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); - break; - } - case 1: - { - uint8_t buf[12]; - - memset(buf, 0, sizeof(buf)); - buf[1] = 0xa; - buf[2] = 0x1; - buf[3] = 0x1; - if (len > sizeof(buf)) - len = sizeof(buf); - write_prdt(p, slot, cfis, buf, len); - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); - break; - } - case 2: - { - int msf, size; - uint64_t sectors; - uint8_t *bp, buf[50]; - - msf = (acmd[1] >> 1) & 1; - bp = buf + 2; - *bp++ = 1; - *bp++ = 1; - - *bp++ = 1; - *bp++ = 0x14; - *bp++ = 0; - *bp++ = 0xa0; - *bp++ = 0; - *bp++ = 0; - *bp++ = 0; - *bp++ = 0; - *bp++ = 1; - *bp++ = 0; - *bp++ = 0; - - *bp++ = 1; - *bp++ = 0x14; - *bp++ = 0; - *bp++ = 0xa1; - *bp++ = 0; - *bp++ = 0; - *bp++ = 0; - *bp++ = 0; - *bp++ = 1; - *bp++ = 0; - *bp++ = 0; - - *bp++ = 1; - *bp++ = 0x14; - *bp++ = 0; - *bp++ = 0xa2; - *bp++ = 0; - *bp++ = 0; - *bp++ = 0; - sectors = blockif_size(p->bctx) / blockif_sectsz(p->bctx); - sectors >>= 2; - if (msf) { - *bp++ = 0; - lba_to_msf(bp, sectors); - bp += 3; - } else { - be32enc(bp, sectors); - bp += 4; - } - - *bp++ = 1; - *bp++ = 0x14; - *bp++ = 0; - *bp++ = 1; - *bp++ = 0; - *bp++ = 0; - *bp++ = 0; - if (msf) { - *bp++ = 0; - lba_to_msf(bp, 0); - bp += 3; - } else { - *bp++ = 0; - *bp++ = 0; - *bp++ = 0; - *bp++ = 0; - } - - size = bp - buf; - be16enc(buf, size - 2); - if (len > size) - len = size; - write_prdt(p, slot, cfis, buf, len); - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); - break; - } - default: - { - uint32_t tfd; - - p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; - p->asc = 0x24; - tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - ahci_write_fis_d2h(p, 
slot, cfis, tfd); - break; - } - } -} - -static void -atapi_report_luns(struct ahci_port *p, int slot, uint8_t *cfis) -{ - uint8_t buf[16]; - - memset(buf, 0, sizeof(buf)); - buf[3] = 8; - - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - write_prdt(p, slot, cfis, buf, sizeof(buf)); - ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); -} - -static void -atapi_read(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done) -{ - struct ahci_ioreq *aior; - struct ahci_cmd_hdr *hdr; - struct ahci_prdt_entry *prdt; - struct blockif_req *breq; - uint8_t *acmd; - uint64_t lba; - uint32_t len; - int err; - - acmd = cfis + 0x40; - hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); - prdt = (struct ahci_prdt_entry *)(cfis + 0x80); - - lba = be32dec(acmd + 2); - if (acmd[0] == READ_10) - len = be16dec(acmd + 7); - else - len = be32dec(acmd + 6); - if (len == 0) { - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); - } - lba *= 2048; - len *= 2048; - - /* - * Pull request off free list - */ - aior = STAILQ_FIRST(&p->iofhd); - assert(aior != NULL); - STAILQ_REMOVE_HEAD(&p->iofhd, io_flist); - aior->cfis = cfis; - aior->slot = slot; - aior->len = len; - aior->done = done; - breq = &aior->io_req; - breq->br_offset = lba + done; - ahci_build_iov(p, aior, prdt, hdr->prdtl); - - /* Mark this command in-flight. */ - p->pending |= 1 << slot; - - /* Stuff request onto busy list. 
*/ - TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist); - - err = blockif_read(p->bctx, breq); - assert(err == 0); -} - -static void -atapi_request_sense(struct ahci_port *p, int slot, uint8_t *cfis) -{ - uint8_t buf[64]; - uint8_t *acmd; - int len; - - acmd = cfis + 0x40; - len = acmd[4]; - if (len > sizeof(buf)) - len = sizeof(buf); - memset(buf, 0, len); - buf[0] = 0x70 | (1 << 7); - buf[2] = p->sense_key; - buf[7] = 10; - buf[12] = p->asc; - write_prdt(p, slot, cfis, buf, len); - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); -} - -static void -atapi_start_stop_unit(struct ahci_port *p, int slot, uint8_t *cfis) -{ - uint8_t *acmd = cfis + 0x40; - uint32_t tfd; - - switch (acmd[4] & 3) { - case 0: - case 1: - case 3: - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - tfd = ATA_S_READY | ATA_S_DSC; - break; - case 2: - /* TODO eject media */ - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; - p->asc = 0x53; - tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; - break; - } - ahci_write_fis_d2h(p, slot, cfis, tfd); -} - -static void -atapi_mode_sense(struct ahci_port *p, int slot, uint8_t *cfis) -{ - uint8_t *acmd; - uint32_t tfd; - uint8_t pc, code; - int len; - - acmd = cfis + 0x40; - len = be16dec(acmd + 7); - pc = acmd[2] >> 6; - code = acmd[2] & 0x3f; - - switch (pc) { - case 0: - switch (code) { - case MODEPAGE_RW_ERROR_RECOVERY: - { - uint8_t buf[16]; - - if (len > sizeof(buf)) - len = sizeof(buf); - - memset(buf, 0, sizeof(buf)); - be16enc(buf, 16 - 2); - buf[2] = 0x70; - buf[8] = 0x01; - buf[9] = 16 - 10; - buf[11] = 0x05; - write_prdt(p, slot, cfis, buf, len); - tfd = ATA_S_READY | ATA_S_DSC; - break; - } - case MODEPAGE_CD_CAPABILITIES: - { - uint8_t buf[30]; - - if (len > sizeof(buf)) - len = sizeof(buf); - - memset(buf, 0, sizeof(buf)); - be16enc(buf, 30 - 2); - buf[2] = 0x70; - buf[8] = 0x2A; - buf[9] = 30 - 10; - buf[10] = 0x08; - buf[12] = 
0x71; - be16enc(&buf[18], 2); - be16enc(&buf[20], 512); - write_prdt(p, slot, cfis, buf, len); - tfd = ATA_S_READY | ATA_S_DSC; - break; - } - default: - goto error; - break; - } - break; - case 3: - p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; - p->asc = 0x39; - tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; - break; -error: - case 1: - case 2: - p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; - p->asc = 0x24; - tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; - break; - } - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - ahci_write_fis_d2h(p, slot, cfis, tfd); -} - -static void -atapi_get_event_status_notification(struct ahci_port *p, int slot, - uint8_t *cfis) -{ - uint8_t *acmd; - uint32_t tfd; - - acmd = cfis + 0x40; - - /* we don't support asynchronous operation */ - if (!(acmd[1] & 1)) { - p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; - p->asc = 0x24; - tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; - } else { - uint8_t buf[8]; - int len; - - len = be16dec(acmd + 7); - if (len > sizeof(buf)) - len = sizeof(buf); - - memset(buf, 0, sizeof(buf)); - be16enc(buf, 8 - 2); - buf[2] = 0x04; - buf[3] = 0x10; - buf[5] = 0x02; - write_prdt(p, slot, cfis, buf, len); - tfd = ATA_S_READY | ATA_S_DSC; - } - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - ahci_write_fis_d2h(p, slot, cfis, tfd); -} - -static void -handle_packet_cmd(struct ahci_port *p, int slot, uint8_t *cfis) -{ - uint8_t *acmd; - - acmd = cfis + 0x40; - -#ifdef AHCI_DEBUG - { - int i; - DPRINTF("ACMD:"); - for (i = 0; i < 16; i++) - DPRINTF("%02x ", acmd[i]); - DPRINTF("\n"); - } -#endif - - switch (acmd[0]) { - case TEST_UNIT_READY: - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); - break; - case INQUIRY: - atapi_inquiry(p, slot, cfis); - break; - case READ_CAPACITY: - atapi_read_capacity(p, slot, cfis); - break; - case PREVENT_ALLOW: - /* TODO */ - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - ahci_write_fis_d2h(p, 
slot, cfis, ATA_S_READY | ATA_S_DSC); - break; - case READ_TOC: - atapi_read_toc(p, slot, cfis); - break; - case REPORT_LUNS: - atapi_report_luns(p, slot, cfis); - break; - case READ_10: - case READ_12: - atapi_read(p, slot, cfis, 0); - break; - case REQUEST_SENSE: - atapi_request_sense(p, slot, cfis); - break; - case START_STOP_UNIT: - atapi_start_stop_unit(p, slot, cfis); - break; - case MODE_SENSE_10: - atapi_mode_sense(p, slot, cfis); - break; - case GET_EVENT_STATUS_NOTIFICATION: - atapi_get_event_status_notification(p, slot, cfis); - break; - default: - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; - p->asc = 0x20; - ahci_write_fis_d2h(p, slot, cfis, (p->sense_key << 12) | - ATA_S_READY | ATA_S_ERROR); - break; - } -} - -static void -ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis) -{ - - p->tfd |= ATA_S_BUSY; - switch (cfis[2]) { - case ATA_ATA_IDENTIFY: - handle_identify(p, slot, cfis); - break; - case ATA_SETFEATURES: - { - switch (cfis[3]) { - case ATA_SF_ENAB_SATA_SF: - switch (cfis[12]) { - case ATA_SATA_SF_AN: - p->tfd = ATA_S_DSC | ATA_S_READY; - break; - default: - p->tfd = ATA_S_ERROR | ATA_S_READY; - p->tfd |= (ATA_ERROR_ABORT << 8); - break; - } - break; - case ATA_SF_ENAB_WCACHE: - case ATA_SF_DIS_WCACHE: - case ATA_SF_ENAB_RCACHE: - case ATA_SF_DIS_RCACHE: - p->tfd = ATA_S_DSC | ATA_S_READY; - break; - case ATA_SF_SETXFER: - { - switch (cfis[12] & 0xf8) { - case ATA_PIO: - case ATA_PIO0: - break; - case ATA_WDMA0: - case ATA_UDMA0: - p->xfermode = (cfis[12] & 0x7); - break; - } - p->tfd = ATA_S_DSC | ATA_S_READY; - break; - } - default: - p->tfd = ATA_S_ERROR | ATA_S_READY; - p->tfd |= (ATA_ERROR_ABORT << 8); - break; - } - ahci_write_fis_d2h(p, slot, cfis, p->tfd); - break; - } - case ATA_SET_MULTI: - if (cfis[12] != 0 && - (cfis[12] > 128 || (cfis[12] & (cfis[12] - 1)))) { - p->tfd = ATA_S_ERROR | ATA_S_READY; - p->tfd |= (ATA_ERROR_ABORT << 8); - } else { - p->mult_sectors = 
cfis[12]; - p->tfd = ATA_S_DSC | ATA_S_READY; - } - ahci_write_fis_d2h(p, slot, cfis, p->tfd); - break; - case ATA_READ: - case ATA_WRITE: - case ATA_READ48: - case ATA_WRITE48: - case ATA_READ_MUL: - case ATA_WRITE_MUL: - case ATA_READ_MUL48: - case ATA_WRITE_MUL48: - case ATA_READ_DMA: - case ATA_WRITE_DMA: - case ATA_READ_DMA48: - case ATA_WRITE_DMA48: - case ATA_READ_FPDMA_QUEUED: - case ATA_WRITE_FPDMA_QUEUED: - ahci_handle_rw(p, slot, cfis, 0); - break; - case ATA_FLUSHCACHE: - case ATA_FLUSHCACHE48: - ahci_handle_flush(p, slot, cfis); - break; - case ATA_DATA_SET_MANAGEMENT: - if (cfis[11] == 0 && cfis[3] == ATA_DSM_TRIM && - cfis[13] == 0 && cfis[12] == 1) { - ahci_handle_dsm_trim(p, slot, cfis, 0); - break; - } - ahci_write_fis_d2h(p, slot, cfis, - (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); - break; - case ATA_SEND_FPDMA_QUEUED: - if ((cfis[13] & 0x1f) == ATA_SFPDMA_DSM && - cfis[17] == 0 && cfis[16] == ATA_DSM_TRIM && - cfis[11] == 0 && cfis[13] == 1) { - ahci_handle_dsm_trim(p, slot, cfis, 0); - break; - } - ahci_write_fis_d2h(p, slot, cfis, - (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); - break; - case ATA_READ_LOG_EXT: - case ATA_READ_LOG_DMA_EXT: - ahci_handle_read_log(p, slot, cfis); - break; - case ATA_SECURITY_FREEZE_LOCK: - case ATA_SMART_CMD: - case ATA_NOP: - ahci_write_fis_d2h(p, slot, cfis, - (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); - break; - case ATA_CHECK_POWER_MODE: - cfis[12] = 0xff; /* always on */ - ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); - break; - case ATA_STANDBY_CMD: - case ATA_STANDBY_IMMEDIATE: - case ATA_IDLE_CMD: - case ATA_IDLE_IMMEDIATE: - case ATA_SLEEP: - case ATA_READ_VERIFY: - case ATA_READ_VERIFY48: - ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC); - break; - case ATA_ATAPI_IDENTIFY: - handle_atapi_identify(p, slot, cfis); - break; - case ATA_PACKET_CMD: - if (!p->atapi) { - ahci_write_fis_d2h(p, slot, cfis, - (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); - } else - 
handle_packet_cmd(p, slot, cfis); - break; - default: - WPRINTF("Unsupported cmd:%02x\n", cfis[2]); - ahci_write_fis_d2h(p, slot, cfis, - (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR); - break; - } -} - -static void -ahci_handle_slot(struct ahci_port *p, int slot) -{ - struct ahci_cmd_hdr *hdr; - struct ahci_prdt_entry *prdt; - struct pci_ahci_softc *sc; - uint8_t *cfis; - int cfl; - - sc = p->pr_sc; - hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); - cfl = (hdr->flags & 0x1f) * 4; - cfis = paddr_guest2host(ahci_ctx(sc), hdr->ctba, - 0x80 + hdr->prdtl * sizeof(struct ahci_prdt_entry)); - prdt = (struct ahci_prdt_entry *)(cfis + 0x80); - -#ifdef AHCI_DEBUG - DPRINTF("\ncfis:"); - for (i = 0; i < cfl; i++) { - if (i % 10 == 0) - DPRINTF("\n"); - DPRINTF("%02x ", cfis[i]); - } - DPRINTF("\n"); - - for (i = 0; i < hdr->prdtl; i++) { - DPRINTF("%d@%08"PRIx64"\n", prdt->dbc & 0x3fffff, prdt->dba); - prdt++; - } -#endif - - if (cfis[0] != FIS_TYPE_REGH2D) { - WPRINTF("Not a H2D FIS:%02x\n", cfis[0]); - return; - } - - if (cfis[1] & 0x80) { - ahci_handle_cmd(p, slot, cfis); - } else { - if (cfis[15] & (1 << 2)) - p->reset = 1; - else if (p->reset) { - p->reset = 0; - ahci_port_reset(p); - } - p->ci &= ~(1 << slot); - } -} - -static void -ahci_handle_port(struct ahci_port *p) -{ - - if (!(p->cmd & AHCI_P_CMD_ST)) - return; - - /* - * Search for any new commands to issue ignoring those that - * are already in-flight. Stop if device is busy or in error. - */ - for (; (p->ci & ~p->pending) != 0; p->ccs = ((p->ccs + 1) & 31)) { - if ((p->tfd & (ATA_S_BUSY | ATA_S_DRQ)) != 0) - break; - if (p->waitforclear) - break; - if ((p->ci & ~p->pending & (1 << p->ccs)) != 0) { - p->cmd &= ~AHCI_P_CMD_CCS_MASK; - p->cmd |= p->ccs << AHCI_P_CMD_CCS_SHIFT; - ahci_handle_slot(p, p->ccs); - } - } -} - -/* - * blockif callback routine - this runs in the context of the blockif - * i/o thread, so the mutex needs to be acquired. 
- */ -static void -ata_ioreq_cb(struct blockif_req *br, int err) -{ - struct ahci_cmd_hdr *hdr; - struct ahci_ioreq *aior; - struct ahci_port *p; - struct pci_ahci_softc *sc; - uint32_t tfd; - uint8_t *cfis; - int slot, ncq, dsm; - - DPRINTF("%s %d\n", __func__, err); - - ncq = dsm = 0; - aior = br->br_param; - p = aior->io_pr; - cfis = aior->cfis; - slot = aior->slot; - sc = p->pr_sc; - hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + slot * AHCI_CL_SIZE); - - if (cfis[2] == ATA_WRITE_FPDMA_QUEUED || - cfis[2] == ATA_READ_FPDMA_QUEUED || - cfis[2] == ATA_SEND_FPDMA_QUEUED) - ncq = 1; - if (cfis[2] == ATA_DATA_SET_MANAGEMENT || - (cfis[2] == ATA_SEND_FPDMA_QUEUED && - (cfis[13] & 0x1f) == ATA_SFPDMA_DSM)) - dsm = 1; - - pthread_mutex_lock(&sc->mtx); - - /* - * Delete the blockif request from the busy list - */ - TAILQ_REMOVE(&p->iobhd, aior, io_blist); - - /* - * Move the blockif request back to the free list - */ - STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist); - - if (!err) - hdr->prdbc = aior->done; - - if (!err && aior->more) { - if (dsm) - ahci_handle_dsm_trim(p, slot, cfis, aior->done); - else - ahci_handle_rw(p, slot, cfis, aior->done); - goto out; - } - - if (!err) - tfd = ATA_S_READY | ATA_S_DSC; - else - tfd = (ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR; - if (ncq) - ahci_write_fis_sdb(p, slot, cfis, tfd); - else - ahci_write_fis_d2h(p, slot, cfis, tfd); - - /* - * This command is now complete. 
- */ - p->pending &= ~(1 << slot); - - ahci_check_stopped(p); - ahci_handle_port(p); -out: - pthread_mutex_unlock(&sc->mtx); - DPRINTF("%s exit\n", __func__); -} - -static void -atapi_ioreq_cb(struct blockif_req *br, int err) -{ - struct ahci_cmd_hdr *hdr; - struct ahci_ioreq *aior; - struct ahci_port *p; - struct pci_ahci_softc *sc; - uint8_t *cfis; - uint32_t tfd; - int slot; - - DPRINTF("%s %d\n", __func__, err); - - aior = br->br_param; - p = aior->io_pr; - cfis = aior->cfis; - slot = aior->slot; - sc = p->pr_sc; - hdr = (struct ahci_cmd_hdr *)(p->cmd_lst + aior->slot * AHCI_CL_SIZE); - - pthread_mutex_lock(&sc->mtx); - - /* - * Delete the blockif request from the busy list - */ - TAILQ_REMOVE(&p->iobhd, aior, io_blist); - - /* - * Move the blockif request back to the free list - */ - STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist); - - if (!err) - hdr->prdbc = aior->done; - - if (!err && aior->more) { - atapi_read(p, slot, cfis, aior->done); - goto out; - } - - if (!err) { - tfd = ATA_S_READY | ATA_S_DSC; - } else { - p->sense_key = ATA_SENSE_ILLEGAL_REQUEST; - p->asc = 0x21; - tfd = (p->sense_key << 12) | ATA_S_READY | ATA_S_ERROR; - } - cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN; - ahci_write_fis_d2h(p, slot, cfis, tfd); - - /* - * This command is now complete. 
- */ - p->pending &= ~(1 << slot); - - ahci_check_stopped(p); - ahci_handle_port(p); -out: - pthread_mutex_unlock(&sc->mtx); - DPRINTF("%s exit\n", __func__); -} - -static void -pci_ahci_ioreq_init(struct ahci_port *pr) -{ - struct ahci_ioreq *vr; - int i; - - pr->ioqsz = blockif_queuesz(pr->bctx); - pr->ioreq = calloc(pr->ioqsz, sizeof(struct ahci_ioreq)); - STAILQ_INIT(&pr->iofhd); - - /* - * Add all i/o request entries to the free queue - */ - for (i = 0; i < pr->ioqsz; i++) { - vr = &pr->ioreq[i]; - vr->io_pr = pr; - if (!pr->atapi) - vr->io_req.br_callback = ata_ioreq_cb; - else - vr->io_req.br_callback = atapi_ioreq_cb; - vr->io_req.br_param = vr; - STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist); - } - - TAILQ_INIT(&pr->iobhd); -} - -static void -pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value) -{ - int port = (offset - AHCI_OFFSET) / AHCI_STEP; - offset = (offset - AHCI_OFFSET) % AHCI_STEP; - struct ahci_port *p = &sc->port[port]; - - DPRINTF("pci_ahci_port %d: write offset 0x%"PRIx64" value 0x%"PRIx64"\n", - port, offset, value); - - switch (offset) { - case AHCI_P_CLB: - p->clb = value; - break; - case AHCI_P_CLBU: - p->clbu = value; - break; - case AHCI_P_FB: - p->fb = value; - break; - case AHCI_P_FBU: - p->fbu = value; - break; - case AHCI_P_IS: - p->is &= ~value; - break; - case AHCI_P_IE: - p->ie = value & 0xFDC000FF; - ahci_generate_intr(sc); - break; - case AHCI_P_CMD: - { - p->cmd &= ~(AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD | - AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE | - AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE | - AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK); - p->cmd |= (AHCI_P_CMD_ST | AHCI_P_CMD_SUD | AHCI_P_CMD_POD | - AHCI_P_CMD_CLO | AHCI_P_CMD_FRE | AHCI_P_CMD_APSTE | - AHCI_P_CMD_ATAPI | AHCI_P_CMD_DLAE | AHCI_P_CMD_ALPE | - AHCI_P_CMD_ASP | AHCI_P_CMD_ICC_MASK) & value; - - if (!(value & AHCI_P_CMD_ST)) { - ahci_port_stop(p); - } else { - uint64_t clb; - - p->cmd |= AHCI_P_CMD_CR; - 
clb = (uint64_t)p->clbu << 32 | p->clb; - p->cmd_lst = paddr_guest2host(ahci_ctx(sc), clb, - AHCI_CL_SIZE * AHCI_MAX_SLOTS); - } - - if (value & AHCI_P_CMD_FRE) { - uint64_t fb; - - p->cmd |= AHCI_P_CMD_FR; - fb = (uint64_t)p->fbu << 32 | p->fb; - /* we don't support FBSCP, so rfis size is 256Bytes */ - p->rfis = paddr_guest2host(ahci_ctx(sc), fb, 256); - } else { - p->cmd &= ~AHCI_P_CMD_FR; - } - - if (value & AHCI_P_CMD_CLO) { - p->tfd &= ~(ATA_S_BUSY | ATA_S_DRQ); - p->cmd &= ~AHCI_P_CMD_CLO; - } - - if (value & AHCI_P_CMD_ICC_MASK) { - p->cmd &= ~AHCI_P_CMD_ICC_MASK; - } - - ahci_handle_port(p); - break; - } - case AHCI_P_TFD: - case AHCI_P_SIG: - case AHCI_P_SSTS: - WPRINTF("pci_ahci_port: read only registers 0x%"PRIx64"\n", offset); - break; - case AHCI_P_SCTL: - p->sctl = value; - if (!(p->cmd & AHCI_P_CMD_ST)) { - if (value & ATA_SC_DET_RESET) - ahci_port_reset(p); - } - break; - case AHCI_P_SERR: - p->serr &= ~value; - break; - case AHCI_P_SACT: - p->sact |= value; - break; - case AHCI_P_CI: - p->ci |= value; - ahci_handle_port(p); - break; - case AHCI_P_SNTF: - case AHCI_P_FBS: - default: - break; - } -} - -static void -pci_ahci_host_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value) -{ - DPRINTF("pci_ahci_host: write offset 0x%"PRIx64" value 0x%"PRIx64"\n", - offset, value); - - switch (offset) { - case AHCI_CAP: - case AHCI_PI: - case AHCI_VS: - case AHCI_CAP2: - DPRINTF("pci_ahci_host: read only registers 0x%"PRIx64"\n", offset); - break; - case AHCI_GHC: - if (value & AHCI_GHC_HR) - ahci_reset(sc); - else if (value & AHCI_GHC_IE) { - sc->ghc |= AHCI_GHC_IE; - ahci_generate_intr(sc); - } - break; - case AHCI_IS: - sc->is &= ~value; - ahci_generate_intr(sc); - break; - default: - break; - } -} - -static void -pci_ahci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int baridx, uint64_t offset, int size, uint64_t value) -{ - struct pci_ahci_softc *sc = pi->pi_arg; - - assert(baridx == 5); - assert((offset % 4) == 0 && size == 
4); - - pthread_mutex_lock(&sc->mtx); - - if (offset < AHCI_OFFSET) - pci_ahci_host_write(sc, offset, value); - else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP) - pci_ahci_port_write(sc, offset, value); - else - WPRINTF("pci_ahci: unknown i/o write offset 0x%"PRIx64"\n", offset); - - pthread_mutex_unlock(&sc->mtx); -} - -static uint64_t -pci_ahci_host_read(struct pci_ahci_softc *sc, uint64_t offset) -{ - uint32_t value; - - switch (offset) { - case AHCI_CAP: - case AHCI_GHC: - case AHCI_IS: - case AHCI_PI: - case AHCI_VS: - case AHCI_CCCC: - case AHCI_CCCP: - case AHCI_EM_LOC: - case AHCI_EM_CTL: - case AHCI_CAP2: - { - uint32_t *p = &sc->cap; - p += (offset - AHCI_CAP) / sizeof(uint32_t); - value = *p; - break; - } - default: - value = 0; - break; - } - DPRINTF("pci_ahci_host: read offset 0x%"PRIx64" value 0x%x\n", - offset, value); - - return (value); -} - -static uint64_t -pci_ahci_port_read(struct pci_ahci_softc *sc, uint64_t offset) -{ - uint32_t value; - int port = (offset - AHCI_OFFSET) / AHCI_STEP; - offset = (offset - AHCI_OFFSET) % AHCI_STEP; - - switch (offset) { - case AHCI_P_CLB: - case AHCI_P_CLBU: - case AHCI_P_FB: - case AHCI_P_FBU: - case AHCI_P_IS: - case AHCI_P_IE: - case AHCI_P_CMD: - case AHCI_P_TFD: - case AHCI_P_SIG: - case AHCI_P_SSTS: - case AHCI_P_SCTL: - case AHCI_P_SERR: - case AHCI_P_SACT: - case AHCI_P_CI: - case AHCI_P_SNTF: - case AHCI_P_FBS: - { - uint32_t *p= &sc->port[port].clb; - p += (offset - AHCI_P_CLB) / sizeof(uint32_t); - value = *p; - break; - } - default: - value = 0; - break; - } - - DPRINTF("pci_ahci_port %d: read offset 0x%"PRIx64" value 0x%x\n", - port, offset, value); - - return value; -} - -static uint64_t -pci_ahci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, - uint64_t regoff, int size) -{ - struct pci_ahci_softc *sc = pi->pi_arg; - uint64_t offset; - uint32_t value; - - assert(baridx == 5); - assert(size == 1 || size == 2 || size == 4); - assert((regoff & (size - 1)) == 0); - - 
pthread_mutex_lock(&sc->mtx); - - offset = regoff & ~0x3; /* round down to a multiple of 4 bytes */ - if (offset < AHCI_OFFSET) - value = pci_ahci_host_read(sc, offset); - else if (offset < AHCI_OFFSET + sc->ports * AHCI_STEP) - value = pci_ahci_port_read(sc, offset); - else { - value = 0; - WPRINTF("pci_ahci: unknown i/o read offset 0x%"PRIx64"\n", - regoff); - } - value >>= 8 * (regoff & 0x3); - - pthread_mutex_unlock(&sc->mtx); - - return (value); -} - -static int -pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi) -{ - char bident[sizeof("XX:X:X")]; - struct blockif_ctxt *bctxt; - struct pci_ahci_softc *sc; - int ret, slots; - MD5_CTX mdctx; - u_char digest[16]; - - ret = 0; - - if (opts == NULL) { - fprintf(stderr, "pci_ahci: backing device required\n"); - return (1); - } - -#ifdef AHCI_DEBUG - dbg = fopen("/tmp/log", "w+"); -#endif - - sc = calloc(1, sizeof(struct pci_ahci_softc)); - pi->pi_arg = sc; - sc->asc_pi = pi; - sc->ports = MAX_PORTS; - - /* - * Only use port 0 for a backing device. All other ports will be - * marked as unused - */ - sc->port[0].atapi = atapi; - - /* - * Attempt to open the backing image. Use the PCI - * slot/func for the identifier string. - */ - snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func); - bctxt = blockif_open(opts, bident); - if (bctxt == NULL) { - ret = 1; - goto open_fail; - } - sc->port[0].bctx = bctxt; - sc->port[0].pr_sc = sc; - - /* - * Create an identifier for the backing file. 
Use parts of the - * md5 sum of the filename - */ - MD5Init(&mdctx); - MD5Update(&mdctx, opts, strlen(opts)); - MD5Final(digest, &mdctx); - sprintf(sc->port[0].ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X", - digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]); - - /* - * Allocate blockif request structures and add them - * to the free list - */ - pci_ahci_ioreq_init(&sc->port[0]); - - pthread_mutex_init(&sc->mtx, NULL); - - /* Intel ICH8 AHCI */ - slots = sc->port[0].ioqsz; - if (slots > 32) - slots = 32; - --slots; - sc->cap = AHCI_CAP_64BIT | AHCI_CAP_SNCQ | AHCI_CAP_SSNTF | - AHCI_CAP_SMPS | AHCI_CAP_SSS | AHCI_CAP_SALP | - AHCI_CAP_SAL | AHCI_CAP_SCLO | (0x3 << AHCI_CAP_ISS_SHIFT)| - AHCI_CAP_PMD | AHCI_CAP_SSC | AHCI_CAP_PSC | - (slots << AHCI_CAP_NCS_SHIFT) | AHCI_CAP_SXS | (sc->ports - 1); - - /* Only port 0 implemented */ - sc->pi = 1; - sc->vs = 0x10300; - sc->cap2 = AHCI_CAP2_APST; - ahci_reset(sc); - - pci_set_cfgdata16(pi, PCIR_DEVICE, 0x2821); - pci_set_cfgdata16(pi, PCIR_VENDOR, 0x8086); - pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); - pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_STORAGE_SATA); - pci_set_cfgdata8(pi, PCIR_PROGIF, PCIP_STORAGE_SATA_AHCI_1_0); - pci_emul_add_msicap(pi, 1); - pci_emul_alloc_bar(pi, 5, PCIBAR_MEM32, - AHCI_OFFSET + sc->ports * AHCI_STEP); - - pci_lintr_request(pi); - -open_fail: - if (ret) { - if (sc->port[0].bctx != NULL) - blockif_close(sc->port[0].bctx); - free(sc); - } - - return (ret); -} - -static int -pci_ahci_hd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) -{ - - return (pci_ahci_init(ctx, pi, opts, 0)); -} - -static int -pci_ahci_atapi_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) -{ - - return (pci_ahci_init(ctx, pi, opts, 1)); -} - -/* - * Use separate emulation names to distinguish drive and atapi devices - */ -struct pci_devemu pci_de_ahci_hd = { - .pe_emu = "ahci-hd", - .pe_init = pci_ahci_hd_init, - .pe_barwrite = pci_ahci_write, - .pe_barread = pci_ahci_read -}; 
-PCI_EMUL_SET(pci_de_ahci_hd); - -struct pci_devemu pci_de_ahci_cd = { - .pe_emu = "ahci-cd", - .pe_init = pci_ahci_atapi_init, - .pe_barwrite = pci_ahci_write, - .pe_barread = pci_ahci_read -}; -PCI_EMUL_SET(pci_de_ahci_cd); diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c deleted file mode 100644 index 523d7b0..0000000 --- a/usr.sbin/bhyve/pci_emul.c +++ /dev/null @@ -1,2108 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/linker_set.h> -#include <sys/errno.h> - -#include <ctype.h> -#include <pthread.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <strings.h> -#include <assert.h> -#include <stdbool.h> - -#include <machine/vmm.h> -#include <vmmapi.h> - -#include "acpi.h" -#include "bhyverun.h" -#include "inout.h" -#include "ioapic.h" -#include "mem.h" -#include "pci_emul.h" -#include "pci_irq.h" -#include "pci_lpc.h" - -#define CONF1_ADDR_PORT 0x0cf8 -#define CONF1_DATA_PORT 0x0cfc - -#define CONF1_ENABLE 0x80000000ul - -#define MAXBUSES (PCI_BUSMAX + 1) -#define MAXSLOTS (PCI_SLOTMAX + 1) -#define MAXFUNCS (PCI_FUNCMAX + 1) - -struct funcinfo { - char *fi_name; - char *fi_param; - struct pci_devinst *fi_devi; -}; - -struct intxinfo { - int ii_count; - int ii_pirq_pin; - int ii_ioapic_irq; -}; - -struct slotinfo { - struct intxinfo si_intpins[4]; - struct funcinfo si_funcs[MAXFUNCS]; -}; - -struct businfo { - uint16_t iobase, iolimit; /* I/O window */ - uint32_t membase32, memlimit32; /* mmio window below 4GB */ - uint64_t membase64, memlimit64; /* mmio window above 4GB */ - struct slotinfo slotinfo[MAXSLOTS]; -}; - -static struct businfo *pci_businfo[MAXBUSES]; - -SET_DECLARE(pci_devemu_set, struct pci_devemu); - -static uint64_t pci_emul_iobase; -static uint64_t pci_emul_membase32; -static uint64_t pci_emul_membase64; - -#define PCI_EMUL_IOBASE 0x2000 -#define PCI_EMUL_IOLIMIT 0x10000 - -#define PCI_EMUL_ECFG_BASE 0xE0000000 /* 3.5GB */ -#define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */ -SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE); - -#define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE - -#define PCI_EMUL_MEMBASE64 0xD000000000UL -#define PCI_EMUL_MEMLIMIT64 0xFD00000000UL - -static struct pci_devemu *pci_emul_finddev(char *name); -static void pci_lintr_route(struct pci_devinst *pi); -static void pci_lintr_update(struct 
pci_devinst *pi); -static void pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, - int func, int coff, int bytes, uint32_t *val); - -static __inline void -CFGWRITE(struct pci_devinst *pi, int coff, uint32_t val, int bytes) -{ - - if (bytes == 1) - pci_set_cfgdata8(pi, coff, val); - else if (bytes == 2) - pci_set_cfgdata16(pi, coff, val); - else - pci_set_cfgdata32(pi, coff, val); -} - -static __inline uint32_t -CFGREAD(struct pci_devinst *pi, int coff, int bytes) -{ - - if (bytes == 1) - return (pci_get_cfgdata8(pi, coff)); - else if (bytes == 2) - return (pci_get_cfgdata16(pi, coff)); - else - return (pci_get_cfgdata32(pi, coff)); -} - -/* - * I/O access - */ - -/* - * Slot options are in the form: - * - * <bus>:<slot>:<func>,<emul>[,<config>] - * <slot>[:<func>],<emul>[,<config>] - * - * slot is 0..31 - * func is 0..7 - * emul is a string describing the type of PCI device e.g. virtio-net - * config is an optional string, depending on the device, that can be - * used for configuration. 
- * Examples are: - * 1,virtio-net,tap0 - * 3:0,dummy - */ -static void -pci_parse_slot_usage(char *aopt) -{ - - fprintf(stderr, "Invalid PCI slot info field \"%s\"\n", aopt); -} - -int -pci_parse_slot(char *opt) -{ - struct businfo *bi; - struct slotinfo *si; - char *emul, *config, *str, *cp; - int error, bnum, snum, fnum; - - error = -1; - str = strdup(opt); - - emul = config = NULL; - if ((cp = strchr(str, ',')) != NULL) { - *cp = '\0'; - emul = cp + 1; - if ((cp = strchr(emul, ',')) != NULL) { - *cp = '\0'; - config = cp + 1; - } - } else { - pci_parse_slot_usage(opt); - goto done; - } - - /* <bus>:<slot>:<func> */ - if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { - bnum = 0; - /* <slot>:<func> */ - if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { - fnum = 0; - /* <slot> */ - if (sscanf(str, "%d", &snum) != 1) { - snum = -1; - } - } - } - - if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || - fnum < 0 || fnum >= MAXFUNCS) { - pci_parse_slot_usage(opt); - goto done; - } - - if (pci_businfo[bnum] == NULL) - pci_businfo[bnum] = calloc(1, sizeof(struct businfo)); - - bi = pci_businfo[bnum]; - si = &bi->slotinfo[snum]; - - if (si->si_funcs[fnum].fi_name != NULL) { - fprintf(stderr, "pci slot %d:%d already occupied!\n", - snum, fnum); - goto done; - } - - if (pci_emul_finddev(emul) == NULL) { - fprintf(stderr, "pci slot %d:%d: unknown device \"%s\"\n", - snum, fnum, emul); - goto done; - } - - error = 0; - si->si_funcs[fnum].fi_name = emul; - si->si_funcs[fnum].fi_param = config; - -done: - if (error) - free(str); - - return (error); -} - -static int -pci_valid_pba_offset(struct pci_devinst *pi, uint64_t offset) -{ - - if (offset < pi->pi_msix.pba_offset) - return (0); - - if (offset >= pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { - return (0); - } - - return (1); -} - -int -pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, - uint64_t value) -{ - int msix_entry_offset; - int tab_index; - char *dest; - - /* support 
only 4 or 8 byte writes */ - if (size != 4 && size != 8) - return (-1); - - /* - * Return if table index is beyond what device supports - */ - tab_index = offset / MSIX_TABLE_ENTRY_SIZE; - if (tab_index >= pi->pi_msix.table_count) - return (-1); - - msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; - - /* support only aligned writes */ - if ((msix_entry_offset % size) != 0) - return (-1); - - dest = (char *)(pi->pi_msix.table + tab_index); - dest += msix_entry_offset; - - if (size == 4) - *((uint32_t *)dest) = value; - else - *((uint64_t *)dest) = value; - - return (0); -} - -uint64_t -pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size) -{ - char *dest; - int msix_entry_offset; - int tab_index; - uint64_t retval = ~0; - - /* - * The PCI standard only allows 4 and 8 byte accesses to the MSI-X - * table but we also allow 1 byte access to accommodate reads from - * ddb. - */ - if (size != 1 && size != 4 && size != 8) - return (retval); - - msix_entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; - - /* support only aligned reads */ - if ((msix_entry_offset % size) != 0) { - return (retval); - } - - tab_index = offset / MSIX_TABLE_ENTRY_SIZE; - - if (tab_index < pi->pi_msix.table_count) { - /* valid MSI-X Table access */ - dest = (char *)(pi->pi_msix.table + tab_index); - dest += msix_entry_offset; - - if (size == 1) - retval = *((uint8_t *)dest); - else if (size == 4) - retval = *((uint32_t *)dest); - else - retval = *((uint64_t *)dest); - } else if (pci_valid_pba_offset(pi, offset)) { - /* return 0 for PBA access */ - retval = 0; - } - - return (retval); -} - -int -pci_msix_table_bar(struct pci_devinst *pi) -{ - - if (pi->pi_msix.table != NULL) - return (pi->pi_msix.table_bar); - else - return (-1); -} - -int -pci_msix_pba_bar(struct pci_devinst *pi) -{ - - if (pi->pi_msix.table != NULL) - return (pi->pi_msix.pba_bar); - else - return (-1); -} - -static int -pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, 
void *arg) -{ - struct pci_devinst *pdi = arg; - struct pci_devemu *pe = pdi->pi_d; - uint64_t offset; - int i; - - for (i = 0; i <= PCI_BARMAX; i++) { - if (pdi->pi_bar[i].type == PCIBAR_IO && - port >= pdi->pi_bar[i].addr && - port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) { - offset = port - pdi->pi_bar[i].addr; - if (in) - *eax = (*pe->pe_barread)(ctx, vcpu, pdi, i, - offset, bytes); - else - (*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset, - bytes, *eax); - return (0); - } - } - return (-1); -} - -static int -pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, - int size, uint64_t *val, void *arg1, long arg2) -{ - struct pci_devinst *pdi = arg1; - struct pci_devemu *pe = pdi->pi_d; - uint64_t offset; - int bidx = (int) arg2; - - assert(bidx <= PCI_BARMAX); - assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 || - pdi->pi_bar[bidx].type == PCIBAR_MEM64); - assert(addr >= pdi->pi_bar[bidx].addr && - addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size); - - offset = addr - pdi->pi_bar[bidx].addr; - - if (dir == MEM_F_WRITE) { - if (size == 8) { - (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, - 4, *val & 0xffffffff); - (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, - 4, *val >> 32); - } else { - (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, - size, *val); - } - } else { - if (size == 8) { - *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, - offset, 4); - *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, - offset + 4, 4) << 32; - } else { - *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, - offset, size); - } - } - - return (0); -} - - -static int -pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size, - uint64_t *addr) -{ - uint64_t base; - - assert((size & (size - 1)) == 0); /* must be a power of 2 */ - - base = roundup2(*baseptr, size); - - if (base + size <= limit) { - *addr = base; - *baseptr = base + size; - return (0); - } else - return (-1); -} - -int -pci_emul_alloc_bar(struct pci_devinst *pdi, 
int idx, enum pcibar_type type, - uint64_t size) -{ - - return (pci_emul_alloc_pbar(pdi, idx, 0, type, size)); -} - -/* - * Register (or unregister) the MMIO or I/O region associated with the BAR - * register 'idx' of an emulated pci device. - */ -static void -modify_bar_registration(struct pci_devinst *pi, int idx, int registration) -{ - int error; - struct inout_port iop; - struct mem_range mr; - - switch (pi->pi_bar[idx].type) { - case PCIBAR_IO: - bzero(&iop, sizeof(struct inout_port)); - iop.name = pi->pi_name; - iop.port = pi->pi_bar[idx].addr; - iop.size = pi->pi_bar[idx].size; - if (registration) { - iop.flags = IOPORT_F_INOUT; - iop.handler = pci_emul_io_handler; - iop.arg = pi; - error = register_inout(&iop); - } else - error = unregister_inout(&iop); - break; - case PCIBAR_MEM32: - case PCIBAR_MEM64: - bzero(&mr, sizeof(struct mem_range)); - mr.name = pi->pi_name; - mr.base = pi->pi_bar[idx].addr; - mr.size = pi->pi_bar[idx].size; - if (registration) { - mr.flags = MEM_F_RW; - mr.handler = pci_emul_mem_handler; - mr.arg1 = pi; - mr.arg2 = idx; - error = register_mem(&mr); - } else - error = unregister_mem(&mr); - break; - default: - error = EINVAL; - break; - } - assert(error == 0); -} - -static void -unregister_bar(struct pci_devinst *pi, int idx) -{ - - modify_bar_registration(pi, idx, 0); -} - -static void -register_bar(struct pci_devinst *pi, int idx) -{ - - modify_bar_registration(pi, idx, 1); -} - -/* Are we decoding i/o port accesses for the emulated pci device? */ -static int -porten(struct pci_devinst *pi) -{ - uint16_t cmd; - - cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); - - return (cmd & PCIM_CMD_PORTEN); -} - -/* Are we decoding memory accesses for the emulated pci device? */ -static int -memen(struct pci_devinst *pi) -{ - uint16_t cmd; - - cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); - - return (cmd & PCIM_CMD_MEMEN); -} - -/* - * Update the MMIO or I/O address that is decoded by the BAR register. 
- * - * If the pci device has enabled the address space decoding then intercept - * the address range decoded by the BAR register. - */ -static void -update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) -{ - int decode; - - if (pi->pi_bar[idx].type == PCIBAR_IO) - decode = porten(pi); - else - decode = memen(pi); - - if (decode) - unregister_bar(pi, idx); - - switch (type) { - case PCIBAR_IO: - case PCIBAR_MEM32: - pi->pi_bar[idx].addr = addr; - break; - case PCIBAR_MEM64: - pi->pi_bar[idx].addr &= ~0xffffffffUL; - pi->pi_bar[idx].addr |= addr; - break; - case PCIBAR_MEMHI64: - pi->pi_bar[idx].addr &= 0xffffffff; - pi->pi_bar[idx].addr |= addr; - break; - default: - assert(0); - } - - if (decode) - register_bar(pi, idx); -} - -int -pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, - enum pcibar_type type, uint64_t size) -{ - int error; - uint64_t *baseptr, limit, addr, mask, lobits, bar; - - assert(idx >= 0 && idx <= PCI_BARMAX); - - if ((size & (size - 1)) != 0) - size = 1UL << flsl(size); /* round up to a power of 2 */ - - /* Enforce minimum BAR sizes required by the PCI standard */ - if (type == PCIBAR_IO) { - if (size < 4) - size = 4; - } else { - if (size < 16) - size = 16; - } - - switch (type) { - case PCIBAR_NONE: - baseptr = NULL; - addr = mask = lobits = 0; - break; - case PCIBAR_IO: - baseptr = &pci_emul_iobase; - limit = PCI_EMUL_IOLIMIT; - mask = PCIM_BAR_IO_BASE; - lobits = PCIM_BAR_IO_SPACE; - break; - case PCIBAR_MEM64: - /* - * XXX - * Some drivers do not work well if the 64-bit BAR is allocated - * above 4GB. Allow for this by allocating small requests under - * 4GB unless then allocation size is larger than some arbitrary - * number (32MB currently). 
- */ - if (size > 32 * 1024 * 1024) { - /* - * XXX special case for device requiring peer-peer DMA - */ - if (size == 0x100000000UL) - baseptr = &hostbase; - else - baseptr = &pci_emul_membase64; - limit = PCI_EMUL_MEMLIMIT64; - mask = PCIM_BAR_MEM_BASE; - lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | - PCIM_BAR_MEM_PREFETCH; - break; - } else { - baseptr = &pci_emul_membase32; - limit = PCI_EMUL_MEMLIMIT32; - mask = PCIM_BAR_MEM_BASE; - lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; - } - break; - case PCIBAR_MEM32: - baseptr = &pci_emul_membase32; - limit = PCI_EMUL_MEMLIMIT32; - mask = PCIM_BAR_MEM_BASE; - lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; - break; - default: - printf("pci_emul_alloc_base: invalid bar type %d\n", type); - assert(0); - } - - if (baseptr != NULL) { - error = pci_emul_alloc_resource(baseptr, limit, size, &addr); - if (error != 0) - return (error); - } - - pdi->pi_bar[idx].type = type; - pdi->pi_bar[idx].addr = addr; - pdi->pi_bar[idx].size = size; - - /* Initialize the BAR register in config space */ - bar = (addr & mask) | lobits; - pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); - - if (type == PCIBAR_MEM64) { - assert(idx + 1 <= PCI_BARMAX); - pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; - pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); - } - - register_bar(pdi, idx); - - return (0); -} - -#define CAP_START_OFFSET 0x40 -static int -pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) -{ - int i, capoff, reallen; - uint16_t sts; - - assert(caplen > 0); - - reallen = roundup2(caplen, 4); /* dword aligned */ - - sts = pci_get_cfgdata16(pi, PCIR_STATUS); - if ((sts & PCIM_STATUS_CAPPRESENT) == 0) - capoff = CAP_START_OFFSET; - else - capoff = pi->pi_capend + 1; - - /* Check if we have enough space */ - if (capoff + reallen > PCI_REGMAX + 1) - return (-1); - - /* Set the previous capability pointer */ - if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { - pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); - 
pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); - } else - pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); - - /* Copy the capability */ - for (i = 0; i < caplen; i++) - pci_set_cfgdata8(pi, capoff + i, capdata[i]); - - /* Set the next capability pointer */ - pci_set_cfgdata8(pi, capoff + 1, 0); - - pi->pi_prevcap = capoff; - pi->pi_capend = capoff + reallen - 1; - return (0); -} - -static struct pci_devemu * -pci_emul_finddev(char *name) -{ - struct pci_devemu **pdpp, *pdp; - - SET_FOREACH(pdpp, pci_devemu_set) { - pdp = *pdpp; - if (!strcmp(pdp->pe_emu, name)) { - return (pdp); - } - } - - return (NULL); -} - -static int -pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, - int func, struct funcinfo *fi) -{ - struct pci_devinst *pdi; - int err; - - pdi = calloc(1, sizeof(struct pci_devinst)); - - pdi->pi_vmctx = ctx; - pdi->pi_bus = bus; - pdi->pi_slot = slot; - pdi->pi_func = func; - pthread_mutex_init(&pdi->pi_lintr.lock, NULL); - pdi->pi_lintr.pin = 0; - pdi->pi_lintr.state = IDLE; - pdi->pi_lintr.pirq_pin = 0; - pdi->pi_lintr.ioapic_irq = 0; - pdi->pi_d = pde; - snprintf(pdi->pi_name, PI_NAMESZ, "%s-pci-%d", pde->pe_emu, slot); - - /* Disable legacy interrupts */ - pci_set_cfgdata8(pdi, PCIR_INTLINE, 255); - pci_set_cfgdata8(pdi, PCIR_INTPIN, 0); - - pci_set_cfgdata8(pdi, PCIR_COMMAND, - PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); - - err = (*pde->pe_init)(ctx, pdi, fi->fi_param); - if (err == 0) - fi->fi_devi = pdi; - else - free(pdi); - - return (err); -} - -void -pci_populate_msicap(struct msicap *msicap, int msgnum, int nextptr) -{ - int mmc; - - CTASSERT(sizeof(struct msicap) == 14); - - /* Number of msi messages must be a power of 2 between 1 and 32 */ - assert((msgnum & (msgnum - 1)) == 0 && msgnum >= 1 && msgnum <= 32); - mmc = ffs(msgnum) - 1; - - bzero(msicap, sizeof(struct msicap)); - msicap->capid = PCIY_MSI; - msicap->nextptr = nextptr; - msicap->msgctrl = PCIM_MSICTRL_64BIT | (mmc << 1); 
-} - -int -pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) -{ - struct msicap msicap; - - pci_populate_msicap(&msicap, msgnum, 0); - - return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); -} - -static void -pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, - uint32_t msix_tab_size) -{ - CTASSERT(sizeof(struct msixcap) == 12); - - assert(msix_tab_size % 4096 == 0); - - bzero(msixcap, sizeof(struct msixcap)); - msixcap->capid = PCIY_MSIX; - - /* - * Message Control Register, all fields set to - * zero except for the Table Size. - * Note: Table size N is encoded as N-1 - */ - msixcap->msgctrl = msgnum - 1; - - /* - * MSI-X BAR setup: - * - MSI-X table start at offset 0 - * - PBA table starts at a 4K aligned offset after the MSI-X table - */ - msixcap->table_info = barnum & PCIM_MSIX_BIR_MASK; - msixcap->pba_info = msix_tab_size | (barnum & PCIM_MSIX_BIR_MASK); -} - -static void -pci_msix_table_init(struct pci_devinst *pi, int table_entries) -{ - int i, table_size; - - assert(table_entries > 0); - assert(table_entries <= MAX_MSIX_TABLE_ENTRIES); - - table_size = table_entries * MSIX_TABLE_ENTRY_SIZE; - pi->pi_msix.table = calloc(1, table_size); - - /* set mask bit of vector control register */ - for (i = 0; i < table_entries; i++) - pi->pi_msix.table[i].vector_control |= PCIM_MSIX_VCTRL_MASK; -} - -int -pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) -{ - uint32_t tab_size; - struct msixcap msixcap; - - assert(msgnum >= 1 && msgnum <= MAX_MSIX_TABLE_ENTRIES); - assert(barnum >= 0 && barnum <= PCIR_MAX_BAR_0); - - tab_size = msgnum * MSIX_TABLE_ENTRY_SIZE; - - /* Align table size to nearest 4K */ - tab_size = roundup2(tab_size, 4096); - - pi->pi_msix.table_bar = barnum; - pi->pi_msix.pba_bar = barnum; - pi->pi_msix.table_offset = 0; - pi->pi_msix.table_count = msgnum; - pi->pi_msix.pba_offset = tab_size; - pi->pi_msix.pba_size = PBA_SIZE(msgnum); - - pci_msix_table_init(pi, msgnum); - - 
pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); - - /* allocate memory for MSI-X Table and PBA */ - pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, - tab_size + pi->pi_msix.pba_size); - - return (pci_emul_add_capability(pi, (u_char *)&msixcap, - sizeof(msixcap))); -} - -void -msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, - int bytes, uint32_t val) -{ - uint16_t msgctrl, rwmask; - int off; - - off = offset - capoff; - /* Message Control Register */ - if (off == 2 && bytes == 2) { - rwmask = PCIM_MSIXCTRL_MSIX_ENABLE | PCIM_MSIXCTRL_FUNCTION_MASK; - msgctrl = pci_get_cfgdata16(pi, offset); - msgctrl &= ~rwmask; - msgctrl |= val & rwmask; - val = msgctrl; - - pi->pi_msix.enabled = val & PCIM_MSIXCTRL_MSIX_ENABLE; - pi->pi_msix.function_mask = val & PCIM_MSIXCTRL_FUNCTION_MASK; - pci_lintr_update(pi); - } - - CFGWRITE(pi, offset, val, bytes); -} - -void -msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, - int bytes, uint32_t val) -{ - uint16_t msgctrl, rwmask, msgdata, mme; - uint32_t addrlo; - - /* - * If guest is writing to the message control register make sure - * we do not overwrite read-only fields. - */ - if ((offset - capoff) == 2 && bytes == 2) { - rwmask = PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE; - msgctrl = pci_get_cfgdata16(pi, offset); - msgctrl &= ~rwmask; - msgctrl |= val & rwmask; - val = msgctrl; - - addrlo = pci_get_cfgdata32(pi, capoff + 4); - if (msgctrl & PCIM_MSICTRL_64BIT) - msgdata = pci_get_cfgdata16(pi, capoff + 12); - else - msgdata = pci_get_cfgdata16(pi, capoff + 8); - - mme = msgctrl & PCIM_MSICTRL_MME_MASK; - pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 
1 : 0; - if (pi->pi_msi.enabled) { - pi->pi_msi.addr = addrlo; - pi->pi_msi.msg_data = msgdata; - pi->pi_msi.maxmsgnum = 1 << (mme >> 4); - } else { - pi->pi_msi.maxmsgnum = 0; - } - pci_lintr_update(pi); - } - - CFGWRITE(pi, offset, val, bytes); -} - -void -pciecap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, - int bytes, uint32_t val) -{ - - /* XXX don't write to the readonly parts */ - CFGWRITE(pi, offset, val, bytes); -} - -#define PCIECAP_VERSION 0x2 -int -pci_emul_add_pciecap(struct pci_devinst *pi, int type) -{ - int err; - struct pciecap pciecap; - - CTASSERT(sizeof(struct pciecap) == 60); - - if (type != PCIEM_TYPE_ROOT_PORT) - return (-1); - - bzero(&pciecap, sizeof(pciecap)); - - pciecap.capid = PCIY_EXPRESS; - pciecap.pcie_capabilities = PCIECAP_VERSION | PCIEM_TYPE_ROOT_PORT; - pciecap.link_capabilities = 0x411; /* gen1, x1 */ - pciecap.link_status = 0x11; /* gen1, x1 */ - - err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); - return (err); -} - -/* - * This function assumes that 'coff' is in the capabilities region of the - * config space. - */ -static void -pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val) -{ - int capid; - uint8_t capoff, nextoff; - - /* Do not allow un-aligned writes */ - if ((offset & (bytes - 1)) != 0) - return; - - /* Find the capability that we want to update */ - capoff = CAP_START_OFFSET; - while (1) { - nextoff = pci_get_cfgdata8(pi, capoff + 1); - if (nextoff == 0) - break; - if (offset >= capoff && offset < nextoff) - break; - - capoff = nextoff; - } - assert(offset >= capoff); - - /* - * Capability ID and Next Capability Pointer are readonly. - * However, some o/s's do 4-byte writes that include these. - * For this case, trim the write back to 2 bytes and adjust - * the data. 
- */ - if (offset == capoff || offset == capoff + 1) { - if (offset == capoff && bytes == 4) { - bytes = 2; - offset += 2; - val >>= 16; - } else - return; - } - - capid = pci_get_cfgdata8(pi, capoff); - switch (capid) { - case PCIY_MSI: - msicap_cfgwrite(pi, capoff, offset, bytes, val); - break; - case PCIY_MSIX: - msixcap_cfgwrite(pi, capoff, offset, bytes, val); - break; - case PCIY_EXPRESS: - pciecap_cfgwrite(pi, capoff, offset, bytes, val); - break; - default: - break; - } -} - -static int -pci_emul_iscap(struct pci_devinst *pi, int offset) -{ - uint16_t sts; - - sts = pci_get_cfgdata16(pi, PCIR_STATUS); - if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { - if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) - return (1); - } - return (0); -} - -static int -pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, - int size, uint64_t *val, void *arg1, long arg2) -{ - /* - * Ignore writes; return 0xff's for reads. The mem read code - * will take care of truncating to the correct size. 
- */ - if (dir == MEM_F_READ) { - *val = 0xffffffffffffffff; - } - - return (0); -} - -static int -pci_emul_ecfg_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, - int bytes, uint64_t *val, void *arg1, long arg2) -{ - int bus, slot, func, coff, in; - - coff = addr & 0xfff; - func = (addr >> 12) & 0x7; - slot = (addr >> 15) & 0x1f; - bus = (addr >> 20) & 0xff; - in = (dir == MEM_F_READ); - if (in) - *val = ~0UL; - pci_cfgrw(ctx, vcpu, in, bus, slot, func, coff, bytes, (uint32_t *)val); - return (0); -} - -uint64_t -pci_ecfg_base(void) -{ - - return (PCI_EMUL_ECFG_BASE); -} - -#define BUSIO_ROUNDUP 32 -#define BUSMEM_ROUNDUP (1024 * 1024) - -int -init_pci(struct vmctx *ctx) -{ - struct mem_range mr; - struct pci_devemu *pde; - struct businfo *bi; - struct slotinfo *si; - struct funcinfo *fi; - size_t lowmem; - int bus, slot, func; - int error; - - pci_emul_iobase = PCI_EMUL_IOBASE; - pci_emul_membase32 = vm_get_lowmem_limit(ctx); - pci_emul_membase64 = PCI_EMUL_MEMBASE64; - - for (bus = 0; bus < MAXBUSES; bus++) { - if ((bi = pci_businfo[bus]) == NULL) - continue; - /* - * Keep track of the i/o and memory resources allocated to - * this bus. - */ - bi->iobase = pci_emul_iobase; - bi->membase32 = pci_emul_membase32; - bi->membase64 = pci_emul_membase64; - - for (slot = 0; slot < MAXSLOTS; slot++) { - si = &bi->slotinfo[slot]; - for (func = 0; func < MAXFUNCS; func++) { - fi = &si->si_funcs[func]; - if (fi->fi_name == NULL) - continue; - pde = pci_emul_finddev(fi->fi_name); - assert(pde != NULL); - error = pci_emul_init(ctx, pde, bus, slot, - func, fi); - if (error) - return (error); - } - } - - /* - * Add some slop to the I/O and memory resources decoded by - * this bus to give a guest some flexibility if it wants to - * reprogram the BARs. 
- */ - pci_emul_iobase += BUSIO_ROUNDUP; - pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); - bi->iolimit = pci_emul_iobase; - - pci_emul_membase32 += BUSMEM_ROUNDUP; - pci_emul_membase32 = roundup2(pci_emul_membase32, - BUSMEM_ROUNDUP); - bi->memlimit32 = pci_emul_membase32; - - pci_emul_membase64 += BUSMEM_ROUNDUP; - pci_emul_membase64 = roundup2(pci_emul_membase64, - BUSMEM_ROUNDUP); - bi->memlimit64 = pci_emul_membase64; - } - - /* - * PCI backends are initialized before routing INTx interrupts - * so that LPC devices are able to reserve ISA IRQs before - * routing PIRQ pins. - */ - for (bus = 0; bus < MAXBUSES; bus++) { - if ((bi = pci_businfo[bus]) == NULL) - continue; - - for (slot = 0; slot < MAXSLOTS; slot++) { - si = &bi->slotinfo[slot]; - for (func = 0; func < MAXFUNCS; func++) { - fi = &si->si_funcs[func]; - if (fi->fi_devi == NULL) - continue; - pci_lintr_route(fi->fi_devi); - } - } - } - lpc_pirq_routed(); - - /* - * The guest physical memory map looks like the following: - * [0, lowmem) guest system memory - * [lowmem, lowmem_limit) memory hole (may be absent) - * [lowmem_limit, 0xE0000000) PCI hole (32-bit BAR allocation) - * [0xE0000000, 0xF0000000) PCI extended config window - * [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware - * [4GB, 4GB + highmem) - */ - - /* - * Accesses to memory addresses that are not allocated to system - * memory or PCI devices return 0xff's. 
- */ - lowmem = vm_get_lowmem_size(ctx); - bzero(&mr, sizeof(struct mem_range)); - mr.name = "PCI hole"; - mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; - mr.base = lowmem; - mr.size = (4ULL * 1024 * 1024 * 1024) - lowmem; - mr.handler = pci_emul_fallback_handler; - error = register_mem_fallback(&mr); - assert(error == 0); - - /* PCI extended config space */ - bzero(&mr, sizeof(struct mem_range)); - mr.name = "PCI ECFG"; - mr.flags = MEM_F_RW | MEM_F_IMMUTABLE; - mr.base = PCI_EMUL_ECFG_BASE; - mr.size = PCI_EMUL_ECFG_SIZE; - mr.handler = pci_emul_ecfg_handler; - error = register_mem(&mr); - assert(error == 0); - - return (0); -} - -static void -pci_apic_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, - void *arg) -{ - - dsdt_line(" Package ()"); - dsdt_line(" {"); - dsdt_line(" 0x%X,", slot << 16 | 0xffff); - dsdt_line(" 0x%02X,", pin - 1); - dsdt_line(" Zero,"); - dsdt_line(" 0x%X", ioapic_irq); - dsdt_line(" },"); -} - -static void -pci_pirq_prt_entry(int bus, int slot, int pin, int pirq_pin, int ioapic_irq, - void *arg) -{ - char *name; - - name = lpc_pirq_name(pirq_pin); - if (name == NULL) - return; - dsdt_line(" Package ()"); - dsdt_line(" {"); - dsdt_line(" 0x%X,", slot << 16 | 0xffff); - dsdt_line(" 0x%02X,", pin - 1); - dsdt_line(" %s,", name); - dsdt_line(" 0x00"); - dsdt_line(" },"); - free(name); -} - -/* - * A bhyve virtual machine has a flat PCI hierarchy with a root port - * corresponding to each PCI bus. - */ -static void -pci_bus_write_dsdt(int bus) -{ - struct businfo *bi; - struct slotinfo *si; - struct pci_devinst *pi; - int count, func, slot; - - /* - * If there are no devices on this 'bus' then just return. - */ - if ((bi = pci_businfo[bus]) == NULL) { - /* - * Bus 0 is special because it decodes the I/O ports used - * for PCI config space access even if there are no devices - * on it. 
- */ - if (bus != 0) - return; - } - - dsdt_line(" Device (PC%02X)", bus); - dsdt_line(" {"); - dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); - dsdt_line(" Name (_ADR, Zero)"); - - dsdt_line(" Method (_BBN, 0, NotSerialized)"); - dsdt_line(" {"); - dsdt_line(" Return (0x%08X)", bus); - dsdt_line(" }"); - dsdt_line(" Name (_CRS, ResourceTemplate ()"); - dsdt_line(" {"); - dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " - "MaxFixed, PosDecode,"); - dsdt_line(" 0x0000, // Granularity"); - dsdt_line(" 0x%04X, // Range Minimum", bus); - dsdt_line(" 0x%04X, // Range Maximum", bus); - dsdt_line(" 0x0000, // Translation Offset"); - dsdt_line(" 0x0001, // Length"); - dsdt_line(" ,, )"); - - if (bus == 0) { - dsdt_indent(3); - dsdt_fixed_ioport(0xCF8, 8); - dsdt_unindent(3); - - dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " - "PosDecode, EntireRange,"); - dsdt_line(" 0x0000, // Granularity"); - dsdt_line(" 0x0000, // Range Minimum"); - dsdt_line(" 0x0CF7, // Range Maximum"); - dsdt_line(" 0x0000, // Translation Offset"); - dsdt_line(" 0x0CF8, // Length"); - dsdt_line(" ,, , TypeStatic)"); - - dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " - "PosDecode, EntireRange,"); - dsdt_line(" 0x0000, // Granularity"); - dsdt_line(" 0x0D00, // Range Minimum"); - dsdt_line(" 0x%04X, // Range Maximum", - PCI_EMUL_IOBASE - 1); - dsdt_line(" 0x0000, // Translation Offset"); - dsdt_line(" 0x%04X, // Length", - PCI_EMUL_IOBASE - 0x0D00); - dsdt_line(" ,, , TypeStatic)"); - - if (bi == NULL) { - dsdt_line(" })"); - goto done; - } - } - assert(bi != NULL); - - /* i/o window */ - dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " - "PosDecode, EntireRange,"); - dsdt_line(" 0x0000, // Granularity"); - dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); - dsdt_line(" 0x%04X, // Range Maximum", - bi->iolimit - 1); - dsdt_line(" 0x0000, // Translation Offset"); - dsdt_line(" 0x%04X, // Length", - bi->iolimit - bi->iobase); - dsdt_line(" ,, , 
TypeStatic)"); - - /* mmio window (32-bit) */ - dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " - "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); - dsdt_line(" 0x00000000, // Granularity"); - dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); - dsdt_line(" 0x%08X, // Range Maximum\n", - bi->memlimit32 - 1); - dsdt_line(" 0x00000000, // Translation Offset"); - dsdt_line(" 0x%08X, // Length\n", - bi->memlimit32 - bi->membase32); - dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); - - /* mmio window (64-bit) */ - dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " - "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); - dsdt_line(" 0x0000000000000000, // Granularity"); - dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); - dsdt_line(" 0x%016lX, // Range Maximum\n", - bi->memlimit64 - 1); - dsdt_line(" 0x0000000000000000, // Translation Offset"); - dsdt_line(" 0x%016lX, // Length\n", - bi->memlimit64 - bi->membase64); - dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); - dsdt_line(" })"); - - count = pci_count_lintr(bus); - if (count != 0) { - dsdt_indent(2); - dsdt_line("Name (PPRT, Package ()"); - dsdt_line("{"); - pci_walk_lintr(bus, pci_pirq_prt_entry, NULL); - dsdt_line("})"); - dsdt_line("Name (APRT, Package ()"); - dsdt_line("{"); - pci_walk_lintr(bus, pci_apic_prt_entry, NULL); - dsdt_line("})"); - dsdt_line("Method (_PRT, 0, NotSerialized)"); - dsdt_line("{"); - dsdt_line(" If (PICM)"); - dsdt_line(" {"); - dsdt_line(" Return (APRT)"); - dsdt_line(" }"); - dsdt_line(" Else"); - dsdt_line(" {"); - dsdt_line(" Return (PPRT)"); - dsdt_line(" }"); - dsdt_line("}"); - dsdt_unindent(2); - } - - dsdt_indent(2); - for (slot = 0; slot < MAXSLOTS; slot++) { - si = &bi->slotinfo[slot]; - for (func = 0; func < MAXFUNCS; func++) { - pi = si->si_funcs[func].fi_devi; - if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) - pi->pi_d->pe_write_dsdt(pi); - } - } - dsdt_unindent(2); -done: - dsdt_line(" }"); -} - -void -pci_write_dsdt(void) -{ - int bus; 
- - dsdt_indent(1); - dsdt_line("Name (PICM, 0x00)"); - dsdt_line("Method (_PIC, 1, NotSerialized)"); - dsdt_line("{"); - dsdt_line(" Store (Arg0, PICM)"); - dsdt_line("}"); - dsdt_line(""); - dsdt_line("Scope (_SB)"); - dsdt_line("{"); - for (bus = 0; bus < MAXBUSES; bus++) - pci_bus_write_dsdt(bus); - dsdt_line("}"); - dsdt_unindent(1); -} - -int -pci_bus_configured(int bus) -{ - assert(bus >= 0 && bus < MAXBUSES); - return (pci_businfo[bus] != NULL); -} - -int -pci_msi_enabled(struct pci_devinst *pi) -{ - return (pi->pi_msi.enabled); -} - -int -pci_msi_maxmsgnum(struct pci_devinst *pi) -{ - if (pi->pi_msi.enabled) - return (pi->pi_msi.maxmsgnum); - else - return (0); -} - -int -pci_msix_enabled(struct pci_devinst *pi) -{ - - return (pi->pi_msix.enabled && !pi->pi_msi.enabled); -} - -void -pci_generate_msix(struct pci_devinst *pi, int index) -{ - struct msix_table_entry *mte; - - if (!pci_msix_enabled(pi)) - return; - - if (pi->pi_msix.function_mask) - return; - - if (index >= pi->pi_msix.table_count) - return; - - mte = &pi->pi_msix.table[index]; - if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { - /* XXX Set PBA bit if interrupt is disabled */ - vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data); - } -} - -void -pci_generate_msi(struct pci_devinst *pi, int index) -{ - - if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { - vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, - pi->pi_msi.msg_data + index); - } -} - -static bool -pci_lintr_permitted(struct pci_devinst *pi) -{ - uint16_t cmd; - - cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); - return (!(pi->pi_msi.enabled || pi->pi_msix.enabled || - (cmd & PCIM_CMD_INTxDIS))); -} - -void -pci_lintr_request(struct pci_devinst *pi) -{ - struct businfo *bi; - struct slotinfo *si; - int bestpin, bestcount, pin; - - bi = pci_businfo[pi->pi_bus]; - assert(bi != NULL); - - /* - * Just allocate a pin from our slot. The pin will be - * assigned IRQs later when interrupts are routed. 
- */ - si = &bi->slotinfo[pi->pi_slot]; - bestpin = 0; - bestcount = si->si_intpins[0].ii_count; - for (pin = 1; pin < 4; pin++) { - if (si->si_intpins[pin].ii_count < bestcount) { - bestpin = pin; - bestcount = si->si_intpins[pin].ii_count; - } - } - - si->si_intpins[bestpin].ii_count++; - pi->pi_lintr.pin = bestpin + 1; - pci_set_cfgdata8(pi, PCIR_INTPIN, bestpin + 1); -} - -static void -pci_lintr_route(struct pci_devinst *pi) -{ - struct businfo *bi; - struct intxinfo *ii; - - if (pi->pi_lintr.pin == 0) - return; - - bi = pci_businfo[pi->pi_bus]; - assert(bi != NULL); - ii = &bi->slotinfo[pi->pi_slot].si_intpins[pi->pi_lintr.pin - 1]; - - /* - * Attempt to allocate an I/O APIC pin for this intpin if one - * is not yet assigned. - */ - if (ii->ii_ioapic_irq == 0) - ii->ii_ioapic_irq = ioapic_pci_alloc_irq(); - assert(ii->ii_ioapic_irq > 0); - - /* - * Attempt to allocate a PIRQ pin for this intpin if one is - * not yet assigned. - */ - if (ii->ii_pirq_pin == 0) - ii->ii_pirq_pin = pirq_alloc_pin(pi->pi_vmctx); - assert(ii->ii_pirq_pin > 0); - - pi->pi_lintr.ioapic_irq = ii->ii_ioapic_irq; - pi->pi_lintr.pirq_pin = ii->ii_pirq_pin; - pci_set_cfgdata8(pi, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin)); -} - -void -pci_lintr_assert(struct pci_devinst *pi) -{ - - assert(pi->pi_lintr.pin > 0); - - pthread_mutex_lock(&pi->pi_lintr.lock); - if (pi->pi_lintr.state == IDLE) { - if (pci_lintr_permitted(pi)) { - pi->pi_lintr.state = ASSERTED; - pci_irq_assert(pi); - } else - pi->pi_lintr.state = PENDING; - } - pthread_mutex_unlock(&pi->pi_lintr.lock); -} - -void -pci_lintr_deassert(struct pci_devinst *pi) -{ - - assert(pi->pi_lintr.pin > 0); - - pthread_mutex_lock(&pi->pi_lintr.lock); - if (pi->pi_lintr.state == ASSERTED) { - pi->pi_lintr.state = IDLE; - pci_irq_deassert(pi); - } else if (pi->pi_lintr.state == PENDING) - pi->pi_lintr.state = IDLE; - pthread_mutex_unlock(&pi->pi_lintr.lock); -} - -static void -pci_lintr_update(struct pci_devinst *pi) -{ - - 
pthread_mutex_lock(&pi->pi_lintr.lock); - if (pi->pi_lintr.state == ASSERTED && !pci_lintr_permitted(pi)) { - pci_irq_deassert(pi); - pi->pi_lintr.state = PENDING; - } else if (pi->pi_lintr.state == PENDING && pci_lintr_permitted(pi)) { - pi->pi_lintr.state = ASSERTED; - pci_irq_assert(pi); - } - pthread_mutex_unlock(&pi->pi_lintr.lock); -} - -int -pci_count_lintr(int bus) -{ - int count, slot, pin; - struct slotinfo *slotinfo; - - count = 0; - if (pci_businfo[bus] != NULL) { - for (slot = 0; slot < MAXSLOTS; slot++) { - slotinfo = &pci_businfo[bus]->slotinfo[slot]; - for (pin = 0; pin < 4; pin++) { - if (slotinfo->si_intpins[pin].ii_count != 0) - count++; - } - } - } - return (count); -} - -void -pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg) -{ - struct businfo *bi; - struct slotinfo *si; - struct intxinfo *ii; - int slot, pin; - - if ((bi = pci_businfo[bus]) == NULL) - return; - - for (slot = 0; slot < MAXSLOTS; slot++) { - si = &bi->slotinfo[slot]; - for (pin = 0; pin < 4; pin++) { - ii = &si->si_intpins[pin]; - if (ii->ii_count != 0) - cb(bus, slot, pin + 1, ii->ii_pirq_pin, - ii->ii_ioapic_irq, arg); - } - } -} - -/* - * Return 1 if the emulated device in 'slot' is a multi-function device. - * Return 0 otherwise. - */ -static int -pci_emul_is_mfdev(int bus, int slot) -{ - struct businfo *bi; - struct slotinfo *si; - int f, numfuncs; - - numfuncs = 0; - if ((bi = pci_businfo[bus]) != NULL) { - si = &bi->slotinfo[slot]; - for (f = 0; f < MAXFUNCS; f++) { - if (si->si_funcs[f].fi_devi != NULL) { - numfuncs++; - } - } - } - return (numfuncs > 1); -} - -/* - * Ensure that the PCIM_MFDEV bit is properly set (or unset) depending on - * whether or not is a multi-function being emulated in the pci 'slot'. 
- */ -static void -pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) -{ - int mfdev; - - if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { - mfdev = pci_emul_is_mfdev(bus, slot); - switch (bytes) { - case 1: - case 2: - *rv &= ~PCIM_MFDEV; - if (mfdev) { - *rv |= PCIM_MFDEV; - } - break; - case 4: - *rv &= ~(PCIM_MFDEV << 16); - if (mfdev) { - *rv |= (PCIM_MFDEV << 16); - } - break; - } - } -} - -static void -pci_emul_cmdsts_write(struct pci_devinst *pi, int coff, uint32_t new, int bytes) -{ - int i, rshift; - uint32_t cmd, cmd2, changed, old, readonly; - - cmd = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */ - - /* - * From PCI Local Bus Specification 3.0 sections 6.2.2 and 6.2.3. - * - * XXX Bits 8, 11, 12, 13, 14 and 15 in the status register are - * 'write 1 to clear'. However these bits are not set to '1' by - * any device emulation so it is simpler to treat them as readonly. - */ - rshift = (coff & 0x3) * 8; - readonly = 0xFFFFF880 >> rshift; - - old = CFGREAD(pi, coff, bytes); - new &= ~readonly; - new |= (old & readonly); - CFGWRITE(pi, coff, new, bytes); /* update config */ - - cmd2 = pci_get_cfgdata16(pi, PCIR_COMMAND); /* get updated value */ - changed = cmd ^ cmd2; - - /* - * If the MMIO or I/O address space decoding has changed then - * register/unregister all BARs that decode that address space. - */ - for (i = 0; i <= PCI_BARMAX; i++) { - switch (pi->pi_bar[i].type) { - case PCIBAR_NONE: - case PCIBAR_MEMHI64: - break; - case PCIBAR_IO: - /* I/O address space decoding changed? */ - if (changed & PCIM_CMD_PORTEN) { - if (porten(pi)) - register_bar(pi, i); - else - unregister_bar(pi, i); - } - break; - case PCIBAR_MEM32: - case PCIBAR_MEM64: - /* MMIO address space decoding changed? 
*/ - if (changed & PCIM_CMD_MEMEN) { - if (memen(pi)) - register_bar(pi, i); - else - unregister_bar(pi, i); - } - break; - default: - assert(0); - } - } - - /* - * If INTx has been unmasked and is pending, assert the - * interrupt. - */ - pci_lintr_update(pi); -} - -static void -pci_cfgrw(struct vmctx *ctx, int vcpu, int in, int bus, int slot, int func, - int coff, int bytes, uint32_t *eax) -{ - struct businfo *bi; - struct slotinfo *si; - struct pci_devinst *pi; - struct pci_devemu *pe; - int idx, needcfg; - uint64_t addr, bar, mask; - - if ((bi = pci_businfo[bus]) != NULL) { - si = &bi->slotinfo[slot]; - pi = si->si_funcs[func].fi_devi; - } else - pi = NULL; - - /* - * Just return if there is no device at this slot:func or if the - * the guest is doing an un-aligned access. - */ - if (pi == NULL || (bytes != 1 && bytes != 2 && bytes != 4) || - (coff & (bytes - 1)) != 0) { - if (in) - *eax = 0xffffffff; - return; - } - - /* - * Ignore all writes beyond the standard config space and return all - * ones on reads. - */ - if (coff >= PCI_REGMAX + 1) { - if (in) { - *eax = 0xffffffff; - /* - * Extended capabilities begin at offset 256 in config - * space. Absence of extended capabilities is signaled - * with all 0s in the extended capability header at - * offset 256. 
- */ - if (coff <= PCI_REGMAX + 4) - *eax = 0x00000000; - } - return; - } - - pe = pi->pi_d; - - /* - * Config read - */ - if (in) { - /* Let the device emulation override the default handler */ - if (pe->pe_cfgread != NULL) { - needcfg = pe->pe_cfgread(ctx, vcpu, pi, coff, bytes, - eax); - } else { - needcfg = 1; - } - - if (needcfg) - *eax = CFGREAD(pi, coff, bytes); - - pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax); - } else { - /* Let the device emulation override the default handler */ - if (pe->pe_cfgwrite != NULL && - (*pe->pe_cfgwrite)(ctx, vcpu, pi, coff, bytes, *eax) == 0) - return; - - /* - * Special handling for write to BAR registers - */ - if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) { - /* - * Ignore writes to BAR registers that are not - * 4-byte aligned. - */ - if (bytes != 4 || (coff & 0x3) != 0) - return; - idx = (coff - PCIR_BAR(0)) / 4; - mask = ~(pi->pi_bar[idx].size - 1); - switch (pi->pi_bar[idx].type) { - case PCIBAR_NONE: - pi->pi_bar[idx].addr = bar = 0; - break; - case PCIBAR_IO: - addr = *eax & mask; - addr &= 0xffff; - bar = addr | PCIM_BAR_IO_SPACE; - /* - * Register the new BAR value for interception - */ - if (addr != pi->pi_bar[idx].addr) { - update_bar_address(pi, addr, idx, - PCIBAR_IO); - } - break; - case PCIBAR_MEM32: - addr = bar = *eax & mask; - bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; - if (addr != pi->pi_bar[idx].addr) { - update_bar_address(pi, addr, idx, - PCIBAR_MEM32); - } - break; - case PCIBAR_MEM64: - addr = bar = *eax & mask; - bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | - PCIM_BAR_MEM_PREFETCH; - if (addr != (uint32_t)pi->pi_bar[idx].addr) { - update_bar_address(pi, addr, idx, - PCIBAR_MEM64); - } - break; - case PCIBAR_MEMHI64: - mask = ~(pi->pi_bar[idx - 1].size - 1); - addr = ((uint64_t)*eax << 32) & mask; - bar = addr >> 32; - if (bar != pi->pi_bar[idx - 1].addr >> 32) { - update_bar_address(pi, addr, idx - 1, - PCIBAR_MEMHI64); - } - break; - default: - assert(0); - } - 
pci_set_cfgdata32(pi, coff, bar); - - } else if (pci_emul_iscap(pi, coff)) { - pci_emul_capwrite(pi, coff, bytes, *eax); - } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { - pci_emul_cmdsts_write(pi, coff, *eax, bytes); - } else { - CFGWRITE(pi, coff, *eax, bytes); - } - } -} - -static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; - -static int -pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - uint32_t x; - - if (bytes != 4) { - if (in) - *eax = (bytes == 2) ? 0xffff : 0xff; - return (0); - } - - if (in) { - x = (cfgbus << 16) | (cfgslot << 11) | (cfgfunc << 8) | cfgoff; - if (cfgenable) - x |= CONF1_ENABLE; - *eax = x; - } else { - x = *eax; - cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; - cfgoff = x & PCI_REGMAX; - cfgfunc = (x >> 8) & PCI_FUNCMAX; - cfgslot = (x >> 11) & PCI_SLOTMAX; - cfgbus = (x >> 16) & PCI_BUSMAX; - } - - return (0); -} -INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_INOUT, pci_emul_cfgaddr); - -static int -pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - int coff; - - assert(bytes == 1 || bytes == 2 || bytes == 4); - - coff = cfgoff + (port - CONF1_DATA_PORT); - if (cfgenable) { - pci_cfgrw(ctx, vcpu, in, cfgbus, cfgslot, cfgfunc, coff, bytes, - eax); - } else { - /* Ignore accesses to cfgdata if not enabled by cfgaddr */ - if (in) - *eax = 0xffffffff; - } - return (0); -} - -INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+0, IOPORT_F_INOUT, pci_emul_cfgdata); -INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+1, IOPORT_F_INOUT, pci_emul_cfgdata); -INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+2, IOPORT_F_INOUT, pci_emul_cfgdata); -INOUT_PORT(pci_cfgdata, CONF1_DATA_PORT+3, IOPORT_F_INOUT, pci_emul_cfgdata); - -#define PCI_EMUL_TEST -#ifdef PCI_EMUL_TEST -/* - * Define a dummy test device - */ -#define DIOSZ 8 -#define DMEMSZ 4096 -struct pci_emul_dsoftc { - uint8_t ioregs[DIOSZ]; - uint8_t memregs[2][DMEMSZ]; -}; - -#define 
PCI_EMUL_MSI_MSGS 4 -#define PCI_EMUL_MSIX_MSGS 16 - -static int -pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts) -{ - int error; - struct pci_emul_dsoftc *sc; - - sc = calloc(1, sizeof(struct pci_emul_dsoftc)); - - pi->pi_arg = sc; - - pci_set_cfgdata16(pi, PCIR_DEVICE, 0x0001); - pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD); - pci_set_cfgdata8(pi, PCIR_CLASS, 0x02); - - error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS); - assert(error == 0); - - error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ); - assert(error == 0); - - error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ); - assert(error == 0); - - error = pci_emul_alloc_bar(pi, 2, PCIBAR_MEM32, DMEMSZ); - assert(error == 0); - - return (0); -} - -static void -pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, - uint64_t offset, int size, uint64_t value) -{ - int i; - struct pci_emul_dsoftc *sc = pi->pi_arg; - - if (baridx == 0) { - if (offset + size > DIOSZ) { - printf("diow: iow too large, offset %ld size %d\n", - offset, size); - return; - } - - if (size == 1) { - sc->ioregs[offset] = value & 0xff; - } else if (size == 2) { - *(uint16_t *)&sc->ioregs[offset] = value & 0xffff; - } else if (size == 4) { - *(uint32_t *)&sc->ioregs[offset] = value; - } else { - printf("diow: iow unknown size %d\n", size); - } - - /* - * Special magic value to generate an interrupt - */ - if (offset == 4 && size == 4 && pci_msi_enabled(pi)) - pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); - - if (value == 0xabcdef) { - for (i = 0; i < pci_msi_maxmsgnum(pi); i++) - pci_generate_msi(pi, i); - } - } - - if (baridx == 1 || baridx == 2) { - if (offset + size > DMEMSZ) { - printf("diow: memw too large, offset %ld size %d\n", - offset, size); - return; - } - - i = baridx - 1; /* 'memregs' index */ - - if (size == 1) { - sc->memregs[i][offset] = value; - } else if (size == 2) { - *(uint16_t *)&sc->memregs[i][offset] = value; - } else if (size == 4) { - *(uint32_t 
*)&sc->memregs[i][offset] = value; - } else if (size == 8) { - *(uint64_t *)&sc->memregs[i][offset] = value; - } else { - printf("diow: memw unknown size %d\n", size); - } - - /* - * magic interrupt ?? - */ - } - - if (baridx > 2 || baridx < 0) { - printf("diow: unknown bar idx %d\n", baridx); - } -} - -static uint64_t -pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, - uint64_t offset, int size) -{ - struct pci_emul_dsoftc *sc = pi->pi_arg; - uint32_t value; - int i; - - if (baridx == 0) { - if (offset + size > DIOSZ) { - printf("dior: ior too large, offset %ld size %d\n", - offset, size); - return (0); - } - - value = 0; - if (size == 1) { - value = sc->ioregs[offset]; - } else if (size == 2) { - value = *(uint16_t *) &sc->ioregs[offset]; - } else if (size == 4) { - value = *(uint32_t *) &sc->ioregs[offset]; - } else { - printf("dior: ior unknown size %d\n", size); - } - } - - if (baridx == 1 || baridx == 2) { - if (offset + size > DMEMSZ) { - printf("dior: memr too large, offset %ld size %d\n", - offset, size); - return (0); - } - - i = baridx - 1; /* 'memregs' index */ - - if (size == 1) { - value = sc->memregs[i][offset]; - } else if (size == 2) { - value = *(uint16_t *) &sc->memregs[i][offset]; - } else if (size == 4) { - value = *(uint32_t *) &sc->memregs[i][offset]; - } else if (size == 8) { - value = *(uint64_t *) &sc->memregs[i][offset]; - } else { - printf("dior: ior unknown size %d\n", size); - } - } - - - if (baridx > 2 || baridx < 0) { - printf("dior: unknown bar idx %d\n", baridx); - return (0); - } - - return (value); -} - -struct pci_devemu pci_dummy = { - .pe_emu = "dummy", - .pe_init = pci_emul_dinit, - .pe_barwrite = pci_emul_diow, - .pe_barread = pci_emul_dior -}; -PCI_EMUL_SET(pci_dummy); - -#endif /* PCI_EMUL_TEST */ diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h deleted file mode 100644 index d6e5490..0000000 --- a/usr.sbin/bhyve/pci_emul.h +++ /dev/null @@ -1,285 +0,0 @@ -/*- - * Copyright 
(c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _PCI_EMUL_H_ -#define _PCI_EMUL_H_ - -#include <sys/types.h> -#include <sys/queue.h> -#include <sys/kernel.h> -#include <sys/_pthreadtypes.h> - -#include <dev/pci/pcireg.h> - -#include <assert.h> - -#define PCI_BARMAX PCIR_MAX_BAR_0 /* BAR registers in a Type 0 header */ - -struct vmctx; -struct pci_devinst; -struct memory_region; - -struct pci_devemu { - char *pe_emu; /* Name of device emulation */ - - /* instance creation */ - int (*pe_init)(struct vmctx *, struct pci_devinst *, - char *opts); - - /* ACPI DSDT enumeration */ - void (*pe_write_dsdt)(struct pci_devinst *); - - /* config space read/write callbacks */ - int (*pe_cfgwrite)(struct vmctx *ctx, int vcpu, - struct pci_devinst *pi, int offset, - int bytes, uint32_t val); - int (*pe_cfgread)(struct vmctx *ctx, int vcpu, - struct pci_devinst *pi, int offset, - int bytes, uint32_t *retval); - - /* BAR read/write callbacks */ - void (*pe_barwrite)(struct vmctx *ctx, int vcpu, - struct pci_devinst *pi, int baridx, - uint64_t offset, int size, uint64_t value); - uint64_t (*pe_barread)(struct vmctx *ctx, int vcpu, - struct pci_devinst *pi, int baridx, - uint64_t offset, int size); -}; -#define PCI_EMUL_SET(x) DATA_SET(pci_devemu_set, x); - -enum pcibar_type { - PCIBAR_NONE, - PCIBAR_IO, - PCIBAR_MEM32, - PCIBAR_MEM64, - PCIBAR_MEMHI64 -}; - -struct pcibar { - enum pcibar_type type; /* io or memory */ - uint64_t size; - uint64_t addr; -}; - -#define PI_NAMESZ 40 - -struct msix_table_entry { - uint64_t addr; - uint32_t msg_data; - uint32_t vector_control; -} __packed; - -/* - * In case the structure is modified to hold extra information, use a define - * for the size that should be emulated. 
- */ -#define MSIX_TABLE_ENTRY_SIZE 16 -#define MAX_MSIX_TABLE_ENTRIES 2048 -#define PBA_SIZE(msgnum) (roundup2((msgnum), 64) / 8) - -enum lintr_stat { - IDLE, - ASSERTED, - PENDING -}; - -struct pci_devinst { - struct pci_devemu *pi_d; - struct vmctx *pi_vmctx; - uint8_t pi_bus, pi_slot, pi_func; - char pi_name[PI_NAMESZ]; - int pi_bar_getsize; - int pi_prevcap; - int pi_capend; - - struct { - int8_t pin; - enum lintr_stat state; - int pirq_pin; - int ioapic_irq; - pthread_mutex_t lock; - } pi_lintr; - - struct { - int enabled; - uint64_t addr; - uint64_t msg_data; - int maxmsgnum; - } pi_msi; - - struct { - int enabled; - int table_bar; - int pba_bar; - uint32_t table_offset; - int table_count; - uint32_t pba_offset; - int pba_size; - int function_mask; - struct msix_table_entry *table; /* allocated at runtime */ - void *pba_page; - int pba_page_offset; - } pi_msix; - - void *pi_arg; /* devemu-private data */ - - u_char pi_cfgdata[PCI_REGMAX + 1]; - struct pcibar pi_bar[PCI_BARMAX + 1]; -}; - -struct msicap { - uint8_t capid; - uint8_t nextptr; - uint16_t msgctrl; - uint32_t addrlo; - uint32_t addrhi; - uint16_t msgdata; -} __packed; - -struct msixcap { - uint8_t capid; - uint8_t nextptr; - uint16_t msgctrl; - uint32_t table_info; /* bar index and offset within it */ - uint32_t pba_info; /* bar index and offset within it */ -} __packed; - -struct pciecap { - uint8_t capid; - uint8_t nextptr; - uint16_t pcie_capabilities; - - uint32_t dev_capabilities; /* all devices */ - uint16_t dev_control; - uint16_t dev_status; - - uint32_t link_capabilities; /* devices with links */ - uint16_t link_control; - uint16_t link_status; - - uint32_t slot_capabilities; /* ports with slots */ - uint16_t slot_control; - uint16_t slot_status; - - uint16_t root_control; /* root ports */ - uint16_t root_capabilities; - uint32_t root_status; - - uint32_t dev_capabilities2; /* all devices */ - uint16_t dev_control2; - uint16_t dev_status2; - - uint32_t link_capabilities2; /* devices with 
links */ - uint16_t link_control2; - uint16_t link_status2; - - uint32_t slot_capabilities2; /* ports with slots */ - uint16_t slot_control2; - uint16_t slot_status2; -} __packed; - -typedef void (*pci_lintr_cb)(int b, int s, int pin, int pirq_pin, - int ioapic_irq, void *arg); - -int init_pci(struct vmctx *ctx); -void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, - int bytes, uint32_t val); -void msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, - int bytes, uint32_t val); -void pci_callback(void); -int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, - enum pcibar_type type, uint64_t size); -int pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, - uint64_t hostbase, enum pcibar_type type, uint64_t size); -int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum); -int pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type); -void pci_generate_msi(struct pci_devinst *pi, int msgnum); -void pci_generate_msix(struct pci_devinst *pi, int msgnum); -void pci_lintr_assert(struct pci_devinst *pi); -void pci_lintr_deassert(struct pci_devinst *pi); -void pci_lintr_request(struct pci_devinst *pi); -int pci_msi_enabled(struct pci_devinst *pi); -int pci_msix_enabled(struct pci_devinst *pi); -int pci_msix_table_bar(struct pci_devinst *pi); -int pci_msix_pba_bar(struct pci_devinst *pi); -int pci_msi_msgnum(struct pci_devinst *pi); -int pci_parse_slot(char *opt); -void pci_populate_msicap(struct msicap *cap, int msgs, int nextptr); -int pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum); -int pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, - uint64_t value); -uint64_t pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size); -int pci_count_lintr(int bus); -void pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg); -void pci_write_dsdt(void); -uint64_t pci_ecfg_base(void); -int pci_bus_configured(int bus); - -static __inline void -pci_set_cfgdata8(struct pci_devinst *pi, 
int offset, uint8_t val) -{ - assert(offset <= PCI_REGMAX); - *(uint8_t *)(pi->pi_cfgdata + offset) = val; -} - -static __inline void -pci_set_cfgdata16(struct pci_devinst *pi, int offset, uint16_t val) -{ - assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0); - *(uint16_t *)(pi->pi_cfgdata + offset) = val; -} - -static __inline void -pci_set_cfgdata32(struct pci_devinst *pi, int offset, uint32_t val) -{ - assert(offset <= (PCI_REGMAX - 3) && (offset & 3) == 0); - *(uint32_t *)(pi->pi_cfgdata + offset) = val; -} - -static __inline uint8_t -pci_get_cfgdata8(struct pci_devinst *pi, int offset) -{ - assert(offset <= PCI_REGMAX); - return (*(uint8_t *)(pi->pi_cfgdata + offset)); -} - -static __inline uint16_t -pci_get_cfgdata16(struct pci_devinst *pi, int offset) -{ - assert(offset <= (PCI_REGMAX - 1) && (offset & 1) == 0); - return (*(uint16_t *)(pi->pi_cfgdata + offset)); -} - -static __inline uint32_t -pci_get_cfgdata32(struct pci_devinst *pi, int offset) -{ - assert(offset <= (PCI_REGMAX - 3) && (offset & 3) == 0); - return (*(uint32_t *)(pi->pi_cfgdata + offset)); -} - -#endif /* _PCI_EMUL_H_ */ diff --git a/usr.sbin/bhyve/pci_hostbridge.c b/usr.sbin/bhyve/pci_hostbridge.c deleted file mode 100644 index 5c9ea28..0000000 --- a/usr.sbin/bhyve/pci_hostbridge.c +++ /dev/null @@ -1,70 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include "pci_emul.h" - -static int -pci_hostbridge_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) -{ - - /* config space */ - pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1275); /* NetApp */ - pci_set_cfgdata16(pi, PCIR_DEVICE, 0x1275); /* NetApp */ - pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_NORMAL); - pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE); - pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_HOST); - - pci_emul_add_pciecap(pi, PCIEM_TYPE_ROOT_PORT); - - return (0); -} - -static int -pci_amd_hostbridge_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) -{ - (void) pci_hostbridge_init(ctx, pi, opts); - pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1022); /* AMD */ - pci_set_cfgdata16(pi, PCIR_DEVICE, 0x7432); /* made up */ - - return (0); -} - -struct pci_devemu pci_de_amd_hostbridge = { - .pe_emu = "amd_hostbridge", - .pe_init = pci_amd_hostbridge_init, -}; -PCI_EMUL_SET(pci_de_amd_hostbridge); - -struct pci_devemu pci_de_hostbridge = { - .pe_emu = "hostbridge", - .pe_init = pci_hostbridge_init, -}; -PCI_EMUL_SET(pci_de_hostbridge); diff --git a/usr.sbin/bhyve/pci_irq.c b/usr.sbin/bhyve/pci_irq.c deleted file mode 
100644 index f22b15c..0000000 --- a/usr.sbin/bhyve/pci_irq.c +++ /dev/null @@ -1,346 +0,0 @@ -/*- - * Copyright (c) 2014 Hudson River Trading LLC - * Written by: John H. Baldwin <jhb@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <machine/vmm.h> - -#include <assert.h> -#include <pthread.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <vmmapi.h> - -#include "acpi.h" -#include "inout.h" -#include "pci_emul.h" -#include "pci_irq.h" -#include "pci_lpc.h" - -/* - * Implement an 8 pin PCI interrupt router compatible with the router - * present on Intel's ICH10 chip. 
- */ - -/* Fields in each PIRQ register. */ -#define PIRQ_DIS 0x80 -#define PIRQ_IRQ 0x0f - -/* Only IRQs 3-7, 9-12, and 14-15 are permitted. */ -#define PERMITTED_IRQS 0xdef8 -#define IRQ_PERMITTED(irq) (((1U << (irq)) & PERMITTED_IRQS) != 0) - -/* IRQ count to disable an IRQ. */ -#define IRQ_DISABLED 0xff - -static struct pirq { - uint8_t reg; - int use_count; - int active_count; - pthread_mutex_t lock; -} pirqs[8]; - -static u_char irq_counts[16]; -static int pirq_cold = 1; - -/* - * Returns true if this pin is enabled with a valid IRQ. Setting the - * register to a reserved IRQ causes interrupts to not be asserted as - * if the pin was disabled. - */ -static bool -pirq_valid_irq(int reg) -{ - - if (reg & PIRQ_DIS) - return (false); - return (IRQ_PERMITTED(reg & PIRQ_IRQ)); -} - -uint8_t -pirq_read(int pin) -{ - - assert(pin > 0 && pin <= nitems(pirqs)); - return (pirqs[pin - 1].reg); -} - -void -pirq_write(struct vmctx *ctx, int pin, uint8_t val) -{ - struct pirq *pirq; - - assert(pin > 0 && pin <= nitems(pirqs)); - pirq = &pirqs[pin - 1]; - pthread_mutex_lock(&pirq->lock); - if (pirq->reg != (val & (PIRQ_DIS | PIRQ_IRQ))) { - if (pirq->active_count != 0 && pirq_valid_irq(pirq->reg)) - vm_isa_deassert_irq(ctx, pirq->reg & PIRQ_IRQ, -1); - pirq->reg = val & (PIRQ_DIS | PIRQ_IRQ); - if (pirq->active_count != 0 && pirq_valid_irq(pirq->reg)) - vm_isa_assert_irq(ctx, pirq->reg & PIRQ_IRQ, -1); - } - pthread_mutex_unlock(&pirq->lock); -} - -void -pci_irq_reserve(int irq) -{ - - assert(irq >= 0 && irq < nitems(irq_counts)); - assert(pirq_cold); - assert(irq_counts[irq] == 0 || irq_counts[irq] == IRQ_DISABLED); - irq_counts[irq] = IRQ_DISABLED; -} - -void -pci_irq_use(int irq) -{ - - assert(irq >= 0 && irq < nitems(irq_counts)); - assert(pirq_cold); - assert(irq_counts[irq] != IRQ_DISABLED); - irq_counts[irq]++; -} - -void -pci_irq_init(struct vmctx *ctx) -{ - int i; - - for (i = 0; i < nitems(pirqs); i++) { - pirqs[i].reg = PIRQ_DIS; - pirqs[i].use_count = 0; - 
pirqs[i].active_count = 0; - pthread_mutex_init(&pirqs[i].lock, NULL); - } - for (i = 0; i < nitems(irq_counts); i++) { - if (IRQ_PERMITTED(i)) - irq_counts[i] = 0; - else - irq_counts[i] = IRQ_DISABLED; - } -} - -void -pci_irq_assert(struct pci_devinst *pi) -{ - struct pirq *pirq; - - if (pi->pi_lintr.pirq_pin > 0) { - assert(pi->pi_lintr.pirq_pin <= nitems(pirqs)); - pirq = &pirqs[pi->pi_lintr.pirq_pin - 1]; - pthread_mutex_lock(&pirq->lock); - pirq->active_count++; - if (pirq->active_count == 1 && pirq_valid_irq(pirq->reg)) { - vm_isa_assert_irq(pi->pi_vmctx, pirq->reg & PIRQ_IRQ, - pi->pi_lintr.ioapic_irq); - pthread_mutex_unlock(&pirq->lock); - return; - } - pthread_mutex_unlock(&pirq->lock); - } - vm_ioapic_assert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq); -} - -void -pci_irq_deassert(struct pci_devinst *pi) -{ - struct pirq *pirq; - - if (pi->pi_lintr.pirq_pin > 0) { - assert(pi->pi_lintr.pirq_pin <= nitems(pirqs)); - pirq = &pirqs[pi->pi_lintr.pirq_pin - 1]; - pthread_mutex_lock(&pirq->lock); - pirq->active_count--; - if (pirq->active_count == 0 && pirq_valid_irq(pirq->reg)) { - vm_isa_deassert_irq(pi->pi_vmctx, pirq->reg & PIRQ_IRQ, - pi->pi_lintr.ioapic_irq); - pthread_mutex_unlock(&pirq->lock); - return; - } - pthread_mutex_unlock(&pirq->lock); - } - vm_ioapic_deassert_irq(pi->pi_vmctx, pi->pi_lintr.ioapic_irq); -} - -int -pirq_alloc_pin(struct vmctx *ctx) -{ - int best_count, best_irq, best_pin, irq, pin; - - pirq_cold = 0; - - /* First, find the least-used PIRQ pin. */ - best_pin = 0; - best_count = pirqs[0].use_count; - for (pin = 1; pin < nitems(pirqs); pin++) { - if (pirqs[pin].use_count < best_count) { - best_pin = pin; - best_count = pirqs[pin].use_count; - } - } - pirqs[best_pin].use_count++; - - /* Second, route this pin to an IRQ. 
*/ - if (pirqs[best_pin].reg == PIRQ_DIS) { - best_irq = -1; - best_count = 0; - for (irq = 0; irq < nitems(irq_counts); irq++) { - if (irq_counts[irq] == IRQ_DISABLED) - continue; - if (best_irq == -1 || irq_counts[irq] < best_count) { - best_irq = irq; - best_count = irq_counts[irq]; - } - } - assert(best_irq >= 0); - irq_counts[best_irq]++; - pirqs[best_pin].reg = best_irq; - vm_isa_set_irq_trigger(ctx, best_irq, LEVEL_TRIGGER); - } - - return (best_pin + 1); -} - -int -pirq_irq(int pin) -{ - assert(pin > 0 && pin <= nitems(pirqs)); - return (pirqs[pin - 1].reg & PIRQ_IRQ); -} - -/* XXX: Generate $PIR table. */ - -static void -pirq_dsdt(void) -{ - char *irq_prs, *old; - int irq, pin; - - irq_prs = NULL; - for (irq = 0; irq < nitems(irq_counts); irq++) { - if (!IRQ_PERMITTED(irq)) - continue; - if (irq_prs == NULL) - asprintf(&irq_prs, "%d", irq); - else { - old = irq_prs; - asprintf(&irq_prs, "%s,%d", old, irq); - free(old); - } - } - - /* - * A helper method to validate a link register's value. This - * duplicates pirq_valid_irq(). 
- */ - dsdt_line(""); - dsdt_line("Method (PIRV, 1, NotSerialized)"); - dsdt_line("{"); - dsdt_line(" If (And (Arg0, 0x%02X))", PIRQ_DIS); - dsdt_line(" {"); - dsdt_line(" Return (0x00)"); - dsdt_line(" }"); - dsdt_line(" And (Arg0, 0x%02X, Local0)", PIRQ_IRQ); - dsdt_line(" If (LLess (Local0, 0x03))"); - dsdt_line(" {"); - dsdt_line(" Return (0x00)"); - dsdt_line(" }"); - dsdt_line(" If (LEqual (Local0, 0x08))"); - dsdt_line(" {"); - dsdt_line(" Return (0x00)"); - dsdt_line(" }"); - dsdt_line(" If (LEqual (Local0, 0x0D))"); - dsdt_line(" {"); - dsdt_line(" Return (0x00)"); - dsdt_line(" }"); - dsdt_line(" Return (0x01)"); - dsdt_line("}"); - - for (pin = 0; pin < nitems(pirqs); pin++) { - dsdt_line(""); - dsdt_line("Device (LNK%c)", 'A' + pin); - dsdt_line("{"); - dsdt_line(" Name (_HID, EisaId (\"PNP0C0F\"))"); - dsdt_line(" Name (_UID, 0x%02X)", pin + 1); - dsdt_line(" Method (_STA, 0, NotSerialized)"); - dsdt_line(" {"); - dsdt_line(" If (PIRV (PIR%c))", 'A' + pin); - dsdt_line(" {"); - dsdt_line(" Return (0x0B)"); - dsdt_line(" }"); - dsdt_line(" Else"); - dsdt_line(" {"); - dsdt_line(" Return (0x09)"); - dsdt_line(" }"); - dsdt_line(" }"); - dsdt_line(" Name (_PRS, ResourceTemplate ()"); - dsdt_line(" {"); - dsdt_line(" IRQ (Level, ActiveLow, Shared, )"); - dsdt_line(" {%s}", irq_prs); - dsdt_line(" })"); - dsdt_line(" Name (CB%02X, ResourceTemplate ()", pin + 1); - dsdt_line(" {"); - dsdt_line(" IRQ (Level, ActiveLow, Shared, )"); - dsdt_line(" {}"); - dsdt_line(" })"); - dsdt_line(" CreateWordField (CB%02X, 0x01, CIR%c)", - pin + 1, 'A' + pin); - dsdt_line(" Method (_CRS, 0, NotSerialized)"); - dsdt_line(" {"); - dsdt_line(" And (PIR%c, 0x%02X, Local0)", 'A' + pin, - PIRQ_DIS | PIRQ_IRQ); - dsdt_line(" If (PIRV (Local0))"); - dsdt_line(" {"); - dsdt_line(" ShiftLeft (0x01, Local0, CIR%c)", 'A' + pin); - dsdt_line(" }"); - dsdt_line(" Else"); - dsdt_line(" {"); - dsdt_line(" Store (0x00, CIR%c)", 'A' + pin); - dsdt_line(" }"); - dsdt_line(" Return (CB%02X)", 
pin + 1); - dsdt_line(" }"); - dsdt_line(" Method (_DIS, 0, NotSerialized)"); - dsdt_line(" {"); - dsdt_line(" Store (0x80, PIR%c)", 'A' + pin); - dsdt_line(" }"); - dsdt_line(" Method (_SRS, 1, NotSerialized)"); - dsdt_line(" {"); - dsdt_line(" CreateWordField (Arg0, 0x01, SIR%c)", 'A' + pin); - dsdt_line(" FindSetRightBit (SIR%c, Local0)", 'A' + pin); - dsdt_line(" Store (Decrement (Local0), PIR%c)", 'A' + pin); - dsdt_line(" }"); - dsdt_line("}"); - } - free(irq_prs); -} -LPC_DSDT(pirq_dsdt); diff --git a/usr.sbin/bhyve/pci_irq.h b/usr.sbin/bhyve/pci_irq.h deleted file mode 100644 index 24f9c99..0000000 --- a/usr.sbin/bhyve/pci_irq.h +++ /dev/null @@ -1,45 +0,0 @@ -/*- - * Copyright (c) 2014 Hudson River Trading LLC - * Written by: John H. Baldwin <jhb@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef __PCI_IRQ_H__ -#define __PCI_IRQ_H__ - -struct pci_devinst; - -void pci_irq_assert(struct pci_devinst *pi); -void pci_irq_deassert(struct pci_devinst *pi); -void pci_irq_init(struct vmctx *ctx); -void pci_irq_reserve(int irq); -void pci_irq_use(int irq); -int pirq_alloc_pin(struct vmctx *ctx); -int pirq_irq(int pin); -uint8_t pirq_read(int pin); -void pirq_write(struct vmctx *ctx, int pin, uint8_t val); - -#endif diff --git a/usr.sbin/bhyve/pci_lpc.c b/usr.sbin/bhyve/pci_lpc.c deleted file mode 100644 index 2203a00..0000000 --- a/usr.sbin/bhyve/pci_lpc.c +++ /dev/null @@ -1,450 +0,0 @@ -/*- - * Copyright (c) 2013 Neel Natu <neel@freebsd.org> - * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> -#include <machine/vmm.h> - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <vmmapi.h> - -#include "acpi.h" -#include "bootrom.h" -#include "inout.h" -#include "pci_emul.h" -#include "pci_irq.h" -#include "pci_lpc.h" -#include "uart_emul.h" - -#define IO_ICU1 0x20 -#define IO_ICU2 0xA0 - -SET_DECLARE(lpc_dsdt_set, struct lpc_dsdt); -SET_DECLARE(lpc_sysres_set, struct lpc_sysres); - -#define ELCR_PORT 0x4d0 -SYSRES_IO(ELCR_PORT, 2); - -#define IO_TIMER1_PORT 0x40 - -#define NMISC_PORT 0x61 -SYSRES_IO(NMISC_PORT, 1); - -static struct pci_devinst *lpc_bridge; - -static const char *romfile; - -#define LPC_UART_NUM 2 -static struct lpc_uart_softc { - struct uart_softc *uart_softc; - const char *opts; - int iobase; - int irq; - int enabled; -} lpc_uart_softc[LPC_UART_NUM]; - -static const char *lpc_uart_names[LPC_UART_NUM] = { "COM1", "COM2" }; - -/* - * LPC device configuration is in the following form: - * <lpc_device_name>[,<options>] - * For e.g. 
"com1,stdio" or "bootrom,/var/romfile" - */ -int -lpc_device_parse(const char *opts) -{ - int unit, error; - char *str, *cpy, *lpcdev; - - error = -1; - str = cpy = strdup(opts); - lpcdev = strsep(&str, ","); - if (lpcdev != NULL) { - if (strcasecmp(lpcdev, "bootrom") == 0) { - romfile = str; - error = 0; - goto done; - } - for (unit = 0; unit < LPC_UART_NUM; unit++) { - if (strcasecmp(lpcdev, lpc_uart_names[unit]) == 0) { - lpc_uart_softc[unit].opts = str; - error = 0; - goto done; - } - } - } - -done: - if (error) - free(cpy); - - return (error); -} - -const char * -lpc_bootrom(void) -{ - - return (romfile); -} - -static void -lpc_uart_intr_assert(void *arg) -{ - struct lpc_uart_softc *sc = arg; - - assert(sc->irq >= 0); - - vm_isa_pulse_irq(lpc_bridge->pi_vmctx, sc->irq, sc->irq); -} - -static void -lpc_uart_intr_deassert(void *arg) -{ - /* - * The COM devices on the LPC bus generate edge triggered interrupts, - * so nothing more to do here. - */ -} - -static int -lpc_uart_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - int offset; - struct lpc_uart_softc *sc = arg; - - offset = port - sc->iobase; - - switch (bytes) { - case 1: - if (in) - *eax = uart_read(sc->uart_softc, offset); - else - uart_write(sc->uart_softc, offset, *eax); - break; - case 2: - if (in) { - *eax = uart_read(sc->uart_softc, offset); - *eax |= uart_read(sc->uart_softc, offset + 1) << 8; - } else { - uart_write(sc->uart_softc, offset, *eax); - uart_write(sc->uart_softc, offset + 1, *eax >> 8); - } - break; - default: - return (-1); - } - - return (0); -} - -static int -lpc_init(struct vmctx *ctx) -{ - struct lpc_uart_softc *sc; - struct inout_port iop; - const char *name; - int unit, error; - - if (romfile != NULL) { - error = bootrom_init(ctx, romfile); - if (error) - return (error); - } - - /* COM1 and COM2 */ - for (unit = 0; unit < LPC_UART_NUM; unit++) { - sc = &lpc_uart_softc[unit]; - name = lpc_uart_names[unit]; - - if 
(uart_legacy_alloc(unit, &sc->iobase, &sc->irq) != 0) { - fprintf(stderr, "Unable to allocate resources for " - "LPC device %s\n", name); - return (-1); - } - pci_irq_reserve(sc->irq); - - sc->uart_softc = uart_init(lpc_uart_intr_assert, - lpc_uart_intr_deassert, sc); - - if (uart_set_backend(sc->uart_softc, sc->opts) != 0) { - fprintf(stderr, "Unable to initialize backend '%s' " - "for LPC device %s\n", sc->opts, name); - return (-1); - } - - bzero(&iop, sizeof(struct inout_port)); - iop.name = name; - iop.port = sc->iobase; - iop.size = UART_IO_BAR_SIZE; - iop.flags = IOPORT_F_INOUT; - iop.handler = lpc_uart_io_handler; - iop.arg = sc; - - error = register_inout(&iop); - assert(error == 0); - sc->enabled = 1; - } - - return (0); -} - -static void -pci_lpc_write_dsdt(struct pci_devinst *pi) -{ - struct lpc_dsdt **ldpp, *ldp; - - dsdt_line(""); - dsdt_line("Device (ISA)"); - dsdt_line("{"); - dsdt_line(" Name (_ADR, 0x%04X%04X)", pi->pi_slot, pi->pi_func); - dsdt_line(" OperationRegion (LPCR, PCI_Config, 0x00, 0x100)"); - dsdt_line(" Field (LPCR, AnyAcc, NoLock, Preserve)"); - dsdt_line(" {"); - dsdt_line(" Offset (0x60),"); - dsdt_line(" PIRA, 8,"); - dsdt_line(" PIRB, 8,"); - dsdt_line(" PIRC, 8,"); - dsdt_line(" PIRD, 8,"); - dsdt_line(" Offset (0x68),"); - dsdt_line(" PIRE, 8,"); - dsdt_line(" PIRF, 8,"); - dsdt_line(" PIRG, 8,"); - dsdt_line(" PIRH, 8"); - dsdt_line(" }"); - dsdt_line(""); - - dsdt_indent(1); - SET_FOREACH(ldpp, lpc_dsdt_set) { - ldp = *ldpp; - ldp->handler(); - } - - dsdt_line(""); - dsdt_line("Device (PIC)"); - dsdt_line("{"); - dsdt_line(" Name (_HID, EisaId (\"PNP0000\"))"); - dsdt_line(" Name (_CRS, ResourceTemplate ()"); - dsdt_line(" {"); - dsdt_indent(2); - dsdt_fixed_ioport(IO_ICU1, 2); - dsdt_fixed_ioport(IO_ICU2, 2); - dsdt_fixed_irq(2); - dsdt_unindent(2); - dsdt_line(" })"); - dsdt_line("}"); - - dsdt_line(""); - dsdt_line("Device (TIMR)"); - dsdt_line("{"); - dsdt_line(" Name (_HID, EisaId (\"PNP0100\"))"); - dsdt_line(" Name 
(_CRS, ResourceTemplate ()"); - dsdt_line(" {"); - dsdt_indent(2); - dsdt_fixed_ioport(IO_TIMER1_PORT, 4); - dsdt_fixed_irq(0); - dsdt_unindent(2); - dsdt_line(" })"); - dsdt_line("}"); - dsdt_unindent(1); - - dsdt_line("}"); -} - -static void -pci_lpc_sysres_dsdt(void) -{ - struct lpc_sysres **lspp, *lsp; - - dsdt_line(""); - dsdt_line("Device (SIO)"); - dsdt_line("{"); - dsdt_line(" Name (_HID, EisaId (\"PNP0C02\"))"); - dsdt_line(" Name (_CRS, ResourceTemplate ()"); - dsdt_line(" {"); - - dsdt_indent(2); - SET_FOREACH(lspp, lpc_sysres_set) { - lsp = *lspp; - switch (lsp->type) { - case LPC_SYSRES_IO: - dsdt_fixed_ioport(lsp->base, lsp->length); - break; - case LPC_SYSRES_MEM: - dsdt_fixed_mem32(lsp->base, lsp->length); - break; - } - } - dsdt_unindent(2); - - dsdt_line(" })"); - dsdt_line("}"); -} -LPC_DSDT(pci_lpc_sysres_dsdt); - -static void -pci_lpc_uart_dsdt(void) -{ - struct lpc_uart_softc *sc; - int unit; - - for (unit = 0; unit < LPC_UART_NUM; unit++) { - sc = &lpc_uart_softc[unit]; - if (!sc->enabled) - continue; - dsdt_line(""); - dsdt_line("Device (%s)", lpc_uart_names[unit]); - dsdt_line("{"); - dsdt_line(" Name (_HID, EisaId (\"PNP0501\"))"); - dsdt_line(" Name (_UID, %d)", unit + 1); - dsdt_line(" Name (_CRS, ResourceTemplate ()"); - dsdt_line(" {"); - dsdt_indent(2); - dsdt_fixed_ioport(sc->iobase, UART_IO_BAR_SIZE); - dsdt_fixed_irq(sc->irq); - dsdt_unindent(2); - dsdt_line(" })"); - dsdt_line("}"); - } -} -LPC_DSDT(pci_lpc_uart_dsdt); - -static int -pci_lpc_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int coff, int bytes, uint32_t val) -{ - int pirq_pin; - - if (bytes == 1) { - pirq_pin = 0; - if (coff >= 0x60 && coff <= 0x63) - pirq_pin = coff - 0x60 + 1; - if (coff >= 0x68 && coff <= 0x6b) - pirq_pin = coff - 0x68 + 5; - if (pirq_pin != 0) { - pirq_write(ctx, pirq_pin, val); - pci_set_cfgdata8(pi, coff, pirq_read(pirq_pin)); - return (0); - } - } - return (-1); -} - -static void -pci_lpc_write(struct vmctx *ctx, int vcpu, 
struct pci_devinst *pi, - int baridx, uint64_t offset, int size, uint64_t value) -{ -} - -static uint64_t -pci_lpc_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int baridx, uint64_t offset, int size) -{ - return (0); -} - -#define LPC_DEV 0x7000 -#define LPC_VENDOR 0x8086 - -static int -pci_lpc_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) -{ - - /* - * Do not allow more than one LPC bridge to be configured. - */ - if (lpc_bridge != NULL) { - fprintf(stderr, "Only one LPC bridge is allowed.\n"); - return (-1); - } - - /* - * Enforce that the LPC can only be configured on bus 0. This - * simplifies the ACPI DSDT because it can provide a decode for - * all legacy i/o ports behind bus 0. - */ - if (pi->pi_bus != 0) { - fprintf(stderr, "LPC bridge can be present only on bus 0.\n"); - return (-1); - } - - if (lpc_init(ctx) != 0) - return (-1); - - /* initialize config space */ - pci_set_cfgdata16(pi, PCIR_DEVICE, LPC_DEV); - pci_set_cfgdata16(pi, PCIR_VENDOR, LPC_VENDOR); - pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE); - pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA); - - lpc_bridge = pi; - - return (0); -} - -char * -lpc_pirq_name(int pin) -{ - char *name; - - if (lpc_bridge == NULL) - return (NULL); - asprintf(&name, "\\_SB.PC00.ISA.LNK%c,", 'A' + pin - 1); - return (name); -} - -void -lpc_pirq_routed(void) -{ - int pin; - - if (lpc_bridge == NULL) - return; - - for (pin = 0; pin < 4; pin++) - pci_set_cfgdata8(lpc_bridge, 0x60 + pin, pirq_read(pin + 1)); - for (pin = 0; pin < 4; pin++) - pci_set_cfgdata8(lpc_bridge, 0x68 + pin, pirq_read(pin + 5)); -} - -struct pci_devemu pci_de_lpc = { - .pe_emu = "lpc", - .pe_init = pci_lpc_init, - .pe_write_dsdt = pci_lpc_write_dsdt, - .pe_cfgwrite = pci_lpc_cfgwrite, - .pe_barwrite = pci_lpc_write, - .pe_barread = pci_lpc_read -}; -PCI_EMUL_SET(pci_de_lpc); diff --git a/usr.sbin/bhyve/pci_lpc.h b/usr.sbin/bhyve/pci_lpc.h deleted file mode 100644 index 431f5cf..0000000 --- 
a/usr.sbin/bhyve/pci_lpc.h +++ /dev/null @@ -1,73 +0,0 @@ -/*- - * Copyright (c) 2013 Neel Natu <neel@freebsd.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _LPC_H_ -#define _LPC_H_ - -#include <sys/linker_set.h> - -typedef void (*lpc_write_dsdt_t)(void); - -struct lpc_dsdt { - lpc_write_dsdt_t handler; -}; - -#define LPC_DSDT(handler) \ - static struct lpc_dsdt __CONCAT(__lpc_dsdt, __LINE__) = { \ - (handler), \ - }; \ - DATA_SET(lpc_dsdt_set, __CONCAT(__lpc_dsdt, __LINE__)) - -enum lpc_sysres_type { - LPC_SYSRES_IO, - LPC_SYSRES_MEM -}; - -struct lpc_sysres { - enum lpc_sysres_type type; - uint32_t base; - uint32_t length; -}; - -#define LPC_SYSRES(type, base, length) \ - static struct lpc_sysres __CONCAT(__lpc_sysres, __LINE__) = { \ - (type), \ - (base), \ - (length) \ - }; \ - DATA_SET(lpc_sysres_set, __CONCAT(__lpc_sysres, __LINE__)) - -#define SYSRES_IO(base, length) LPC_SYSRES(LPC_SYSRES_IO, base, length) -#define SYSRES_MEM(base, length) LPC_SYSRES(LPC_SYSRES_MEM, base, length) - -int lpc_device_parse(const char *opt); -char *lpc_pirq_name(int pin); -void lpc_pirq_routed(void); -const char *lpc_bootrom(void); - -#endif diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c deleted file mode 100644 index 78c1eae..0000000 --- a/usr.sbin/bhyve/pci_passthru.c +++ /dev/null @@ -1,897 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/types.h> -#include <sys/mman.h> -#include <sys/pciio.h> -#include <sys/ioctl.h> - -#include <dev/io/iodev.h> -#include <dev/pci/pcireg.h> - -#include <machine/iodev.h> - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <err.h> -#include <fcntl.h> -#include <unistd.h> - -#include <machine/vmm.h> -#include <vmmapi.h> -#include "pci_emul.h" -#include "mem.h" - -#ifndef _PATH_DEVPCI -#define _PATH_DEVPCI "/dev/pci" -#endif - -#ifndef _PATH_DEVIO -#define _PATH_DEVIO "/dev/io" -#endif - -#ifndef _PATH_MEM -#define _PATH_MEM "/dev/mem" -#endif - -#define LEGACY_SUPPORT 1 - -#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1) -#define MSIX_CAPLEN 12 - -static int pcifd = -1; -static int iofd = -1; -static int memfd = -1; - -struct passthru_softc { - struct pci_devinst *psc_pi; - struct pcibar psc_bar[PCI_BARMAX + 1]; - struct { - int capoff; - int msgctrl; - int emulated; - } psc_msi; - struct { - int capoff; - } psc_msix; - struct pcisel psc_sel; -}; - -static int -msi_caplen(int msgctrl) -{ - int len; - - len = 10; /* minimum length of msi capability */ - - if 
(msgctrl & PCIM_MSICTRL_64BIT) - len += 4; - -#if 0 - /* - * Ignore the 'mask' and 'pending' bits in the MSI capability. - * We'll let the guest manipulate them directly. - */ - if (msgctrl & PCIM_MSICTRL_VECTOR) - len += 10; -#endif - - return (len); -} - -static uint32_t -read_config(const struct pcisel *sel, long reg, int width) -{ - struct pci_io pi; - - bzero(&pi, sizeof(pi)); - pi.pi_sel = *sel; - pi.pi_reg = reg; - pi.pi_width = width; - - if (ioctl(pcifd, PCIOCREAD, &pi) < 0) - return (0); /* XXX */ - else - return (pi.pi_data); -} - -static void -write_config(const struct pcisel *sel, long reg, int width, uint32_t data) -{ - struct pci_io pi; - - bzero(&pi, sizeof(pi)); - pi.pi_sel = *sel; - pi.pi_reg = reg; - pi.pi_width = width; - pi.pi_data = data; - - (void)ioctl(pcifd, PCIOCWRITE, &pi); /* XXX */ -} - -#ifdef LEGACY_SUPPORT -static int -passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr) -{ - int capoff, i; - struct msicap msicap; - u_char *capdata; - - pci_populate_msicap(&msicap, msgnum, nextptr); - - /* - * XXX - * Copy the msi capability structure in the last 16 bytes of the - * config space. This is wrong because it could shadow something - * useful to the device. - */ - capoff = 256 - roundup(sizeof(msicap), 4); - capdata = (u_char *)&msicap; - for (i = 0; i < sizeof(msicap); i++) - pci_set_cfgdata8(pi, capoff + i, capdata[i]); - - return (capoff); -} -#endif /* LEGACY_SUPPORT */ - -static int -cfginitmsi(struct passthru_softc *sc) -{ - int i, ptr, capptr, cap, sts, caplen, table_size; - uint32_t u32; - struct pcisel sel; - struct pci_devinst *pi; - struct msixcap msixcap; - uint32_t *msixcap_ptr; - - pi = sc->psc_pi; - sel = sc->psc_sel; - - /* - * Parse the capabilities and cache the location of the MSI - * and MSI-X capabilities. 
- */ - sts = read_config(&sel, PCIR_STATUS, 2); - if (sts & PCIM_STATUS_CAPPRESENT) { - ptr = read_config(&sel, PCIR_CAP_PTR, 1); - while (ptr != 0 && ptr != 0xff) { - cap = read_config(&sel, ptr + PCICAP_ID, 1); - if (cap == PCIY_MSI) { - /* - * Copy the MSI capability into the config - * space of the emulated pci device - */ - sc->psc_msi.capoff = ptr; - sc->psc_msi.msgctrl = read_config(&sel, - ptr + 2, 2); - sc->psc_msi.emulated = 0; - caplen = msi_caplen(sc->psc_msi.msgctrl); - capptr = ptr; - while (caplen > 0) { - u32 = read_config(&sel, capptr, 4); - pci_set_cfgdata32(pi, capptr, u32); - caplen -= 4; - capptr += 4; - } - } else if (cap == PCIY_MSIX) { - /* - * Copy the MSI-X capability - */ - sc->psc_msix.capoff = ptr; - caplen = 12; - msixcap_ptr = (uint32_t*) &msixcap; - capptr = ptr; - while (caplen > 0) { - u32 = read_config(&sel, capptr, 4); - *msixcap_ptr = u32; - pci_set_cfgdata32(pi, capptr, u32); - caplen -= 4; - capptr += 4; - msixcap_ptr++; - } - } - ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1); - } - } - - if (sc->psc_msix.capoff != 0) { - pi->pi_msix.pba_bar = - msixcap.pba_info & PCIM_MSIX_BIR_MASK; - pi->pi_msix.pba_offset = - msixcap.pba_info & ~PCIM_MSIX_BIR_MASK; - pi->pi_msix.table_bar = - msixcap.table_info & PCIM_MSIX_BIR_MASK; - pi->pi_msix.table_offset = - msixcap.table_info & ~PCIM_MSIX_BIR_MASK; - pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl); - pi->pi_msix.pba_size = PBA_SIZE(pi->pi_msix.table_count); - - /* Allocate the emulated MSI-X table array */ - table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; - pi->pi_msix.table = calloc(1, table_size); - - /* Mask all table entries */ - for (i = 0; i < pi->pi_msix.table_count; i++) { - pi->pi_msix.table[i].vector_control |= - PCIM_MSIX_VCTRL_MASK; - } - } - -#ifdef LEGACY_SUPPORT - /* - * If the passthrough device does not support MSI then craft a - * MSI capability for it. We link the new MSI capability at the - * head of the list of capabilities. 
- */ - if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) { - int origptr, msiptr; - origptr = read_config(&sel, PCIR_CAP_PTR, 1); - msiptr = passthru_add_msicap(pi, 1, origptr); - sc->psc_msi.capoff = msiptr; - sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2); - sc->psc_msi.emulated = 1; - pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr); - } -#endif - - /* Make sure one of the capabilities is present */ - if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0) - return (-1); - else - return (0); -} - -static uint64_t -msix_table_read(struct passthru_softc *sc, uint64_t offset, int size) -{ - struct pci_devinst *pi; - struct msix_table_entry *entry; - uint8_t *src8; - uint16_t *src16; - uint32_t *src32; - uint64_t *src64; - uint64_t data; - size_t entry_offset; - int index; - - pi = sc->psc_pi; - if (offset >= pi->pi_msix.pba_offset && - offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { - switch(size) { - case 1: - src8 = (uint8_t *)(pi->pi_msix.pba_page + offset - - pi->pi_msix.pba_page_offset); - data = *src8; - break; - case 2: - src16 = (uint16_t *)(pi->pi_msix.pba_page + offset - - pi->pi_msix.pba_page_offset); - data = *src16; - break; - case 4: - src32 = (uint32_t *)(pi->pi_msix.pba_page + offset - - pi->pi_msix.pba_page_offset); - data = *src32; - break; - case 8: - src64 = (uint64_t *)(pi->pi_msix.pba_page + offset - - pi->pi_msix.pba_page_offset); - data = *src64; - break; - default: - return (-1); - } - return (data); - } - - if (offset < pi->pi_msix.table_offset) - return (-1); - - offset -= pi->pi_msix.table_offset; - index = offset / MSIX_TABLE_ENTRY_SIZE; - if (index >= pi->pi_msix.table_count) - return (-1); - - entry = &pi->pi_msix.table[index]; - entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; - - switch(size) { - case 1: - src8 = (uint8_t *)((void *)entry + entry_offset); - data = *src8; - break; - case 2: - src16 = (uint16_t *)((void *)entry + entry_offset); - data = *src16; - break; - case 4: - src32 = (uint32_t 
*)((void *)entry + entry_offset); - data = *src32; - break; - case 8: - src64 = (uint64_t *)((void *)entry + entry_offset); - data = *src64; - break; - default: - return (-1); - } - - return (data); -} - -static void -msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc, - uint64_t offset, int size, uint64_t data) -{ - struct pci_devinst *pi; - struct msix_table_entry *entry; - uint8_t *dest8; - uint16_t *dest16; - uint32_t *dest32; - uint64_t *dest64; - size_t entry_offset; - uint32_t vector_control; - int error, index; - - pi = sc->psc_pi; - if (offset >= pi->pi_msix.pba_offset && - offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { - switch(size) { - case 1: - dest8 = (uint8_t *)(pi->pi_msix.pba_page + offset - - pi->pi_msix.pba_page_offset); - *dest8 = data; - break; - case 2: - dest16 = (uint16_t *)(pi->pi_msix.pba_page + offset - - pi->pi_msix.pba_page_offset); - *dest16 = data; - break; - case 4: - dest32 = (uint32_t *)(pi->pi_msix.pba_page + offset - - pi->pi_msix.pba_page_offset); - *dest32 = data; - break; - case 8: - dest64 = (uint64_t *)(pi->pi_msix.pba_page + offset - - pi->pi_msix.pba_page_offset); - *dest64 = data; - break; - default: - break; - } - return; - } - - if (offset < pi->pi_msix.table_offset) - return; - - offset -= pi->pi_msix.table_offset; - index = offset / MSIX_TABLE_ENTRY_SIZE; - if (index >= pi->pi_msix.table_count) - return; - - entry = &pi->pi_msix.table[index]; - entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; - - /* Only 4 byte naturally-aligned writes are supported */ - assert(size == 4); - assert(entry_offset % 4 == 0); - - vector_control = entry->vector_control; - dest32 = (uint32_t *)((void *)entry + entry_offset); - *dest32 = data; - /* If MSI-X hasn't been enabled, do nothing */ - if (pi->pi_msix.enabled) { - /* If the entry is masked, don't set it up */ - if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 || - (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { - error = vm_setup_pptdev_msix(ctx, 
vcpu, - sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, index, entry->addr, - entry->msg_data, entry->vector_control); - } - } -} - -static int -init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) -{ - int b, s, f; - int error, idx; - size_t len, remaining; - uint32_t table_size, table_offset; - uint32_t pba_size, pba_offset; - vm_paddr_t start; - struct pci_devinst *pi = sc->psc_pi; - - assert(pci_msix_table_bar(pi) >= 0 && pci_msix_pba_bar(pi) >= 0); - - b = sc->psc_sel.pc_bus; - s = sc->psc_sel.pc_dev; - f = sc->psc_sel.pc_func; - - /* - * If the MSI-X table BAR maps memory intended for - * other uses, it is at least assured that the table - * either resides in its own page within the region, - * or it resides in a page shared with only the PBA. - */ - table_offset = rounddown2(pi->pi_msix.table_offset, 4096); - - table_size = pi->pi_msix.table_offset - table_offset; - table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; - table_size = roundup2(table_size, 4096); - - idx = pi->pi_msix.table_bar; - start = pi->pi_bar[idx].addr; - remaining = pi->pi_bar[idx].size; - - if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar) { - pba_offset = pi->pi_msix.pba_offset; - pba_size = pi->pi_msix.pba_size; - if (pba_offset >= table_offset + table_size || - table_offset >= pba_offset + pba_size) { - /* - * If the PBA does not share a page with the MSI-x - * tables, no PBA emulation is required. - */ - pi->pi_msix.pba_page = NULL; - pi->pi_msix.pba_page_offset = 0; - } else { - /* - * The PBA overlaps with either the first or last - * page of the MSI-X table region. Map the - * appropriate page. 
- */ - if (pba_offset <= table_offset) - pi->pi_msix.pba_page_offset = table_offset; - else - pi->pi_msix.pba_page_offset = table_offset + - table_size - 4096; - pi->pi_msix.pba_page = mmap(NULL, 4096, PROT_READ | - PROT_WRITE, MAP_SHARED, memfd, start + - pi->pi_msix.pba_page_offset); - if (pi->pi_msix.pba_page == MAP_FAILED) { - warn( - "Failed to map PBA page for MSI-X on %d/%d/%d", - b, s, f); - return (-1); - } - } - } - - /* Map everything before the MSI-X table */ - if (table_offset > 0) { - len = table_offset; - error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base); - if (error) - return (error); - - base += len; - start += len; - remaining -= len; - } - - /* Skip the MSI-X table */ - base += table_size; - start += table_size; - remaining -= table_size; - - /* Map everything beyond the end of the MSI-X table */ - if (remaining > 0) { - len = remaining; - error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base); - if (error) - return (error); - } - - return (0); -} - -static int -cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) -{ - int i, error; - struct pci_devinst *pi; - struct pci_bar_io bar; - enum pcibar_type bartype; - uint64_t base, size; - - pi = sc->psc_pi; - - /* - * Initialize BAR registers - */ - for (i = 0; i <= PCI_BARMAX; i++) { - bzero(&bar, sizeof(bar)); - bar.pbi_sel = sc->psc_sel; - bar.pbi_reg = PCIR_BAR(i); - - if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0) - continue; - - if (PCI_BAR_IO(bar.pbi_base)) { - bartype = PCIBAR_IO; - base = bar.pbi_base & PCIM_BAR_IO_BASE; - } else { - switch (bar.pbi_base & PCIM_BAR_MEM_TYPE) { - case PCIM_BAR_MEM_64: - bartype = PCIBAR_MEM64; - break; - default: - bartype = PCIBAR_MEM32; - break; - } - base = bar.pbi_base & PCIM_BAR_MEM_BASE; - } - size = bar.pbi_length; - - if (bartype != PCIBAR_IO) { - if (((base | size) & PAGE_MASK) != 0) { - warnx("passthru device %d/%d/%d BAR %d: " - "base %#lx or size %#lx not page aligned\n", - sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, 
i, base, size); - return (-1); - } - } - - /* Cache information about the "real" BAR */ - sc->psc_bar[i].type = bartype; - sc->psc_bar[i].size = size; - sc->psc_bar[i].addr = base; - - /* Allocate the BAR in the guest I/O or MMIO space */ - error = pci_emul_alloc_pbar(pi, i, base, bartype, size); - if (error) - return (-1); - - /* The MSI-X table needs special handling */ - if (i == pci_msix_table_bar(pi)) { - error = init_msix_table(ctx, sc, base); - if (error) - return (-1); - } else if (bartype != PCIBAR_IO) { - /* Map the physical BAR in the guest MMIO space */ - error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, - sc->psc_sel.pc_dev, sc->psc_sel.pc_func, - pi->pi_bar[i].addr, pi->pi_bar[i].size, base); - if (error) - return (-1); - } - - /* - * 64-bit BAR takes up two slots so skip the next one. - */ - if (bartype == PCIBAR_MEM64) { - i++; - assert(i <= PCI_BARMAX); - sc->psc_bar[i].type = PCIBAR_MEMHI64; - } - } - return (0); -} - -static int -cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func) -{ - int error; - struct passthru_softc *sc; - - error = 1; - sc = pi->pi_arg; - - bzero(&sc->psc_sel, sizeof(struct pcisel)); - sc->psc_sel.pc_bus = bus; - sc->psc_sel.pc_dev = slot; - sc->psc_sel.pc_func = func; - - if (cfginitmsi(sc) != 0) { - warnx("failed to initialize MSI for PCI %d/%d/%d", - bus, slot, func); - goto done; - } - - if (cfginitbar(ctx, sc) != 0) { - warnx("failed to initialize BARs for PCI %d/%d/%d", - bus, slot, func); - goto done; - } - - error = 0; /* success */ -done: - return (error); -} - -static int -passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) -{ - int bus, slot, func, error, memflags; - struct passthru_softc *sc; - - sc = NULL; - error = 1; - - memflags = vm_get_memflags(ctx); - if (!(memflags & VM_MEM_F_WIRED)) { - warnx("passthru requires guest memory to be wired"); - goto done; - } - - if (pcifd < 0) { - pcifd = open(_PATH_DEVPCI, O_RDWR, 0); - if (pcifd < 0) { - warn("failed to open 
%s", _PATH_DEVPCI); - goto done; - } - } - - if (iofd < 0) { - iofd = open(_PATH_DEVIO, O_RDWR, 0); - if (iofd < 0) { - warn("failed to open %s", _PATH_DEVIO); - goto done; - } - } - - if (memfd < 0) { - memfd = open(_PATH_MEM, O_RDWR, 0); - if (memfd < 0) { - warn("failed to open %s", _PATH_MEM); - goto done; - } - } - - if (opts == NULL || - sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3) { - warnx("invalid passthru options"); - goto done; - } - - if (vm_assign_pptdev(ctx, bus, slot, func) != 0) { - warnx("PCI device at %d/%d/%d is not using the ppt(4) driver", - bus, slot, func); - goto done; - } - - sc = calloc(1, sizeof(struct passthru_softc)); - - pi->pi_arg = sc; - sc->psc_pi = pi; - - /* initialize config space */ - if ((error = cfginit(ctx, pi, bus, slot, func)) != 0) - goto done; - - error = 0; /* success */ -done: - if (error) { - free(sc); - vm_unassign_pptdev(ctx, bus, slot, func); - } - return (error); -} - -static int -bar_access(int coff) -{ - if (coff >= PCIR_BAR(0) && coff < PCIR_BAR(PCI_BARMAX + 1)) - return (1); - else - return (0); -} - -static int -msicap_access(struct passthru_softc *sc, int coff) -{ - int caplen; - - if (sc->psc_msi.capoff == 0) - return (0); - - caplen = msi_caplen(sc->psc_msi.msgctrl); - - if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen) - return (1); - else - return (0); -} - -static int -msixcap_access(struct passthru_softc *sc, int coff) -{ - if (sc->psc_msix.capoff == 0) - return (0); - - return (coff >= sc->psc_msix.capoff && - coff < sc->psc_msix.capoff + MSIX_CAPLEN); -} - -static int -passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int coff, int bytes, uint32_t *rv) -{ - struct passthru_softc *sc; - - sc = pi->pi_arg; - - /* - * PCI BARs and MSI capability is emulated. - */ - if (bar_access(coff) || msicap_access(sc, coff)) - return (-1); - -#ifdef LEGACY_SUPPORT - /* - * Emulate PCIR_CAP_PTR if this device does not support MSI capability - * natively. 
- */ - if (sc->psc_msi.emulated) { - if (coff >= PCIR_CAP_PTR && coff < PCIR_CAP_PTR + 4) - return (-1); - } -#endif - - /* Everything else just read from the device's config space */ - *rv = read_config(&sc->psc_sel, coff, bytes); - - return (0); -} - -static int -passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int coff, int bytes, uint32_t val) -{ - int error, msix_table_entries, i; - struct passthru_softc *sc; - - sc = pi->pi_arg; - - /* - * PCI BARs are emulated - */ - if (bar_access(coff)) - return (-1); - - /* - * MSI capability is emulated - */ - if (msicap_access(sc, coff)) { - msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val); - - error = vm_setup_pptdev_msi(ctx, vcpu, sc->psc_sel.pc_bus, - sc->psc_sel.pc_dev, sc->psc_sel.pc_func, - pi->pi_msi.addr, pi->pi_msi.msg_data, - pi->pi_msi.maxmsgnum); - if (error != 0) - err(1, "vm_setup_pptdev_msi"); - return (0); - } - - if (msixcap_access(sc, coff)) { - msixcap_cfgwrite(pi, sc->psc_msix.capoff, coff, bytes, val); - if (pi->pi_msix.enabled) { - msix_table_entries = pi->pi_msix.table_count; - for (i = 0; i < msix_table_entries; i++) { - error = vm_setup_pptdev_msix(ctx, vcpu, - sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, i, - pi->pi_msix.table[i].addr, - pi->pi_msix.table[i].msg_data, - pi->pi_msix.table[i].vector_control); - - if (error) - err(1, "vm_setup_pptdev_msix"); - } - } - return (0); - } - -#ifdef LEGACY_SUPPORT - /* - * If this device does not support MSI natively then we cannot let - * the guest disable legacy interrupts from the device. It is the - * legacy interrupt that is triggering the virtual MSI to the guest. 
- */ - if (sc->psc_msi.emulated && pci_msi_enabled(pi)) { - if (coff == PCIR_COMMAND && bytes == 2) - val &= ~PCIM_CMD_INTxDIS; - } -#endif - - write_config(&sc->psc_sel, coff, bytes, val); - - return (0); -} - -static void -passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, - uint64_t offset, int size, uint64_t value) -{ - struct passthru_softc *sc; - struct iodev_pio_req pio; - - sc = pi->pi_arg; - - if (baridx == pci_msix_table_bar(pi)) { - msix_table_write(ctx, vcpu, sc, offset, size, value); - } else { - assert(pi->pi_bar[baridx].type == PCIBAR_IO); - bzero(&pio, sizeof(struct iodev_pio_req)); - pio.access = IODEV_PIO_WRITE; - pio.port = sc->psc_bar[baridx].addr + offset; - pio.width = size; - pio.val = value; - - (void)ioctl(iofd, IODEV_PIO, &pio); - } -} - -static uint64_t -passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, - uint64_t offset, int size) -{ - struct passthru_softc *sc; - struct iodev_pio_req pio; - uint64_t val; - - sc = pi->pi_arg; - - if (baridx == pci_msix_table_bar(pi)) { - val = msix_table_read(sc, offset, size); - } else { - assert(pi->pi_bar[baridx].type == PCIBAR_IO); - bzero(&pio, sizeof(struct iodev_pio_req)); - pio.access = IODEV_PIO_READ; - pio.port = sc->psc_bar[baridx].addr + offset; - pio.width = size; - pio.val = 0; - - (void)ioctl(iofd, IODEV_PIO, &pio); - - val = pio.val; - } - - return (val); -} - -struct pci_devemu passthru = { - .pe_emu = "passthru", - .pe_init = passthru_init, - .pe_cfgwrite = passthru_cfgwrite, - .pe_cfgread = passthru_cfgread, - .pe_barwrite = passthru_write, - .pe_barread = passthru_read, -}; -PCI_EMUL_SET(passthru); diff --git a/usr.sbin/bhyve/pci_uart.c b/usr.sbin/bhyve/pci_uart.c deleted file mode 100644 index 21b93bf..0000000 --- a/usr.sbin/bhyve/pci_uart.c +++ /dev/null @@ -1,119 +0,0 @@ -/*- - * Copyright (c) 2012 NetApp, Inc. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> - -#include <stdio.h> - -#include "bhyverun.h" -#include "pci_emul.h" -#include "uart_emul.h" - -/* - * Pick a PCI vid/did of a chip with a single uart at - * BAR0, that most versions of FreeBSD can understand: - * Siig CyberSerial 1-port. 
- */ -#define COM_VENDOR 0x131f -#define COM_DEV 0x2000 - -static void -pci_uart_intr_assert(void *arg) -{ - struct pci_devinst *pi = arg; - - pci_lintr_assert(pi); -} - -static void -pci_uart_intr_deassert(void *arg) -{ - struct pci_devinst *pi = arg; - - pci_lintr_deassert(pi); -} - -static void -pci_uart_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int baridx, uint64_t offset, int size, uint64_t value) -{ - - assert(baridx == 0); - assert(size == 1); - - uart_write(pi->pi_arg, offset, value); -} - -uint64_t -pci_uart_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int baridx, uint64_t offset, int size) -{ - uint8_t val; - - assert(baridx == 0); - assert(size == 1); - - val = uart_read(pi->pi_arg, offset); - return (val); -} - -static int -pci_uart_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) -{ - struct uart_softc *sc; - - pci_emul_alloc_bar(pi, 0, PCIBAR_IO, UART_IO_BAR_SIZE); - pci_lintr_request(pi); - - /* initialize config space */ - pci_set_cfgdata16(pi, PCIR_DEVICE, COM_DEV); - pci_set_cfgdata16(pi, PCIR_VENDOR, COM_VENDOR); - pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_SIMPLECOMM); - - sc = uart_init(pci_uart_intr_assert, pci_uart_intr_deassert, pi); - pi->pi_arg = sc; - - if (uart_set_backend(sc, opts) != 0) { - fprintf(stderr, "Unable to initialize backend '%s' for " - "pci uart at %d:%d\n", opts, pi->pi_slot, pi->pi_func); - return (-1); - } - - return (0); -} - -struct pci_devemu pci_de_com = { - .pe_emu = "uart", - .pe_init = pci_uart_init, - .pe_barwrite = pci_uart_write, - .pe_barread = pci_uart_read -}; -PCI_EMUL_SET(pci_de_com); diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c deleted file mode 100644 index a437c1c..0000000 --- a/usr.sbin/bhyve/pci_virtio_block.c +++ /dev/null @@ -1,410 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/linker_set.h> -#include <sys/stat.h> -#include <sys/uio.h> -#include <sys/ioctl.h> -#include <sys/disk.h> - -#include <errno.h> -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdint.h> -#include <string.h> -#include <strings.h> -#include <unistd.h> -#include <assert.h> -#include <pthread.h> -#include <md5.h> - -#include "bhyverun.h" -#include "pci_emul.h" -#include "virtio.h" -#include "block_if.h" - -#define VTBLK_RINGSZ 64 - -#define VTBLK_S_OK 0 -#define VTBLK_S_IOERR 1 -#define VTBLK_S_UNSUPP 2 - -#define VTBLK_BLK_ID_BYTES 20 - -/* Capability bits */ -#define VTBLK_F_SEG_MAX (1 << 2) /* Maximum request segments */ -#define VTBLK_F_BLK_SIZE (1 << 6) /* cfg block size valid */ -#define VTBLK_F_FLUSH (1 << 9) /* Cache flush support */ -#define VTBLK_F_TOPOLOGY (1 << 10) /* Optimal I/O alignment */ - -/* - * Host capabilities - */ -#define VTBLK_S_HOSTCAPS \ - ( VTBLK_F_SEG_MAX | \ - VTBLK_F_BLK_SIZE | \ - VTBLK_F_FLUSH | \ - VTBLK_F_TOPOLOGY | \ - VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */ - -/* - * Config space "registers" - */ -struct vtblk_config { - uint64_t vbc_capacity; - uint32_t vbc_size_max; - uint32_t vbc_seg_max; - struct { - uint16_t cylinders; - uint8_t heads; - uint8_t sectors; - } vbc_geometry; - uint32_t vbc_blk_size; - struct { - uint8_t physical_block_exp; - uint8_t alignment_offset; - uint16_t min_io_size; - uint32_t opt_io_size; - } vbc_topology; - uint8_t vbc_writeback; -} __packed; - -/* - * Fixed-size block header - */ -struct virtio_blk_hdr { -#define VBH_OP_READ 0 -#define VBH_OP_WRITE 1 -#define VBH_OP_FLUSH 4 -#define VBH_OP_FLUSH_OUT 5 -#define VBH_OP_IDENT 8 -#define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */ - uint32_t vbh_type; - uint32_t vbh_ioprio; - uint64_t vbh_sector; -} __packed; - -/* - * Debug printf - */ -static int pci_vtblk_debug; -#define DPRINTF(params) if 
(pci_vtblk_debug) printf params -#define WPRINTF(params) printf params - -struct pci_vtblk_ioreq { - struct blockif_req io_req; - struct pci_vtblk_softc *io_sc; - uint8_t *io_status; - uint16_t io_idx; -}; - -/* - * Per-device softc - */ -struct pci_vtblk_softc { - struct virtio_softc vbsc_vs; - pthread_mutex_t vsc_mtx; - struct vqueue_info vbsc_vq; - struct vtblk_config vbsc_cfg; - struct blockif_ctxt *bc; - char vbsc_ident[VTBLK_BLK_ID_BYTES]; - struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ]; -}; - -static void pci_vtblk_reset(void *); -static void pci_vtblk_notify(void *, struct vqueue_info *); -static int pci_vtblk_cfgread(void *, int, int, uint32_t *); -static int pci_vtblk_cfgwrite(void *, int, int, uint32_t); - -static struct virtio_consts vtblk_vi_consts = { - "vtblk", /* our name */ - 1, /* we support 1 virtqueue */ - sizeof(struct vtblk_config), /* config reg size */ - pci_vtblk_reset, /* reset */ - pci_vtblk_notify, /* device-wide qnotify */ - pci_vtblk_cfgread, /* read PCI config */ - pci_vtblk_cfgwrite, /* write PCI config */ - NULL, /* apply negotiated features */ - VTBLK_S_HOSTCAPS, /* our capabilities */ -}; - -static void -pci_vtblk_reset(void *vsc) -{ - struct pci_vtblk_softc *sc = vsc; - - DPRINTF(("vtblk: device reset requested !\n")); - vi_reset_dev(&sc->vbsc_vs); -} - -static void -pci_vtblk_done(struct blockif_req *br, int err) -{ - struct pci_vtblk_ioreq *io = br->br_param; - struct pci_vtblk_softc *sc = io->io_sc; - - /* convert errno into a virtio block error return */ - if (err == EOPNOTSUPP || err == ENOSYS) - *io->io_status = VTBLK_S_UNSUPP; - else if (err != 0) - *io->io_status = VTBLK_S_IOERR; - else - *io->io_status = VTBLK_S_OK; - - /* - * Return the descriptor back to the host. - * We wrote 1 byte (our status) to host. 
- */ - pthread_mutex_lock(&sc->vsc_mtx); - vq_relchain(&sc->vbsc_vq, io->io_idx, 1); - vq_endchains(&sc->vbsc_vq, 0); - pthread_mutex_unlock(&sc->vsc_mtx); -} - -static void -pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq) -{ - struct virtio_blk_hdr *vbh; - struct pci_vtblk_ioreq *io; - int i, n; - int err; - ssize_t iolen; - int writeop, type; - struct iovec iov[BLOCKIF_IOV_MAX + 2]; - uint16_t idx, flags[BLOCKIF_IOV_MAX + 2]; - - n = vq_getchain(vq, &idx, iov, BLOCKIF_IOV_MAX + 2, flags); - - /* - * The first descriptor will be the read-only fixed header, - * and the last is for status (hence +2 above and below). - * The remaining iov's are the actual data I/O vectors. - * - * XXX - note - this fails on crash dump, which does a - * VIRTIO_BLK_T_FLUSH with a zero transfer length - */ - assert(n >= 2 && n <= BLOCKIF_IOV_MAX + 2); - - io = &sc->vbsc_ios[idx]; - assert((flags[0] & VRING_DESC_F_WRITE) == 0); - assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr)); - vbh = iov[0].iov_base; - memcpy(&io->io_req.br_iov, &iov[1], sizeof(struct iovec) * (n - 2)); - io->io_req.br_iovcnt = n - 2; - io->io_req.br_offset = vbh->vbh_sector * DEV_BSIZE; - io->io_status = iov[--n].iov_base; - assert(iov[n].iov_len == 1); - assert(flags[n] & VRING_DESC_F_WRITE); - - /* - * XXX - * The guest should not be setting the BARRIER flag because - * we don't advertise the capability. - */ - type = vbh->vbh_type & ~VBH_FLAG_BARRIER; - writeop = (type == VBH_OP_WRITE); - - iolen = 0; - for (i = 1; i < n; i++) { - /* - * - write op implies read-only descriptor, - * - read/ident op implies write-only descriptor, - * therefore test the inverse of the descriptor bit - * to the op. - */ - assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop); - iolen += iov[i].iov_len; - } - io->io_req.br_resid = iolen; - - DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld\n\r", - writeop ? 
"write" : "read/ident", iolen, i - 1, - io->io_req.br_offset)); - - switch (type) { - case VBH_OP_READ: - err = blockif_read(sc->bc, &io->io_req); - break; - case VBH_OP_WRITE: - err = blockif_write(sc->bc, &io->io_req); - break; - case VBH_OP_FLUSH: - case VBH_OP_FLUSH_OUT: - err = blockif_flush(sc->bc, &io->io_req); - break; - case VBH_OP_IDENT: - /* Assume a single buffer */ - /* S/n equal to buffer is not zero-terminated. */ - memset(iov[1].iov_base, 0, iov[1].iov_len); - strncpy(iov[1].iov_base, sc->vbsc_ident, - MIN(iov[1].iov_len, sizeof(sc->vbsc_ident))); - pci_vtblk_done(&io->io_req, 0); - return; - default: - pci_vtblk_done(&io->io_req, EOPNOTSUPP); - return; - } - assert(err == 0); -} - -static void -pci_vtblk_notify(void *vsc, struct vqueue_info *vq) -{ - struct pci_vtblk_softc *sc = vsc; - - while (vq_has_descs(vq)) - pci_vtblk_proc(sc, vq); -} - -static int -pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) -{ - char bident[sizeof("XX:X:X")]; - struct blockif_ctxt *bctxt; - MD5_CTX mdctx; - u_char digest[16]; - struct pci_vtblk_softc *sc; - off_t size; - int i, sectsz, sts, sto; - - if (opts == NULL) { - printf("virtio-block: backing device required\n"); - return (1); - } - - /* - * The supplied backing file has to exist - */ - snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func); - bctxt = blockif_open(opts, bident); - if (bctxt == NULL) { - perror("Could not open backing file"); - return (1); - } - - size = blockif_size(bctxt); - sectsz = blockif_sectsz(bctxt); - blockif_psectsz(bctxt, &sts, &sto); - - sc = calloc(1, sizeof(struct pci_vtblk_softc)); - sc->bc = bctxt; - for (i = 0; i < VTBLK_RINGSZ; i++) { - struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i]; - io->io_req.br_callback = pci_vtblk_done; - io->io_req.br_param = io; - io->io_sc = sc; - io->io_idx = i; - } - - pthread_mutex_init(&sc->vsc_mtx, NULL); - - /* init virtio softc and virtqueues */ - vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, 
&sc->vbsc_vq); - sc->vbsc_vs.vs_mtx = &sc->vsc_mtx; - - sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ; - /* sc->vbsc_vq.vq_notify = we have no per-queue notify */ - - /* - * Create an identifier for the backing file. Use parts of the - * md5 sum of the filename - */ - MD5Init(&mdctx); - MD5Update(&mdctx, opts, strlen(opts)); - MD5Final(digest, &mdctx); - sprintf(sc->vbsc_ident, "BHYVE-%02X%02X-%02X%02X-%02X%02X", - digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]); - - /* setup virtio block config space */ - sc->vbsc_cfg.vbc_capacity = size / DEV_BSIZE; /* 512-byte units */ - sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */ - sc->vbsc_cfg.vbc_seg_max = BLOCKIF_IOV_MAX; - sc->vbsc_cfg.vbc_geometry.cylinders = 0; /* no geometry */ - sc->vbsc_cfg.vbc_geometry.heads = 0; - sc->vbsc_cfg.vbc_geometry.sectors = 0; - sc->vbsc_cfg.vbc_blk_size = sectsz; - sc->vbsc_cfg.vbc_topology.physical_block_exp = - (sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0; - sc->vbsc_cfg.vbc_topology.alignment_offset = - (sto != 0) ? ((sts - sto) / sectsz) : 0; - sc->vbsc_cfg.vbc_topology.min_io_size = 0; - sc->vbsc_cfg.vbc_topology.opt_io_size = 0; - sc->vbsc_cfg.vbc_writeback = 0; - - /* - * Should we move some of this into virtio.c? Could - * have the device, class, and subdev_0 as fields in - * the virtio constants structure. 
- */ - pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK); - pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); - pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); - pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK); - pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); - - if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) { - blockif_close(sc->bc); - free(sc); - return (1); - } - vi_set_io_bar(&sc->vbsc_vs, 0); - return (0); -} - -static int -pci_vtblk_cfgwrite(void *vsc, int offset, int size, uint32_t value) -{ - - DPRINTF(("vtblk: write to readonly reg %d\n\r", offset)); - return (1); -} - -static int -pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval) -{ - struct pci_vtblk_softc *sc = vsc; - void *ptr; - - /* our caller has already verified offset and size */ - ptr = (uint8_t *)&sc->vbsc_cfg + offset; - memcpy(retval, ptr, size); - return (0); -} - -struct pci_devemu pci_de_vblk = { - .pe_emu = "virtio-blk", - .pe_init = pci_vtblk_init, - .pe_barwrite = vi_pci_write, - .pe_barread = vi_pci_read -}; -PCI_EMUL_SET(pci_de_vblk); diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c deleted file mode 100644 index 9f220d1..0000000 --- a/usr.sbin/bhyve/pci_virtio_net.c +++ /dev/null @@ -1,976 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/linker_set.h> -#include <sys/select.h> -#include <sys/uio.h> -#include <sys/ioctl.h> -#include <machine/atomic.h> -#include <net/ethernet.h> -#ifndef NETMAP_WITH_LIBS -#define NETMAP_WITH_LIBS -#endif -#include <net/netmap_user.h> - -#include <errno.h> -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdint.h> -#include <string.h> -#include <strings.h> -#include <unistd.h> -#include <assert.h> -#include <md5.h> -#include <pthread.h> -#include <pthread_np.h> - -#include "bhyverun.h" -#include "pci_emul.h" -#include "mevent.h" -#include "virtio.h" - -#define VTNET_RINGSZ 1024 - -#define VTNET_MAXSEGS 256 - -/* - * Host capabilities. Note that we only offer a few of these. 
- */ -#define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */ -#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */ -#define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */ -#define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */ -#define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */ -#define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */ -#define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */ -#define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */ -#define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */ -#define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */ -#define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */ -#define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */ -#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */ -#define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */ -#define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */ -#define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */ -#define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */ -#define VIRTIO_NET_F_GUEST_ANNOUNCE \ - (1 << 21) /* guest can send gratuitous pkts */ - -#define VTNET_S_HOSTCAPS \ - ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \ - VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC) - -/* - * PCI config-space "registers" - */ -struct virtio_net_config { - uint8_t mac[6]; - uint16_t status; -} __packed; - -/* - * Queue definitions. 
- */ -#define VTNET_RXQ 0 -#define VTNET_TXQ 1 -#define VTNET_CTLQ 2 /* NB: not yet supported */ - -#define VTNET_MAXQ 3 - -/* - * Fixed network header size - */ -struct virtio_net_rxhdr { - uint8_t vrh_flags; - uint8_t vrh_gso_type; - uint16_t vrh_hdr_len; - uint16_t vrh_gso_size; - uint16_t vrh_csum_start; - uint16_t vrh_csum_offset; - uint16_t vrh_bufs; -} __packed; - -/* - * Debug printf - */ -static int pci_vtnet_debug; -#define DPRINTF(params) if (pci_vtnet_debug) printf params -#define WPRINTF(params) printf params - -/* - * Per-device softc - */ -struct pci_vtnet_softc { - struct virtio_softc vsc_vs; - struct vqueue_info vsc_queues[VTNET_MAXQ - 1]; - pthread_mutex_t vsc_mtx; - struct mevent *vsc_mevp; - - int vsc_tapfd; - struct nm_desc *vsc_nmd; - - int vsc_rx_ready; - volatile int resetting; /* set and checked outside lock */ - - uint64_t vsc_features; /* negotiated features */ - - struct virtio_net_config vsc_config; - - pthread_mutex_t rx_mtx; - int rx_in_progress; - int rx_vhdrlen; - int rx_merge; /* merged rx bufs in use */ - - pthread_t tx_tid; - pthread_mutex_t tx_mtx; - pthread_cond_t tx_cond; - int tx_in_progress; - - void (*pci_vtnet_rx)(struct pci_vtnet_softc *sc); - void (*pci_vtnet_tx)(struct pci_vtnet_softc *sc, struct iovec *iov, - int iovcnt, int len); -}; - -static void pci_vtnet_reset(void *); -/* static void pci_vtnet_notify(void *, struct vqueue_info *); */ -static int pci_vtnet_cfgread(void *, int, int, uint32_t *); -static int pci_vtnet_cfgwrite(void *, int, int, uint32_t); -static void pci_vtnet_neg_features(void *, uint64_t); - -static struct virtio_consts vtnet_vi_consts = { - "vtnet", /* our name */ - VTNET_MAXQ - 1, /* we currently support 2 virtqueues */ - sizeof(struct virtio_net_config), /* config reg size */ - pci_vtnet_reset, /* reset */ - NULL, /* device-wide qnotify -- not used */ - pci_vtnet_cfgread, /* read PCI config */ - pci_vtnet_cfgwrite, /* write PCI config */ - pci_vtnet_neg_features, /* apply negotiated features 
*/ - VTNET_S_HOSTCAPS, /* our capabilities */ -}; - -/* - * If the transmit thread is active then stall until it is done. - */ -static void -pci_vtnet_txwait(struct pci_vtnet_softc *sc) -{ - - pthread_mutex_lock(&sc->tx_mtx); - while (sc->tx_in_progress) { - pthread_mutex_unlock(&sc->tx_mtx); - usleep(10000); - pthread_mutex_lock(&sc->tx_mtx); - } - pthread_mutex_unlock(&sc->tx_mtx); -} - -/* - * If the receive thread is active then stall until it is done. - */ -static void -pci_vtnet_rxwait(struct pci_vtnet_softc *sc) -{ - - pthread_mutex_lock(&sc->rx_mtx); - while (sc->rx_in_progress) { - pthread_mutex_unlock(&sc->rx_mtx); - usleep(10000); - pthread_mutex_lock(&sc->rx_mtx); - } - pthread_mutex_unlock(&sc->rx_mtx); -} - -static void -pci_vtnet_reset(void *vsc) -{ - struct pci_vtnet_softc *sc = vsc; - - DPRINTF(("vtnet: device reset requested !\n")); - - sc->resetting = 1; - - /* - * Wait for the transmit and receive threads to finish their - * processing. - */ - pci_vtnet_txwait(sc); - pci_vtnet_rxwait(sc); - - sc->vsc_rx_ready = 0; - sc->rx_merge = 1; - sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr); - - /* now reset rings, MSI-X vectors, and negotiated capabilities */ - vi_reset_dev(&sc->vsc_vs); - - sc->resetting = 0; -} - -/* - * Called to send a buffer chain out to the tap device - */ -static void -pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, - int len) -{ - static char pad[60]; /* all zero bytes */ - - if (sc->vsc_tapfd == -1) - return; - - /* - * If the length is < 60, pad out to that and add the - * extra zero'd segment to the iov. It is guaranteed that - * there is always an extra iov available by the caller. - */ - if (len < 60) { - iov[iovcnt].iov_base = pad; - iov[iovcnt].iov_len = 60 - len; - iovcnt++; - } - (void) writev(sc->vsc_tapfd, iov, iovcnt); -} - -/* - * Called when there is read activity on the tap file descriptor. 
- * Each buffer posted by the guest is assumed to be able to contain - * an entire ethernet frame + rx header. - * MP note: the dummybuf is only used for discarding frames, so there - * is no need for it to be per-vtnet or locked. - */ -static uint8_t dummybuf[2048]; - -static __inline struct iovec * -rx_iov_trim(struct iovec *iov, int *niov, int tlen) -{ - struct iovec *riov; - - /* XXX short-cut: assume first segment is >= tlen */ - assert(iov[0].iov_len >= tlen); - - iov[0].iov_len -= tlen; - if (iov[0].iov_len == 0) { - assert(*niov > 1); - *niov -= 1; - riov = &iov[1]; - } else { - iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen); - riov = &iov[0]; - } - - return (riov); -} - -static void -pci_vtnet_tap_rx(struct pci_vtnet_softc *sc) -{ - struct iovec iov[VTNET_MAXSEGS], *riov; - struct vqueue_info *vq; - void *vrx; - int len, n; - uint16_t idx; - - /* - * Should never be called without a valid tap fd - */ - assert(sc->vsc_tapfd != -1); - - /* - * But, will be called when the rx ring hasn't yet - * been set up or the guest is resetting the device. - */ - if (!sc->vsc_rx_ready || sc->resetting) { - /* - * Drop the packet and try later. - */ - (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); - return; - } - - /* - * Check for available rx buffers - */ - vq = &sc->vsc_queues[VTNET_RXQ]; - if (!vq_has_descs(vq)) { - /* - * Drop the packet and try later. Interrupt on - * empty, if that's negotiated. - */ - (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); - vq_endchains(vq, 1); - return; - } - - do { - /* - * Get descriptor chain. - */ - n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); - assert(n >= 1 && n <= VTNET_MAXSEGS); - - /* - * Get a pointer to the rx header, and use the - * data immediately following it for the packet buffer. 
- */ - vrx = iov[0].iov_base; - riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen); - - len = readv(sc->vsc_tapfd, riov, n); - - if (len < 0 && errno == EWOULDBLOCK) { - /* - * No more packets, but still some avail ring - * entries. Interrupt if needed/appropriate. - */ - vq_retchain(vq); - vq_endchains(vq, 0); - return; - } - - /* - * The only valid field in the rx packet header is the - * number of buffers if merged rx bufs were negotiated. - */ - memset(vrx, 0, sc->rx_vhdrlen); - - if (sc->rx_merge) { - struct virtio_net_rxhdr *vrxh; - - vrxh = vrx; - vrxh->vrh_bufs = 1; - } - - /* - * Release this chain and handle more chains. - */ - vq_relchain(vq, idx, len + sc->rx_vhdrlen); - } while (vq_has_descs(vq)); - - /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */ - vq_endchains(vq, 1); -} - -static __inline int -pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt) -{ - int r, i; - int len = 0; - - for (r = nmd->cur_tx_ring; ; ) { - struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r); - uint32_t cur, idx; - char *buf; - - if (nm_ring_empty(ring)) { - r++; - if (r > nmd->last_tx_ring) - r = nmd->first_tx_ring; - if (r == nmd->cur_tx_ring) - break; - continue; - } - cur = ring->cur; - idx = ring->slot[cur].buf_idx; - buf = NETMAP_BUF(ring, idx); - - for (i = 0; i < iovcnt; i++) { - if (len + iov[i].iov_len > 2048) - break; - memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len); - len += iov[i].iov_len; - } - ring->slot[cur].len = len; - ring->head = ring->cur = nm_ring_next(ring, cur); - nmd->cur_tx_ring = r; - ioctl(nmd->fd, NIOCTXSYNC, NULL); - break; - } - - return (len); -} - -static __inline int -pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt) -{ - int len = 0; - int i = 0; - int r; - - for (r = nmd->cur_rx_ring; ; ) { - struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r); - uint32_t cur, idx; - char *buf; - size_t left; - - if (nm_ring_empty(ring)) { - r++; - if (r > nmd->last_rx_ring) - r = 
nmd->first_rx_ring; - if (r == nmd->cur_rx_ring) - break; - continue; - } - cur = ring->cur; - idx = ring->slot[cur].buf_idx; - buf = NETMAP_BUF(ring, idx); - left = ring->slot[cur].len; - - for (i = 0; i < iovcnt && left > 0; i++) { - if (iov[i].iov_len > left) - iov[i].iov_len = left; - memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len); - len += iov[i].iov_len; - left -= iov[i].iov_len; - } - ring->head = ring->cur = nm_ring_next(ring, cur); - nmd->cur_rx_ring = r; - ioctl(nmd->fd, NIOCRXSYNC, NULL); - break; - } - for (; i < iovcnt; i++) - iov[i].iov_len = 0; - - return (len); -} - -/* - * Called to send a buffer chain out to the vale port - */ -static void -pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, - int len) -{ - static char pad[60]; /* all zero bytes */ - - if (sc->vsc_nmd == NULL) - return; - - /* - * If the length is < 60, pad out to that and add the - * extra zero'd segment to the iov. It is guaranteed that - * there is always an extra iov available by the caller. - */ - if (len < 60) { - iov[iovcnt].iov_base = pad; - iov[iovcnt].iov_len = 60 - len; - iovcnt++; - } - (void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt); -} - -static void -pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc) -{ - struct iovec iov[VTNET_MAXSEGS], *riov; - struct vqueue_info *vq; - void *vrx; - int len, n; - uint16_t idx; - - /* - * Should never be called without a valid netmap descriptor - */ - assert(sc->vsc_nmd != NULL); - - /* - * But, will be called when the rx ring hasn't yet - * been set up or the guest is resetting the device. - */ - if (!sc->vsc_rx_ready || sc->resetting) { - /* - * Drop the packet and try later. - */ - (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf); - return; - } - - /* - * Check for available rx buffers - */ - vq = &sc->vsc_queues[VTNET_RXQ]; - if (!vq_has_descs(vq)) { - /* - * Drop the packet and try later. Interrupt on - * empty, if that's negotiated. 
- */ - (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf); - vq_endchains(vq, 1); - return; - } - - do { - /* - * Get descriptor chain. - */ - n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); - assert(n >= 1 && n <= VTNET_MAXSEGS); - - /* - * Get a pointer to the rx header, and use the - * data immediately following it for the packet buffer. - */ - vrx = iov[0].iov_base; - riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen); - - len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n); - - if (len == 0) { - /* - * No more packets, but still some avail ring - * entries. Interrupt if needed/appropriate. - */ - vq_retchain(vq); - vq_endchains(vq, 0); - return; - } - - /* - * The only valid field in the rx packet header is the - * number of buffers if merged rx bufs were negotiated. - */ - memset(vrx, 0, sc->rx_vhdrlen); - - if (sc->rx_merge) { - struct virtio_net_rxhdr *vrxh; - - vrxh = vrx; - vrxh->vrh_bufs = 1; - } - - /* - * Release this chain and handle more chains. - */ - vq_relchain(vq, idx, len + sc->rx_vhdrlen); - } while (vq_has_descs(vq)); - - /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */ - vq_endchains(vq, 1); -} - -static void -pci_vtnet_rx_callback(int fd, enum ev_type type, void *param) -{ - struct pci_vtnet_softc *sc = param; - - pthread_mutex_lock(&sc->rx_mtx); - sc->rx_in_progress = 1; - sc->pci_vtnet_rx(sc); - sc->rx_in_progress = 0; - pthread_mutex_unlock(&sc->rx_mtx); - -} - -static void -pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) -{ - struct pci_vtnet_softc *sc = vsc; - - /* - * A qnotify means that the rx process can now begin - */ - if (sc->vsc_rx_ready == 0) { - sc->vsc_rx_ready = 1; - vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY; - } -} - -static void -pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq) -{ - struct iovec iov[VTNET_MAXSEGS + 1]; - int i, n; - int plen, tlen; - uint16_t idx; - - /* - * Obtain chain of descriptors. 
The first one is - * really the header descriptor, so we need to sum - * up two lengths: packet length and transfer length. - */ - n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); - assert(n >= 1 && n <= VTNET_MAXSEGS); - plen = 0; - tlen = iov[0].iov_len; - for (i = 1; i < n; i++) { - plen += iov[i].iov_len; - tlen += iov[i].iov_len; - } - - DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n)); - sc->pci_vtnet_tx(sc, &iov[1], n - 1, plen); - - /* chain is processed, release it and set tlen */ - vq_relchain(vq, idx, tlen); -} - -static void -pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq) -{ - struct pci_vtnet_softc *sc = vsc; - - /* - * Any ring entries to process? - */ - if (!vq_has_descs(vq)) - return; - - /* Signal the tx thread for processing */ - pthread_mutex_lock(&sc->tx_mtx); - vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY; - if (sc->tx_in_progress == 0) - pthread_cond_signal(&sc->tx_cond); - pthread_mutex_unlock(&sc->tx_mtx); -} - -/* - * Thread which will handle processing of TX desc - */ -static void * -pci_vtnet_tx_thread(void *param) -{ - struct pci_vtnet_softc *sc = param; - struct vqueue_info *vq; - int error; - - vq = &sc->vsc_queues[VTNET_TXQ]; - - /* - * Let us wait till the tx queue pointers get initialised & - * first tx signaled - */ - pthread_mutex_lock(&sc->tx_mtx); - error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); - assert(error == 0); - - for (;;) { - /* note - tx mutex is locked here */ - while (sc->resetting || !vq_has_descs(vq)) { - vq->vq_used->vu_flags &= ~VRING_USED_F_NO_NOTIFY; - mb(); - if (!sc->resetting && vq_has_descs(vq)) - break; - - sc->tx_in_progress = 0; - error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); - assert(error == 0); - } - vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY; - sc->tx_in_progress = 1; - pthread_mutex_unlock(&sc->tx_mtx); - - do { - /* - * Run through entries, placing them into - * iovecs and sending when an end-of-packet - * is found - */ - pci_vtnet_proctx(sc, vq); - 
} while (vq_has_descs(vq)); - - /* - * Generate an interrupt if needed. - */ - vq_endchains(vq, 1); - - pthread_mutex_lock(&sc->tx_mtx); - } -} - -#ifdef notyet -static void -pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq) -{ - - DPRINTF(("vtnet: control qnotify!\n\r")); -} -#endif - -static int -pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr) -{ - struct ether_addr *ea; - char *tmpstr; - char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 }; - - tmpstr = strsep(&mac_str,"="); - - if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) { - ea = ether_aton(mac_str); - - if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) || - memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) { - fprintf(stderr, "Invalid MAC %s\n", mac_str); - return (EINVAL); - } else - memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN); - } - - return (0); -} - -static void -pci_vtnet_tap_setup(struct pci_vtnet_softc *sc, char *devname) -{ - char tbuf[80]; - - strcpy(tbuf, "/dev/"); - strlcat(tbuf, devname, sizeof(tbuf)); - - sc->pci_vtnet_rx = pci_vtnet_tap_rx; - sc->pci_vtnet_tx = pci_vtnet_tap_tx; - - sc->vsc_tapfd = open(tbuf, O_RDWR); - if (sc->vsc_tapfd == -1) { - WPRINTF(("open of tap device %s failed\n", tbuf)); - return; - } - - /* - * Set non-blocking and register for read - * notifications with the event loop - */ - int opt = 1; - if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) { - WPRINTF(("tap device O_NONBLOCK failed\n")); - close(sc->vsc_tapfd); - sc->vsc_tapfd = -1; - } - - sc->vsc_mevp = mevent_add(sc->vsc_tapfd, - EVF_READ, - pci_vtnet_rx_callback, - sc); - if (sc->vsc_mevp == NULL) { - WPRINTF(("Could not register event\n")); - close(sc->vsc_tapfd); - sc->vsc_tapfd = -1; - } -} - -static void -pci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname) -{ - sc->pci_vtnet_rx = pci_vtnet_netmap_rx; - sc->pci_vtnet_tx = pci_vtnet_netmap_tx; - - sc->vsc_nmd = nm_open(ifname, NULL, 0, 0); - if (sc->vsc_nmd == NULL) { - WPRINTF(("open of netmap device %s failed\n", ifname)); - return; - 
} - - sc->vsc_mevp = mevent_add(sc->vsc_nmd->fd, - EVF_READ, - pci_vtnet_rx_callback, - sc); - if (sc->vsc_mevp == NULL) { - WPRINTF(("Could not register event\n")); - nm_close(sc->vsc_nmd); - sc->vsc_nmd = NULL; - } -} - -static int -pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) -{ - MD5_CTX mdctx; - unsigned char digest[16]; - char nstr[80]; - char tname[MAXCOMLEN + 1]; - struct pci_vtnet_softc *sc; - char *devname; - char *vtopts; - int mac_provided; - - sc = calloc(1, sizeof(struct pci_vtnet_softc)); - - pthread_mutex_init(&sc->vsc_mtx, NULL); - - vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues); - sc->vsc_vs.vs_mtx = &sc->vsc_mtx; - - sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ; - sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq; - sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ; - sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq; -#ifdef notyet - sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ; - sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq; -#endif - - /* - * Attempt to open the tap device and read the MAC address - * if specified - */ - mac_provided = 0; - sc->vsc_tapfd = -1; - sc->vsc_nmd = NULL; - if (opts != NULL) { - int err; - - devname = vtopts = strdup(opts); - (void) strsep(&vtopts, ","); - - if (vtopts != NULL) { - err = pci_vtnet_parsemac(vtopts, sc->vsc_config.mac); - if (err != 0) { - free(devname); - return (err); - } - mac_provided = 1; - } - - if (strncmp(devname, "vale", 4) == 0) - pci_vtnet_netmap_setup(sc, devname); - if (strncmp(devname, "tap", 3) == 0 || - strncmp(devname, "vmnet", 5) == 0) - pci_vtnet_tap_setup(sc, devname); - - free(devname); - } - - /* - * The default MAC address is the standard NetApp OUI of 00-a0-98, - * followed by an MD5 of the PCI slot/func number and dev name - */ - if (!mac_provided) { - snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot, - pi->pi_func, vmname); - - MD5Init(&mdctx); - MD5Update(&mdctx, nstr, strlen(nstr)); 
- MD5Final(digest, &mdctx); - - sc->vsc_config.mac[0] = 0x00; - sc->vsc_config.mac[1] = 0xa0; - sc->vsc_config.mac[2] = 0x98; - sc->vsc_config.mac[3] = digest[0]; - sc->vsc_config.mac[4] = digest[1]; - sc->vsc_config.mac[5] = digest[2]; - } - - /* initialize config space */ - pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); - pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); - pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); - pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); - pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); - - /* Link is up if we managed to open tap device or vale port. */ - sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0 || - sc->vsc_nmd != NULL); - - /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */ - if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) - return (1); - - /* use BAR 0 to map config regs in IO space */ - vi_set_io_bar(&sc->vsc_vs, 0); - - sc->resetting = 0; - - sc->rx_merge = 1; - sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr); - sc->rx_in_progress = 0; - pthread_mutex_init(&sc->rx_mtx, NULL); - - /* - * Initialize tx semaphore & spawn TX processing thread. - * As of now, only one thread for TX desc processing is - * spawned. 
- */ - sc->tx_in_progress = 0; - pthread_mutex_init(&sc->tx_mtx, NULL); - pthread_cond_init(&sc->tx_cond, NULL); - pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); - snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot, - pi->pi_func); - pthread_set_name_np(sc->tx_tid, tname); - - return (0); -} - -static int -pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value) -{ - struct pci_vtnet_softc *sc = vsc; - void *ptr; - - if (offset < 6) { - assert(offset + size <= 6); - /* - * The driver is allowed to change the MAC address - */ - ptr = &sc->vsc_config.mac[offset]; - memcpy(ptr, &value, size); - } else { - /* silently ignore other writes */ - DPRINTF(("vtnet: write to readonly reg %d\n\r", offset)); - } - - return (0); -} - -static int -pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval) -{ - struct pci_vtnet_softc *sc = vsc; - void *ptr; - - ptr = (uint8_t *)&sc->vsc_config + offset; - memcpy(retval, ptr, size); - return (0); -} - -static void -pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features) -{ - struct pci_vtnet_softc *sc = vsc; - - sc->vsc_features = negotiated_features; - - if (!(sc->vsc_features & VIRTIO_NET_F_MRG_RXBUF)) { - sc->rx_merge = 0; - /* non-merge rx header is 2 bytes shorter */ - sc->rx_vhdrlen -= 2; - } -} - -struct pci_devemu pci_de_vnet = { - .pe_emu = "virtio-net", - .pe_init = pci_vtnet_init, - .pe_barwrite = vi_pci_write, - .pe_barread = vi_pci_read -}; -PCI_EMUL_SET(pci_de_vnet); diff --git a/usr.sbin/bhyve/pci_virtio_rnd.c b/usr.sbin/bhyve/pci_virtio_rnd.c deleted file mode 100644 index 78448f5..0000000 --- a/usr.sbin/bhyve/pci_virtio_rnd.c +++ /dev/null @@ -1,189 +0,0 @@ -/*- - * Copyright (c) 2014 Nahanni Systems Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer - * in this position and unchanged. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * virtio entropy device emulation. - * Randomness is sourced from /dev/random which does not block - * once it has been seeded at bootup. 
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/linker_set.h> -#include <sys/uio.h> - -#include <errno.h> -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <assert.h> -#include <pthread.h> - -#include "bhyverun.h" -#include "pci_emul.h" -#include "virtio.h" - -#define VTRND_RINGSZ 64 - - -static int pci_vtrnd_debug; -#define DPRINTF(params) if (pci_vtrnd_debug) printf params -#define WPRINTF(params) printf params - -/* - * Per-device softc - */ -struct pci_vtrnd_softc { - struct virtio_softc vrsc_vs; - struct vqueue_info vrsc_vq; - pthread_mutex_t vrsc_mtx; - uint64_t vrsc_cfg; - int vrsc_fd; -}; - -static void pci_vtrnd_reset(void *); -static void pci_vtrnd_notify(void *, struct vqueue_info *); - -static struct virtio_consts vtrnd_vi_consts = { - "vtrnd", /* our name */ - 1, /* we support 1 virtqueue */ - 0, /* config reg size */ - pci_vtrnd_reset, /* reset */ - pci_vtrnd_notify, /* device-wide qnotify */ - NULL, /* read virtio config */ - NULL, /* write virtio config */ - NULL, /* apply negotiated features */ - 0, /* our capabilities */ -}; - - -static void -pci_vtrnd_reset(void *vsc) -{ - struct pci_vtrnd_softc *sc; - - sc = vsc; - - DPRINTF(("vtrnd: device reset requested !\n")); - vi_reset_dev(&sc->vrsc_vs); -} - - -static void -pci_vtrnd_notify(void *vsc, struct vqueue_info *vq) -{ - struct iovec iov; - struct pci_vtrnd_softc *sc; - int len; - uint16_t idx; - - sc = vsc; - - if (sc->vrsc_fd < 0) { - vq_endchains(vq, 0); - return; - } - - while (vq_has_descs(vq)) { - vq_getchain(vq, &idx, &iov, 1, NULL); - - len = read(sc->vrsc_fd, iov.iov_base, iov.iov_len); - - DPRINTF(("vtrnd: vtrnd_notify(): %d\r\n", len)); - - /* Catastrophe if unable to read from /dev/random */ - assert(len > 0); - - /* - * Release this chain and handle more - */ - vq_relchain(vq, idx, len); - } - vq_endchains(vq, 1); /* Generate interrupt if appropriate. 
*/ -} - - -static int -pci_vtrnd_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) -{ - struct pci_vtrnd_softc *sc; - int fd; - int len; - uint8_t v; - - /* - * Should always be able to open /dev/random. - */ - fd = open("/dev/random", O_RDONLY | O_NONBLOCK); - - assert(fd >= 0); - - /* - * Check that device is seeded and non-blocking. - */ - len = read(fd, &v, sizeof(v)); - if (len <= 0) { - WPRINTF(("vtrnd: /dev/random not ready, read(): %d", len)); - return (1); - } - - sc = calloc(1, sizeof(struct pci_vtrnd_softc)); - - vi_softc_linkup(&sc->vrsc_vs, &vtrnd_vi_consts, sc, pi, &sc->vrsc_vq); - sc->vrsc_vs.vs_mtx = &sc->vrsc_mtx; - - sc->vrsc_vq.vq_qsize = VTRND_RINGSZ; - - /* keep /dev/random opened while emulating */ - sc->vrsc_fd = fd; - - /* initialize config space */ - pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_RANDOM); - pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); - pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_CRYPTO); - pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_ENTROPY); - pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); - - if (vi_intr_init(&sc->vrsc_vs, 1, fbsdrun_virtio_msix())) - return (1); - vi_set_io_bar(&sc->vrsc_vs, 0); - - return (0); -} - - -struct pci_devemu pci_de_vrnd = { - .pe_emu = "virtio-rnd", - .pe_init = pci_vtrnd_init, - .pe_barwrite = vi_pci_write, - .pe_barread = vi_pci_read -}; -PCI_EMUL_SET(pci_de_vrnd); diff --git a/usr.sbin/bhyve/pm.c b/usr.sbin/bhyve/pm.c deleted file mode 100644 index f7c1c23..0000000 --- a/usr.sbin/bhyve/pm.c +++ /dev/null @@ -1,312 +0,0 @@ -/*- - * Copyright (c) 2013 Hudson River Trading LLC - * Written by: John H. Baldwin <jhb@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> -#include <machine/vmm.h> - -#include <assert.h> -#include <errno.h> -#include <pthread.h> -#include <signal.h> -#include <vmmapi.h> - -#include "acpi.h" -#include "inout.h" -#include "mevent.h" -#include "pci_irq.h" -#include "pci_lpc.h" - -static pthread_mutex_t pm_lock = PTHREAD_MUTEX_INITIALIZER; -static struct mevent *power_button; -static sig_t old_power_handler; - -/* - * Reset Control register at I/O port 0xcf9. Bit 2 forces a system - * reset when it transitions from 0 to 1. Bit 1 selects the type of - * reset to attempt: 0 selects a "soft" reset, and 1 selects a "hard" - * reset. - */ -static int -reset_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - int error; - - static uint8_t reset_control; - - if (bytes != 1) - return (-1); - if (in) - *eax = reset_control; - else { - reset_control = *eax; - - /* Treat hard and soft resets the same. 
*/ - if (reset_control & 0x4) { - error = vm_suspend(ctx, VM_SUSPEND_RESET); - assert(error == 0 || errno == EALREADY); - } - } - return (0); -} -INOUT_PORT(reset_reg, 0xCF9, IOPORT_F_INOUT, reset_handler); - -/* - * ACPI's SCI is a level-triggered interrupt. - */ -static int sci_active; - -static void -sci_assert(struct vmctx *ctx) -{ - - if (sci_active) - return; - vm_isa_assert_irq(ctx, SCI_INT, SCI_INT); - sci_active = 1; -} - -static void -sci_deassert(struct vmctx *ctx) -{ - - if (!sci_active) - return; - vm_isa_deassert_irq(ctx, SCI_INT, SCI_INT); - sci_active = 0; -} - -/* - * Power Management 1 Event Registers - * - * The only power management event supported is a power button upon - * receiving SIGTERM. - */ -static uint16_t pm1_enable, pm1_status; - -#define PM1_TMR_STS 0x0001 -#define PM1_BM_STS 0x0010 -#define PM1_GBL_STS 0x0020 -#define PM1_PWRBTN_STS 0x0100 -#define PM1_SLPBTN_STS 0x0200 -#define PM1_RTC_STS 0x0400 -#define PM1_WAK_STS 0x8000 - -#define PM1_TMR_EN 0x0001 -#define PM1_GBL_EN 0x0020 -#define PM1_PWRBTN_EN 0x0100 -#define PM1_SLPBTN_EN 0x0200 -#define PM1_RTC_EN 0x0400 - -static void -sci_update(struct vmctx *ctx) -{ - int need_sci; - - /* See if the SCI should be active or not. 
*/ - need_sci = 0; - if ((pm1_enable & PM1_TMR_EN) && (pm1_status & PM1_TMR_STS)) - need_sci = 1; - if ((pm1_enable & PM1_GBL_EN) && (pm1_status & PM1_GBL_STS)) - need_sci = 1; - if ((pm1_enable & PM1_PWRBTN_EN) && (pm1_status & PM1_PWRBTN_STS)) - need_sci = 1; - if ((pm1_enable & PM1_SLPBTN_EN) && (pm1_status & PM1_SLPBTN_STS)) - need_sci = 1; - if ((pm1_enable & PM1_RTC_EN) && (pm1_status & PM1_RTC_STS)) - need_sci = 1; - if (need_sci) - sci_assert(ctx); - else - sci_deassert(ctx); -} - -static int -pm1_status_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - - if (bytes != 2) - return (-1); - - pthread_mutex_lock(&pm_lock); - if (in) - *eax = pm1_status; - else { - /* - * Writes are only permitted to clear certain bits by - * writing 1 to those flags. - */ - pm1_status &= ~(*eax & (PM1_WAK_STS | PM1_RTC_STS | - PM1_SLPBTN_STS | PM1_PWRBTN_STS | PM1_BM_STS)); - sci_update(ctx); - } - pthread_mutex_unlock(&pm_lock); - return (0); -} - -static int -pm1_enable_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - - if (bytes != 2) - return (-1); - - pthread_mutex_lock(&pm_lock); - if (in) - *eax = pm1_enable; - else { - /* - * Only permit certain bits to be set. We never use - * the global lock, but ACPI-CA whines profusely if it - * can't set GBL_EN. 
- */ - pm1_enable = *eax & (PM1_PWRBTN_EN | PM1_GBL_EN); - sci_update(ctx); - } - pthread_mutex_unlock(&pm_lock); - return (0); -} -INOUT_PORT(pm1_status, PM1A_EVT_ADDR, IOPORT_F_INOUT, pm1_status_handler); -INOUT_PORT(pm1_enable, PM1A_EVT_ADDR + 2, IOPORT_F_INOUT, pm1_enable_handler); - -static void -power_button_handler(int signal, enum ev_type type, void *arg) -{ - struct vmctx *ctx; - - ctx = arg; - pthread_mutex_lock(&pm_lock); - if (!(pm1_status & PM1_PWRBTN_STS)) { - pm1_status |= PM1_PWRBTN_STS; - sci_update(ctx); - } - pthread_mutex_unlock(&pm_lock); -} - -/* - * Power Management 1 Control Register - * - * This is mostly unimplemented except that we wish to handle writes that - * set SPL_EN to handle S5 (soft power off). - */ -static uint16_t pm1_control; - -#define PM1_SCI_EN 0x0001 -#define PM1_SLP_TYP 0x1c00 -#define PM1_SLP_EN 0x2000 -#define PM1_ALWAYS_ZERO 0xc003 - -static int -pm1_control_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - int error; - - if (bytes != 2) - return (-1); - if (in) - *eax = pm1_control; - else { - /* - * Various bits are write-only or reserved, so force them - * to zero in pm1_control. Always preserve SCI_EN as OSPM - * can never change it. - */ - pm1_control = (pm1_control & PM1_SCI_EN) | - (*eax & ~(PM1_SLP_EN | PM1_ALWAYS_ZERO)); - - /* - * If SLP_EN is set, check for S5. Bhyve's _S5_ method - * says that '5' should be stored in SLP_TYP for S5. - */ - if (*eax & PM1_SLP_EN) { - if ((pm1_control & PM1_SLP_TYP) >> 10 == 5) { - error = vm_suspend(ctx, VM_SUSPEND_POWEROFF); - assert(error == 0 || errno == EALREADY); - } - } - } - return (0); -} -INOUT_PORT(pm1_control, PM1A_CNT_ADDR, IOPORT_F_INOUT, pm1_control_handler); -SYSRES_IO(PM1A_EVT_ADDR, 8); - -/* - * ACPI SMI Command Register - * - * This write-only register is used to enable and disable ACPI. 
- */ -static int -smi_cmd_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - - assert(!in); - if (bytes != 1) - return (-1); - - pthread_mutex_lock(&pm_lock); - switch (*eax) { - case BHYVE_ACPI_ENABLE: - pm1_control |= PM1_SCI_EN; - if (power_button == NULL) { - power_button = mevent_add(SIGTERM, EVF_SIGNAL, - power_button_handler, ctx); - old_power_handler = signal(SIGTERM, SIG_IGN); - } - break; - case BHYVE_ACPI_DISABLE: - pm1_control &= ~PM1_SCI_EN; - if (power_button != NULL) { - mevent_delete(power_button); - power_button = NULL; - signal(SIGTERM, old_power_handler); - } - break; - } - pthread_mutex_unlock(&pm_lock); - return (0); -} -INOUT_PORT(smi_cmd, SMI_CMD, IOPORT_F_OUT, smi_cmd_handler); -SYSRES_IO(SMI_CMD, 1); - -void -sci_init(struct vmctx *ctx) -{ - - /* - * Mark ACPI's SCI as level trigger and bump its use count - * in the PIRQ router. - */ - pci_irq_use(SCI_INT); - vm_isa_set_irq_trigger(ctx, SCI_INT, LEVEL_TRIGGER); -} diff --git a/usr.sbin/bhyve/post.c b/usr.sbin/bhyve/post.c deleted file mode 100644 index 5215a0c..0000000 --- a/usr.sbin/bhyve/post.c +++ /dev/null @@ -1,53 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> - -#include <assert.h> - -#include "inout.h" -#include "pci_lpc.h" - -static int -post_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, - uint32_t *eax, void *arg) -{ - assert(in == 1); - - if (bytes != 1) - return (-1); - - *eax = 0xff; /* return some garbage */ - return (0); -} - -INOUT_PORT(post, 0x84, IOPORT_F_IN, post_data_handler); -SYSRES_IO(0x84, 1); diff --git a/usr.sbin/bhyve/rtc.c b/usr.sbin/bhyve/rtc.c deleted file mode 100644 index 5c70154..0000000 --- a/usr.sbin/bhyve/rtc.c +++ /dev/null @@ -1,129 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> - -#include <time.h> -#include <assert.h> - -#include <machine/vmm.h> -#include <vmmapi.h> - -#include "acpi.h" -#include "pci_lpc.h" -#include "rtc.h" - -#define IO_RTC 0x70 - -#define RTC_LMEM_LSB 0x34 -#define RTC_LMEM_MSB 0x35 -#define RTC_HMEM_LSB 0x5b -#define RTC_HMEM_SB 0x5c -#define RTC_HMEM_MSB 0x5d - -#define m_64KB (64*1024) -#define m_16MB (16*1024*1024) -#define m_4GB (4ULL*1024*1024*1024) - -/* - * Returns the current RTC time as number of seconds since 00:00:00 Jan 1, 1970 - */ -static time_t -rtc_time(struct vmctx *ctx, int use_localtime) -{ - struct tm tm; - time_t t; - - time(&t); - if (use_localtime) { - localtime_r(&t, &tm); - t = timegm(&tm); - } - return (t); -} - -void -rtc_init(struct vmctx *ctx, int use_localtime) -{ - size_t himem; - size_t lomem; - int err; - - /* XXX init diag/reset code/equipment/checksum ? */ - - /* - * Report guest memory size in nvram cells as required by UEFI. - * Little-endian encoding. 
- * 0x34/0x35 - 64KB chunks above 16MB, below 4GB - * 0x5b/0x5c/0x5d - 64KB chunks above 4GB - */ - lomem = (vm_get_lowmem_size(ctx) - m_16MB) / m_64KB; - err = vm_rtc_write(ctx, RTC_LMEM_LSB, lomem); - assert(err == 0); - err = vm_rtc_write(ctx, RTC_LMEM_MSB, lomem >> 8); - assert(err == 0); - - himem = vm_get_highmem_size(ctx) / m_64KB; - err = vm_rtc_write(ctx, RTC_HMEM_LSB, himem); - assert(err == 0); - err = vm_rtc_write(ctx, RTC_HMEM_SB, himem >> 8); - assert(err == 0); - err = vm_rtc_write(ctx, RTC_HMEM_MSB, himem >> 16); - assert(err == 0); - - err = vm_rtc_settime(ctx, rtc_time(ctx, use_localtime)); - assert(err == 0); -} - -static void -rtc_dsdt(void) -{ - - dsdt_line(""); - dsdt_line("Device (RTC)"); - dsdt_line("{"); - dsdt_line(" Name (_HID, EisaId (\"PNP0B00\"))"); - dsdt_line(" Name (_CRS, ResourceTemplate ()"); - dsdt_line(" {"); - dsdt_indent(2); - dsdt_fixed_ioport(IO_RTC, 2); - dsdt_fixed_irq(8); - dsdt_unindent(2); - dsdt_line(" })"); - dsdt_line("}"); -} -LPC_DSDT(rtc_dsdt); - -/* - * Reserve the extended RTC I/O ports although they are not emulated at this - * time. - */ -SYSRES_IO(0x72, 6); diff --git a/usr.sbin/bhyve/rtc.h b/usr.sbin/bhyve/rtc.h deleted file mode 100644 index 5b08ca3..0000000 --- a/usr.sbin/bhyve/rtc.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _RTC_H_ -#define _RTC_H_ - -void rtc_init(struct vmctx *ctx, int use_localtime); - -#endif /* _RTC_H_ */ diff --git a/usr.sbin/bhyve/smbiostbl.c b/usr.sbin/bhyve/smbiostbl.c deleted file mode 100644 index 59a1358..0000000 --- a/usr.sbin/bhyve/smbiostbl.c +++ /dev/null @@ -1,827 +0,0 @@ -/*- - * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> - -#include <assert.h> -#include <errno.h> -#include <md5.h> -#include <stdio.h> -#include <string.h> -#include <unistd.h> -#include <uuid.h> - -#include <machine/vmm.h> -#include <vmmapi.h> - -#include "bhyverun.h" -#include "smbiostbl.h" - -#define MB (1024*1024) -#define GB (1024ULL*1024*1024) - -#define SMBIOS_BASE 0xF1000 - -/* BHYVE_ACPI_BASE - SMBIOS_BASE) */ -#define SMBIOS_MAX_LENGTH (0xF2400 - 0xF1000) - -#define SMBIOS_TYPE_BIOS 0 -#define SMBIOS_TYPE_SYSTEM 1 -#define SMBIOS_TYPE_CHASSIS 3 -#define SMBIOS_TYPE_PROCESSOR 4 -#define SMBIOS_TYPE_MEMARRAY 16 -#define SMBIOS_TYPE_MEMDEVICE 17 -#define SMBIOS_TYPE_MEMARRAYMAP 19 -#define SMBIOS_TYPE_BOOT 32 -#define SMBIOS_TYPE_EOT 127 - -struct smbios_structure { - uint8_t type; - uint8_t length; - uint16_t handle; -} __packed; - -typedef int (*initializer_func_t)(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, - uint16_t *n, uint16_t *size); - -struct smbios_template_entry { - struct smbios_structure *entry; - const char **strings; - initializer_func_t initializer; -}; - -/* - * SMBIOS Structure Table Entry Point - */ -#define SMBIOS_ENTRY_EANCHOR "_SM_" -#define SMBIOS_ENTRY_EANCHORLEN 4 -#define SMBIOS_ENTRY_IANCHOR "_DMI_" -#define SMBIOS_ENTRY_IANCHORLEN 5 - -struct smbios_entry_point { - char eanchor[4]; /* anchor tag */ - uint8_t echecksum; /* 
checksum of entry point structure */ - uint8_t eplen; /* length in bytes of entry point */ - uint8_t major; /* major version of the SMBIOS spec */ - uint8_t minor; /* minor version of the SMBIOS spec */ - uint16_t maxssize; /* maximum size in bytes of a struct */ - uint8_t revision; /* entry point structure revision */ - uint8_t format[5]; /* entry point rev-specific data */ - char ianchor[5]; /* intermediate anchor tag */ - uint8_t ichecksum; /* intermediate checksum */ - uint16_t stlen; /* len in bytes of structure table */ - uint32_t staddr; /* physical addr of structure table */ - uint16_t stnum; /* number of structure table entries */ - uint8_t bcdrev; /* BCD value representing DMI ver */ -} __packed; - -/* - * BIOS Information - */ -#define SMBIOS_FL_ISA 0x00000010 /* ISA is supported */ -#define SMBIOS_FL_PCI 0x00000080 /* PCI is supported */ -#define SMBIOS_FL_SHADOW 0x00001000 /* BIOS shadowing is allowed */ -#define SMBIOS_FL_CDBOOT 0x00008000 /* Boot from CD is supported */ -#define SMBIOS_FL_SELBOOT 0x00010000 /* Selectable Boot supported */ -#define SMBIOS_FL_EDD 0x00080000 /* EDD Spec is supported */ - -#define SMBIOS_XB1_FL_ACPI 0x00000001 /* ACPI is supported */ - -#define SMBIOS_XB2_FL_BBS 0x00000001 /* BIOS Boot Specification */ -#define SMBIOS_XB2_FL_VM 0x00000010 /* Virtual Machine */ - -struct smbios_table_type0 { - struct smbios_structure header; - uint8_t vendor; /* vendor string */ - uint8_t version; /* version string */ - uint16_t segment; /* address segment location */ - uint8_t rel_date; /* release date */ - uint8_t size; /* rom size */ - uint64_t cflags; /* characteristics */ - uint8_t xc_bytes[2]; /* characteristics ext bytes */ - uint8_t sb_major_rel; /* system bios version */ - uint8_t sb_minor_rele; - uint8_t ecfw_major_rel; /* embedded ctrl fw version */ - uint8_t ecfw_minor_rel; -} __packed; - -/* - * System Information - */ -#define SMBIOS_WAKEUP_SWITCH 0x06 /* power switch */ - -struct smbios_table_type1 { - struct 
smbios_structure header; - uint8_t manufacturer; /* manufacturer string */ - uint8_t product; /* product name string */ - uint8_t version; /* version string */ - uint8_t serial; /* serial number string */ - uint8_t uuid[16]; /* uuid byte array */ - uint8_t wakeup; /* wake-up event */ - uint8_t sku; /* sku number string */ - uint8_t family; /* family name string */ -} __packed; - -/* - * System Enclosure or Chassis - */ -#define SMBIOS_CHT_UNKNOWN 0x02 /* unknown */ - -#define SMBIOS_CHST_SAFE 0x03 /* safe */ - -#define SMBIOS_CHSC_NONE 0x03 /* none */ - -struct smbios_table_type3 { - struct smbios_structure header; - uint8_t manufacturer; /* manufacturer string */ - uint8_t type; /* type */ - uint8_t version; /* version string */ - uint8_t serial; /* serial number string */ - uint8_t asset; /* asset tag string */ - uint8_t bustate; /* boot-up state */ - uint8_t psstate; /* power supply state */ - uint8_t tstate; /* thermal state */ - uint8_t security; /* security status */ - uint8_t uheight; /* height in 'u's */ - uint8_t cords; /* number of power cords */ - uint8_t elems; /* number of element records */ - uint8_t elemlen; /* length of records */ - uint8_t sku; /* sku number string */ -} __packed; - -/* - * Processor Information - */ -#define SMBIOS_PRT_CENTRAL 0x03 /* central processor */ - -#define SMBIOS_PRF_OTHER 0x01 /* other */ - -#define SMBIOS_PRS_PRESENT 0x40 /* socket is populated */ -#define SMBIOS_PRS_ENABLED 0x1 /* enabled */ - -#define SMBIOS_PRU_NONE 0x06 /* none */ - -#define SMBIOS_PFL_64B 0x04 /* 64-bit capable */ - -struct smbios_table_type4 { - struct smbios_structure header; - uint8_t socket; /* socket designation string */ - uint8_t type; /* processor type */ - uint8_t family; /* processor family */ - uint8_t manufacturer; /* manufacturer string */ - uint64_t cpuid; /* processor cpuid */ - uint8_t version; /* version string */ - uint8_t voltage; /* voltage */ - uint16_t clkspeed; /* ext clock speed in mhz */ - uint16_t maxspeed; /* maximum 
speed in mhz */ - uint16_t curspeed; /* current speed in mhz */ - uint8_t status; /* status */ - uint8_t upgrade; /* upgrade */ - uint16_t l1handle; /* l1 cache handle */ - uint16_t l2handle; /* l2 cache handle */ - uint16_t l3handle; /* l3 cache handle */ - uint8_t serial; /* serial number string */ - uint8_t asset; /* asset tag string */ - uint8_t part; /* part number string */ - uint8_t cores; /* cores per socket */ - uint8_t ecores; /* enabled cores */ - uint8_t threads; /* threads per socket */ - uint16_t cflags; /* processor characteristics */ - uint16_t family2; /* processor family 2 */ -} __packed; - -/* - * Physical Memory Array - */ -#define SMBIOS_MAL_SYSMB 0x03 /* system board or motherboard */ - -#define SMBIOS_MAU_SYSTEM 0x03 /* system memory */ - -#define SMBIOS_MAE_NONE 0x03 /* none */ - -struct smbios_table_type16 { - struct smbios_structure header; - uint8_t location; /* physical device location */ - uint8_t use; /* device functional purpose */ - uint8_t ecc; /* err detect/correct method */ - uint32_t size; /* max mem capacity in kb */ - uint16_t errhand; /* handle of error (if any) */ - uint16_t ndevs; /* num of slots or sockets */ - uint64_t xsize; /* max mem capacity in bytes */ -} __packed; - -/* - * Memory Device - */ -#define SMBIOS_MDFF_UNKNOWN 0x02 /* unknown */ - -#define SMBIOS_MDT_UNKNOWN 0x02 /* unknown */ - -#define SMBIOS_MDF_UNKNOWN 0x0004 /* unknown */ - -struct smbios_table_type17 { - struct smbios_structure header; - uint16_t arrayhand; /* handle of physl mem array */ - uint16_t errhand; /* handle of mem error data */ - uint16_t twidth; /* total width in bits */ - uint16_t dwidth; /* data width in bits */ - uint16_t size; /* size in bytes */ - uint8_t form; /* form factor */ - uint8_t set; /* set */ - uint8_t dloc; /* device locator string */ - uint8_t bloc; /* phys bank locator string */ - uint8_t type; /* memory type */ - uint16_t flags; /* memory characteristics */ - uint16_t maxspeed; /* maximum speed in mhz */ - uint8_t 
manufacturer; /* manufacturer string */ - uint8_t serial; /* serial number string */ - uint8_t asset; /* asset tag string */ - uint8_t part; /* part number string */ - uint8_t attributes; /* attributes */ - uint32_t xsize; /* extended size in mbs */ - uint16_t curspeed; /* current speed in mhz */ - uint16_t minvoltage; /* minimum voltage */ - uint16_t maxvoltage; /* maximum voltage */ - uint16_t curvoltage; /* configured voltage */ -} __packed; - -/* - * Memory Array Mapped Address - */ -struct smbios_table_type19 { - struct smbios_structure header; - uint32_t saddr; /* start phys addr in kb */ - uint32_t eaddr; /* end phys addr in kb */ - uint16_t arrayhand; /* physical mem array handle */ - uint8_t width; /* num of dev in row */ - uint64_t xsaddr; /* start phys addr in bytes */ - uint64_t xeaddr; /* end phys addr in bytes */ -} __packed; - -/* - * System Boot Information - */ -#define SMBIOS_BOOT_NORMAL 0 /* no errors detected */ - -struct smbios_table_type32 { - struct smbios_structure header; - uint8_t reserved[6]; - uint8_t status; /* boot status */ -} __packed; - -/* - * End-of-Table - */ -struct smbios_table_type127 { - struct smbios_structure header; -} __packed; - -struct smbios_table_type0 smbios_type0_template = { - { SMBIOS_TYPE_BIOS, sizeof (struct smbios_table_type0), 0 }, - 1, /* bios vendor string */ - 2, /* bios version string */ - 0xF000, /* bios address segment location */ - 3, /* bios release date */ - 0x0, /* bios size (64k * (n + 1) is the size in bytes) */ - SMBIOS_FL_ISA | SMBIOS_FL_PCI | SMBIOS_FL_SHADOW | - SMBIOS_FL_CDBOOT | SMBIOS_FL_EDD, - { SMBIOS_XB1_FL_ACPI, SMBIOS_XB2_FL_BBS | SMBIOS_XB2_FL_VM }, - 0x0, /* bios major release */ - 0x0, /* bios minor release */ - 0xff, /* embedded controller firmware major release */ - 0xff /* embedded controller firmware minor release */ -}; - -const char *smbios_type0_strings[] = { - "BHYVE", /* vendor string */ - "1.00", /* bios version string */ - "03/14/2014", /* bios release date string */ - 
NULL -}; - -struct smbios_table_type1 smbios_type1_template = { - { SMBIOS_TYPE_SYSTEM, sizeof (struct smbios_table_type1), 0 }, - 1, /* manufacturer string */ - 2, /* product string */ - 3, /* version string */ - 4, /* serial number string */ - { 0 }, - SMBIOS_WAKEUP_SWITCH, - 5, /* sku string */ - 6 /* family string */ -}; - -static int smbios_type1_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, - uint16_t *n, uint16_t *size); - -const char *smbios_type1_strings[] = { - " ", /* manufacturer string */ - "BHYVE", /* product name string */ - "1.0", /* version string */ - "None", /* serial number string */ - "None", /* sku string */ - " ", /* family name string */ - NULL -}; - -struct smbios_table_type3 smbios_type3_template = { - { SMBIOS_TYPE_CHASSIS, sizeof (struct smbios_table_type3), 0 }, - 1, /* manufacturer string */ - SMBIOS_CHT_UNKNOWN, - 2, /* version string */ - 3, /* serial number string */ - 4, /* asset tag string */ - SMBIOS_CHST_SAFE, - SMBIOS_CHST_SAFE, - SMBIOS_CHST_SAFE, - SMBIOS_CHSC_NONE, - 0, /* height in 'u's (0=enclosure height unspecified) */ - 0, /* number of power cords (0=number unspecified) */ - 0, /* number of contained element records */ - 0, /* length of records */ - 5 /* sku number string */ -}; - -const char *smbios_type3_strings[] = { - " ", /* manufacturer string */ - "1.0", /* version string */ - "None", /* serial number string */ - "None", /* asset tag string */ - "None", /* sku number string */ - NULL -}; - -struct smbios_table_type4 smbios_type4_template = { - { SMBIOS_TYPE_PROCESSOR, sizeof (struct smbios_table_type4), 0 }, - 1, /* socket designation string */ - SMBIOS_PRT_CENTRAL, - SMBIOS_PRF_OTHER, - 2, /* manufacturer string */ - 0, /* cpuid */ - 3, /* version string */ - 0, /* voltage */ - 0, /* external clock frequency in mhz (0=unknown) */ - 0, /* maximum frequency in mhz (0=unknown) */ - 0, /* current frequency in mhz (0=unknown) */ - 
SMBIOS_PRS_PRESENT | SMBIOS_PRS_ENABLED, - SMBIOS_PRU_NONE, - -1, /* l1 cache handle */ - -1, /* l2 cache handle */ - -1, /* l3 cache handle */ - 4, /* serial number string */ - 5, /* asset tag string */ - 6, /* part number string */ - 0, /* cores per socket (0=unknown) */ - 0, /* enabled cores per socket (0=unknown) */ - 0, /* threads per socket (0=unknown) */ - SMBIOS_PFL_64B, - SMBIOS_PRF_OTHER -}; - -const char *smbios_type4_strings[] = { - " ", /* socket designation string */ - " ", /* manufacturer string */ - " ", /* version string */ - "None", /* serial number string */ - "None", /* asset tag string */ - "None", /* part number string */ - NULL -}; - -static int smbios_type4_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, - uint16_t *n, uint16_t *size); - -struct smbios_table_type16 smbios_type16_template = { - { SMBIOS_TYPE_MEMARRAY, sizeof (struct smbios_table_type16), 0 }, - SMBIOS_MAL_SYSMB, - SMBIOS_MAU_SYSTEM, - SMBIOS_MAE_NONE, - 0x80000000, /* max mem capacity in kb (0x80000000=use extended) */ - -1, /* handle of error (if any) */ - 0, /* number of slots or sockets (TBD) */ - 0 /* extended maximum memory capacity in bytes (TBD) */ -}; - -static int smbios_type16_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, - uint16_t *n, uint16_t *size); - -struct smbios_table_type17 smbios_type17_template = { - { SMBIOS_TYPE_MEMDEVICE, sizeof (struct smbios_table_type17), 0 }, - -1, /* handle of physical memory array */ - -1, /* handle of memory error data */ - 64, /* total width in bits including ecc */ - 64, /* data width in bits */ - 0x7fff, /* size in bytes (0x7fff=use extended)*/ - SMBIOS_MDFF_UNKNOWN, - 0, /* set (0x00=none, 0xff=unknown) */ - 1, /* device locator string */ - 2, /* physical bank locator string */ - SMBIOS_MDT_UNKNOWN, - SMBIOS_MDF_UNKNOWN, - 0, /* maximum memory speed in mhz (0=unknown) */ - 3, /* 
manufacturer string */ - 4, /* serial number string */ - 5, /* asset tag string */ - 6, /* part number string */ - 0, /* attributes (0=unknown rank information) */ - 0, /* extended size in mb (TBD) */ - 0, /* current speed in mhz (0=unknown) */ - 0, /* minimum voltage in mv (0=unknown) */ - 0, /* maximum voltage in mv (0=unknown) */ - 0 /* configured voltage in mv (0=unknown) */ -}; - -const char *smbios_type17_strings[] = { - " ", /* device locator string */ - " ", /* physical bank locator string */ - " ", /* manufacturer string */ - "None", /* serial number string */ - "None", /* asset tag string */ - "None", /* part number string */ - NULL -}; - -static int smbios_type17_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, - uint16_t *n, uint16_t *size); - -struct smbios_table_type19 smbios_type19_template = { - { SMBIOS_TYPE_MEMARRAYMAP, sizeof (struct smbios_table_type19), 0 }, - 0xffffffff, /* starting phys addr in kb (0xffffffff=use ext) */ - 0xffffffff, /* ending phys addr in kb (0xffffffff=use ext) */ - -1, /* physical memory array handle */ - 1, /* number of devices that form a row */ - 0, /* extended starting phys addr in bytes (TDB) */ - 0 /* extended ending phys addr in bytes (TDB) */ -}; - -static int smbios_type19_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, - uint16_t *n, uint16_t *size); - -struct smbios_table_type32 smbios_type32_template = { - { SMBIOS_TYPE_BOOT, sizeof (struct smbios_table_type32), 0 }, - { 0, 0, 0, 0, 0, 0 }, - SMBIOS_BOOT_NORMAL -}; - -struct smbios_table_type127 smbios_type127_template = { - { SMBIOS_TYPE_EOT, sizeof (struct smbios_table_type127), 0 } -}; - -static int smbios_generic_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, - uint16_t *n, uint16_t *size); - -static struct smbios_template_entry smbios_template[] = { - 
{ (struct smbios_structure *)&smbios_type0_template, - smbios_type0_strings, - smbios_generic_initializer }, - { (struct smbios_structure *)&smbios_type1_template, - smbios_type1_strings, - smbios_type1_initializer }, - { (struct smbios_structure *)&smbios_type3_template, - smbios_type3_strings, - smbios_generic_initializer }, - { (struct smbios_structure *)&smbios_type4_template, - smbios_type4_strings, - smbios_type4_initializer }, - { (struct smbios_structure *)&smbios_type16_template, - NULL, - smbios_type16_initializer }, - { (struct smbios_structure *)&smbios_type17_template, - smbios_type17_strings, - smbios_type17_initializer }, - { (struct smbios_structure *)&smbios_type19_template, - NULL, - smbios_type19_initializer }, - { (struct smbios_structure *)&smbios_type32_template, - NULL, - smbios_generic_initializer }, - { (struct smbios_structure *)&smbios_type127_template, - NULL, - smbios_generic_initializer }, - { NULL,NULL, NULL } -}; - -static uint64_t guest_lomem, guest_himem; -static uint16_t type16_handle; - -static int -smbios_generic_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, - uint16_t *n, uint16_t *size) -{ - struct smbios_structure *entry; - - memcpy(curaddr, template_entry, template_entry->length); - entry = (struct smbios_structure *)curaddr; - entry->handle = *n + 1; - curaddr += entry->length; - if (template_strings != NULL) { - int i; - - for (i = 0; template_strings[i] != NULL; i++) { - const char *string; - int len; - - string = template_strings[i]; - len = strlen(string) + 1; - memcpy(curaddr, string, len); - curaddr += len; - } - *curaddr = '\0'; - curaddr++; - } else { - /* Minimum string section is double nul */ - *curaddr = '\0'; - curaddr++; - *curaddr = '\0'; - curaddr++; - } - (*n)++; - *endaddr = curaddr; - - return (0); -} - -static int -smbios_type1_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char 
**endaddr, - uint16_t *n, uint16_t *size) -{ - struct smbios_table_type1 *type1; - - smbios_generic_initializer(template_entry, template_strings, - curaddr, endaddr, n, size); - type1 = (struct smbios_table_type1 *)curaddr; - - if (guest_uuid_str != NULL) { - uuid_t uuid; - uint32_t status; - - uuid_from_string(guest_uuid_str, &uuid, &status); - if (status != uuid_s_ok) - return (-1); - - uuid_enc_le(&type1->uuid, &uuid); - } else { - MD5_CTX mdctx; - u_char digest[16]; - char hostname[MAXHOSTNAMELEN]; - - /* - * Universally unique and yet reproducible are an - * oxymoron, however reproducible is desirable in - * this case. - */ - if (gethostname(hostname, sizeof(hostname))) - return (-1); - - MD5Init(&mdctx); - MD5Update(&mdctx, vmname, strlen(vmname)); - MD5Update(&mdctx, hostname, sizeof(hostname)); - MD5Final(digest, &mdctx); - - /* - * Set the variant and version number. - */ - digest[6] &= 0x0F; - digest[6] |= 0x30; /* version 3 */ - digest[8] &= 0x3F; - digest[8] |= 0x80; - - memcpy(&type1->uuid, digest, sizeof (digest)); - } - - return (0); -} - -static int -smbios_type4_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, - uint16_t *n, uint16_t *size) -{ - int i; - - for (i = 0; i < guest_ncpus; i++) { - struct smbios_table_type4 *type4; - char *p; - int nstrings, len; - - smbios_generic_initializer(template_entry, template_strings, - curaddr, endaddr, n, size); - type4 = (struct smbios_table_type4 *)curaddr; - p = curaddr + sizeof (struct smbios_table_type4); - nstrings = 0; - while (p < *endaddr - 1) { - if (*p++ == '\0') - nstrings++; - } - len = sprintf(*endaddr - 1, "CPU #%d", i) + 1; - *endaddr += len - 1; - *(*endaddr) = '\0'; - (*endaddr)++; - type4->socket = nstrings + 1; - curaddr = *endaddr; - } - - return (0); -} - -static int -smbios_type16_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, - uint16_t *n, uint16_t 
*size) -{ - struct smbios_table_type16 *type16; - - type16_handle = *n; - smbios_generic_initializer(template_entry, template_strings, - curaddr, endaddr, n, size); - type16 = (struct smbios_table_type16 *)curaddr; - type16->xsize = guest_lomem + guest_himem; - type16->ndevs = guest_himem > 0 ? 2 : 1; - - return (0); -} - -static int -smbios_type17_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, - uint16_t *n, uint16_t *size) -{ - struct smbios_table_type17 *type17; - - smbios_generic_initializer(template_entry, template_strings, - curaddr, endaddr, n, size); - type17 = (struct smbios_table_type17 *)curaddr; - type17->arrayhand = type16_handle; - type17->xsize = guest_lomem; - - if (guest_himem > 0) { - curaddr = *endaddr; - smbios_generic_initializer(template_entry, template_strings, - curaddr, endaddr, n, size); - type17 = (struct smbios_table_type17 *)curaddr; - type17->arrayhand = type16_handle; - type17->xsize = guest_himem; - } - - return (0); -} - -static int -smbios_type19_initializer(struct smbios_structure *template_entry, - const char **template_strings, char *curaddr, char **endaddr, - uint16_t *n, uint16_t *size) -{ - struct smbios_table_type19 *type19; - - smbios_generic_initializer(template_entry, template_strings, - curaddr, endaddr, n, size); - type19 = (struct smbios_table_type19 *)curaddr; - type19->arrayhand = type16_handle; - type19->xsaddr = 0; - type19->xeaddr = guest_lomem; - - if (guest_himem > 0) { - curaddr = *endaddr; - smbios_generic_initializer(template_entry, template_strings, - curaddr, endaddr, n, size); - type19 = (struct smbios_table_type19 *)curaddr; - type19->arrayhand = type16_handle; - type19->xsaddr = 4*GB; - type19->xeaddr = guest_himem; - } - - return (0); -} - -static void -smbios_ep_initializer(struct smbios_entry_point *smbios_ep, uint32_t staddr) -{ - memset(smbios_ep, 0, sizeof(*smbios_ep)); - memcpy(smbios_ep->eanchor, SMBIOS_ENTRY_EANCHOR, - 
SMBIOS_ENTRY_EANCHORLEN); - smbios_ep->eplen = 0x1F; - assert(sizeof (struct smbios_entry_point) == smbios_ep->eplen); - smbios_ep->major = 2; - smbios_ep->minor = 6; - smbios_ep->revision = 0; - memcpy(smbios_ep->ianchor, SMBIOS_ENTRY_IANCHOR, - SMBIOS_ENTRY_IANCHORLEN); - smbios_ep->staddr = staddr; - smbios_ep->bcdrev = 0x24; -} - -static void -smbios_ep_finalizer(struct smbios_entry_point *smbios_ep, uint16_t len, - uint16_t num, uint16_t maxssize) -{ - uint8_t checksum; - int i; - - smbios_ep->maxssize = maxssize; - smbios_ep->stlen = len; - smbios_ep->stnum = num; - - checksum = 0; - for (i = 0x10; i < 0x1f; i++) { - checksum -= ((uint8_t *)smbios_ep)[i]; - } - smbios_ep->ichecksum = checksum; - - checksum = 0; - for (i = 0; i < 0x1f; i++) { - checksum -= ((uint8_t *)smbios_ep)[i]; - } - smbios_ep->echecksum = checksum; -} - -int -smbios_build(struct vmctx *ctx) -{ - struct smbios_entry_point *smbios_ep; - uint16_t n; - uint16_t maxssize; - char *curaddr, *startaddr, *ststartaddr; - int i; - int err; - - guest_lomem = vm_get_lowmem_size(ctx); - guest_himem = vm_get_highmem_size(ctx); - - startaddr = paddr_guest2host(ctx, SMBIOS_BASE, SMBIOS_MAX_LENGTH); - if (startaddr == NULL) { - fprintf(stderr, "smbios table requires mapped mem\n"); - return (ENOMEM); - } - - curaddr = startaddr; - - smbios_ep = (struct smbios_entry_point *)curaddr; - smbios_ep_initializer(smbios_ep, SMBIOS_BASE + - sizeof(struct smbios_entry_point)); - curaddr += sizeof(struct smbios_entry_point); - ststartaddr = curaddr; - - n = 0; - maxssize = 0; - for (i = 0; smbios_template[i].entry != NULL; i++) { - struct smbios_structure *entry; - const char **strings; - initializer_func_t initializer; - char *endaddr; - uint16_t size; - - entry = smbios_template[i].entry; - strings = smbios_template[i].strings; - initializer = smbios_template[i].initializer; - - err = (*initializer)(entry, strings, curaddr, &endaddr, - &n, &size); - if (err != 0) - return (err); - - if (size > maxssize) - maxssize 
= size; - - curaddr = endaddr; - } - - assert(curaddr - startaddr < SMBIOS_MAX_LENGTH); - smbios_ep_finalizer(smbios_ep, curaddr - ststartaddr, n, maxssize); - - return (0); -} diff --git a/usr.sbin/bhyve/smbiostbl.h b/usr.sbin/bhyve/smbiostbl.h deleted file mode 100644 index e8b3a4f..0000000 --- a/usr.sbin/bhyve/smbiostbl.h +++ /dev/null @@ -1,36 +0,0 @@ -/*- - * Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _SMBIOSTBL_H_ -#define _SMBIOSTBL_H_ - -struct vmctx; - -int smbios_build(struct vmctx *ctx); - -#endif /* _SMBIOSTBL_H_ */ diff --git a/usr.sbin/bhyve/spinup_ap.c b/usr.sbin/bhyve/spinup_ap.c deleted file mode 100644 index c597023..0000000 --- a/usr.sbin/bhyve/spinup_ap.c +++ /dev/null @@ -1,104 +0,0 @@ -/*- - * Copyright (c) 2012 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/types.h> - -#include <machine/vmm.h> -#include <vmmapi.h> - -#include <stdio.h> -#include <stdlib.h> -#include <assert.h> - -#include "bhyverun.h" -#include "spinup_ap.h" - -static void -spinup_ap_realmode(struct vmctx *ctx, int newcpu, uint64_t *rip) -{ - int vector, error; - uint16_t cs; - uint64_t desc_base; - uint32_t desc_limit, desc_access; - - vector = *rip >> PAGE_SHIFT; - *rip = 0; - - /* - * Update the %cs and %rip of the guest so that it starts - * executing real mode code at at 'vector << 12'. - */ - error = vm_set_register(ctx, newcpu, VM_REG_GUEST_RIP, *rip); - assert(error == 0); - - error = vm_get_desc(ctx, newcpu, VM_REG_GUEST_CS, &desc_base, - &desc_limit, &desc_access); - assert(error == 0); - - desc_base = vector << PAGE_SHIFT; - error = vm_set_desc(ctx, newcpu, VM_REG_GUEST_CS, - desc_base, desc_limit, desc_access); - assert(error == 0); - - cs = (vector << PAGE_SHIFT) >> 4; - error = vm_set_register(ctx, newcpu, VM_REG_GUEST_CS, cs); - assert(error == 0); -} - -int -spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip) -{ - int error; - - assert(newcpu != 0); - assert(newcpu < guest_ncpus); - - error = vcpu_reset(ctx, newcpu); - assert(error == 0); - - fbsdrun_set_capabilities(ctx, newcpu); - - /* - * Enable the 'unrestricted guest' mode for 'newcpu'. - * - * Set up the processor state in power-on 16-bit mode, with the CS:IP - * init'd to the specified low-mem 4K page. - */ - error = vm_set_capability(ctx, newcpu, VM_CAP_UNRESTRICTED_GUEST, 1); - assert(error == 0); - - spinup_ap_realmode(ctx, newcpu, &rip); - - fbsdrun_addcpu(ctx, vcpu, newcpu, rip); - - return (newcpu); -} diff --git a/usr.sbin/bhyve/spinup_ap.h b/usr.sbin/bhyve/spinup_ap.h deleted file mode 100644 index 2749ee9..0000000 --- a/usr.sbin/bhyve/spinup_ap.h +++ /dev/null @@ -1,34 +0,0 @@ -/*- - * Copyright (c) 2012 NetApp, Inc. - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _SPINUP_AP_H_ -#define _SPINUP_AP_H_ - -int spinup_ap(struct vmctx *ctx, int vcpu, int newcpu, uint64_t rip); - -#endif diff --git a/usr.sbin/bhyve/task_switch.c b/usr.sbin/bhyve/task_switch.c deleted file mode 100644 index 69dfaae..0000000 --- a/usr.sbin/bhyve/task_switch.c +++ /dev/null @@ -1,939 +0,0 @@ -/*- - * Copyright (c) 2014 Neel Natu <neel@freebsd.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/_iovec.h> -#include <sys/mman.h> - -#include <x86/psl.h> -#include <x86/segments.h> -#include <x86/specialreg.h> -#include <machine/vmm.h> -#include <machine/vmm_instruction_emul.h> - -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <assert.h> -#include <errno.h> - -#include <vmmapi.h> - -#include "bhyverun.h" - -/* - * Using 'struct i386tss' is tempting but causes myriad sign extension - * issues because all of its fields are defined as signed integers. 
- */ -struct tss32 { - uint16_t tss_link; - uint16_t rsvd1; - uint32_t tss_esp0; - uint16_t tss_ss0; - uint16_t rsvd2; - uint32_t tss_esp1; - uint16_t tss_ss1; - uint16_t rsvd3; - uint32_t tss_esp2; - uint16_t tss_ss2; - uint16_t rsvd4; - uint32_t tss_cr3; - uint32_t tss_eip; - uint32_t tss_eflags; - uint32_t tss_eax; - uint32_t tss_ecx; - uint32_t tss_edx; - uint32_t tss_ebx; - uint32_t tss_esp; - uint32_t tss_ebp; - uint32_t tss_esi; - uint32_t tss_edi; - uint16_t tss_es; - uint16_t rsvd5; - uint16_t tss_cs; - uint16_t rsvd6; - uint16_t tss_ss; - uint16_t rsvd7; - uint16_t tss_ds; - uint16_t rsvd8; - uint16_t tss_fs; - uint16_t rsvd9; - uint16_t tss_gs; - uint16_t rsvd10; - uint16_t tss_ldt; - uint16_t rsvd11; - uint16_t tss_trap; - uint16_t tss_iomap; -}; -CTASSERT(sizeof(struct tss32) == 104); - -#define SEL_START(sel) (((sel) & ~0x7)) -#define SEL_LIMIT(sel) (((sel) | 0x7)) -#define TSS_BUSY(type) (((type) & 0x2) != 0) - -static uint64_t -GETREG(struct vmctx *ctx, int vcpu, int reg) -{ - uint64_t val; - int error; - - error = vm_get_register(ctx, vcpu, reg, &val); - assert(error == 0); - return (val); -} - -static void -SETREG(struct vmctx *ctx, int vcpu, int reg, uint64_t val) -{ - int error; - - error = vm_set_register(ctx, vcpu, reg, val); - assert(error == 0); -} - -static struct seg_desc -usd_to_seg_desc(struct user_segment_descriptor *usd) -{ - struct seg_desc seg_desc; - - seg_desc.base = (u_int)USD_GETBASE(usd); - if (usd->sd_gran) - seg_desc.limit = (u_int)(USD_GETLIMIT(usd) << 12) | 0xfff; - else - seg_desc.limit = (u_int)USD_GETLIMIT(usd); - seg_desc.access = usd->sd_type | usd->sd_dpl << 5 | usd->sd_p << 7; - seg_desc.access |= usd->sd_xx << 12; - seg_desc.access |= usd->sd_def32 << 14; - seg_desc.access |= usd->sd_gran << 15; - - return (seg_desc); -} - -/* - * Inject an exception with an error code that is a segment selector. - * The format of the error code is described in section 6.13, "Error Code", - * Intel SDM volume 3. 
- * - * Bit 0 (EXT) denotes whether the exception occurred during delivery - * of an external event like an interrupt. - * - * Bit 1 (IDT) indicates whether the selector points to a gate descriptor - * in the IDT. - * - * Bit 2(GDT/LDT) has the usual interpretation of Table Indicator (TI). - */ -static void -sel_exception(struct vmctx *ctx, int vcpu, int vector, uint16_t sel, int ext) -{ - /* - * Bit 2 from the selector is retained as-is in the error code. - * - * Bit 1 can be safely cleared because none of the selectors - * encountered during task switch emulation refer to a task - * gate in the IDT. - * - * Bit 0 is set depending on the value of 'ext'. - */ - sel &= ~0x3; - if (ext) - sel |= 0x1; - vm_inject_fault(ctx, vcpu, vector, 1, sel); -} - -/* - * Return 0 if the selector 'sel' in within the limits of the GDT/LDT - * and non-zero otherwise. - */ -static int -desc_table_limit_check(struct vmctx *ctx, int vcpu, uint16_t sel) -{ - uint64_t base; - uint32_t limit, access; - int error, reg; - - reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR; - error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access); - assert(error == 0); - - if (reg == VM_REG_GUEST_LDTR) { - if (SEG_DESC_UNUSABLE(access) || !SEG_DESC_PRESENT(access)) - return (-1); - } - - if (limit < SEL_LIMIT(sel)) - return (-1); - else - return (0); -} - -/* - * Read/write the segment descriptor 'desc' into the GDT/LDT slot referenced - * by the selector 'sel'. - * - * Returns 0 on success. - * Returns 1 if an exception was injected into the guest. - * Returns -1 otherwise. - */ -static int -desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - uint16_t sel, struct user_segment_descriptor *desc, bool doread, - int *faultptr) -{ - struct iovec iov[2]; - uint64_t base; - uint32_t limit, access; - int error, reg; - - reg = ISLDT(sel) ? 
VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR; - error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access); - assert(error == 0); - assert(limit >= SEL_LIMIT(sel)); - - error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel), - sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov), - faultptr); - if (error || *faultptr) - return (error); - - if (doread) - vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc)); - else - vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc)); - return (0); -} - -static int -desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - uint16_t sel, struct user_segment_descriptor *desc, int *faultptr) -{ - return (desc_table_rw(ctx, vcpu, paging, sel, desc, true, faultptr)); -} - -static int -desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - uint16_t sel, struct user_segment_descriptor *desc, int *faultptr) -{ - return (desc_table_rw(ctx, vcpu, paging, sel, desc, false, faultptr)); -} - -/* - * Read the TSS descriptor referenced by 'sel' into 'desc'. - * - * Returns 0 on success. - * Returns 1 if an exception was injected into the guest. - * Returns -1 otherwise. 
- */ -static int -read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, - uint16_t sel, struct user_segment_descriptor *desc, int *faultptr) -{ - struct vm_guest_paging sup_paging; - int error; - - assert(!ISLDT(sel)); - assert(IDXSEL(sel) != 0); - - /* Fetch the new TSS descriptor */ - if (desc_table_limit_check(ctx, vcpu, sel)) { - if (ts->reason == TSR_IRET) - sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); - else - sel_exception(ctx, vcpu, IDT_GP, sel, ts->ext); - return (1); - } - - sup_paging = ts->paging; - sup_paging.cpl = 0; /* implicit supervisor mode */ - error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc, faultptr); - return (error); -} - -static bool -code_desc(int sd_type) -{ - /* code descriptor */ - return ((sd_type & 0x18) == 0x18); -} - -static bool -stack_desc(int sd_type) -{ - /* writable data descriptor */ - return ((sd_type & 0x1A) == 0x12); -} - -static bool -data_desc(int sd_type) -{ - /* data descriptor or a readable code descriptor */ - return ((sd_type & 0x18) == 0x10 || (sd_type & 0x1A) == 0x1A); -} - -static bool -ldt_desc(int sd_type) -{ - - return (sd_type == SDT_SYSLDT); -} - -/* - * Validate the descriptor 'seg_desc' associated with 'segment'. 
- */ -static int -validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, - int segment, struct seg_desc *seg_desc, int *faultptr) -{ - struct vm_guest_paging sup_paging; - struct user_segment_descriptor usd; - int error, idtvec; - int cpl, dpl, rpl; - uint16_t sel, cs; - bool ldtseg, codeseg, stackseg, dataseg, conforming; - - ldtseg = codeseg = stackseg = dataseg = false; - switch (segment) { - case VM_REG_GUEST_LDTR: - ldtseg = true; - break; - case VM_REG_GUEST_CS: - codeseg = true; - break; - case VM_REG_GUEST_SS: - stackseg = true; - break; - case VM_REG_GUEST_DS: - case VM_REG_GUEST_ES: - case VM_REG_GUEST_FS: - case VM_REG_GUEST_GS: - dataseg = true; - break; - default: - assert(0); - } - - /* Get the segment selector */ - sel = GETREG(ctx, vcpu, segment); - - /* LDT selector must point into the GDT */ - if (ldtseg && ISLDT(sel)) { - sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); - return (1); - } - - /* Descriptor table limit check */ - if (desc_table_limit_check(ctx, vcpu, sel)) { - sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); - return (1); - } - - /* NULL selector */ - if (IDXSEL(sel) == 0) { - /* Code and stack segment selectors cannot be NULL */ - if (codeseg || stackseg) { - sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); - return (1); - } - seg_desc->base = 0; - seg_desc->limit = 0; - seg_desc->access = 0x10000; /* unusable */ - return (0); - } - - /* Read the descriptor from the GDT/LDT */ - sup_paging = ts->paging; - sup_paging.cpl = 0; /* implicit supervisor mode */ - error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd, faultptr); - if (error || *faultptr) - return (error); - - /* Verify that the descriptor type is compatible with the segment */ - if ((ldtseg && !ldt_desc(usd.sd_type)) || - (codeseg && !code_desc(usd.sd_type)) || - (dataseg && !data_desc(usd.sd_type)) || - (stackseg && !stack_desc(usd.sd_type))) { - sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); - return (1); - } - - /* Segment must be marked present 
*/ - if (!usd.sd_p) { - if (ldtseg) - idtvec = IDT_TS; - else if (stackseg) - idtvec = IDT_SS; - else - idtvec = IDT_NP; - sel_exception(ctx, vcpu, idtvec, sel, ts->ext); - return (1); - } - - cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS); - cpl = cs & SEL_RPL_MASK; - rpl = sel & SEL_RPL_MASK; - dpl = usd.sd_dpl; - - if (stackseg && (rpl != cpl || dpl != cpl)) { - sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); - return (1); - } - - if (codeseg) { - conforming = (usd.sd_type & 0x4) ? true : false; - if ((conforming && (cpl < dpl)) || - (!conforming && (cpl != dpl))) { - sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); - return (1); - } - } - - if (dataseg) { - /* - * A data segment is always non-conforming except when it's - * descriptor is a readable, conforming code segment. - */ - if (code_desc(usd.sd_type) && (usd.sd_type & 0x4) != 0) - conforming = true; - else - conforming = false; - - if (!conforming && (rpl > dpl || cpl > dpl)) { - sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); - return (1); - } - } - *seg_desc = usd_to_seg_desc(&usd); - return (0); -} - -static void -tss32_save(struct vmctx *ctx, int vcpu, struct vm_task_switch *task_switch, - uint32_t eip, struct tss32 *tss, struct iovec *iov) -{ - - /* General purpose registers */ - tss->tss_eax = GETREG(ctx, vcpu, VM_REG_GUEST_RAX); - tss->tss_ecx = GETREG(ctx, vcpu, VM_REG_GUEST_RCX); - tss->tss_edx = GETREG(ctx, vcpu, VM_REG_GUEST_RDX); - tss->tss_ebx = GETREG(ctx, vcpu, VM_REG_GUEST_RBX); - tss->tss_esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP); - tss->tss_ebp = GETREG(ctx, vcpu, VM_REG_GUEST_RBP); - tss->tss_esi = GETREG(ctx, vcpu, VM_REG_GUEST_RSI); - tss->tss_edi = GETREG(ctx, vcpu, VM_REG_GUEST_RDI); - - /* Segment selectors */ - tss->tss_es = GETREG(ctx, vcpu, VM_REG_GUEST_ES); - tss->tss_cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS); - tss->tss_ss = GETREG(ctx, vcpu, VM_REG_GUEST_SS); - tss->tss_ds = GETREG(ctx, vcpu, VM_REG_GUEST_DS); - tss->tss_fs = GETREG(ctx, vcpu, VM_REG_GUEST_FS); - tss->tss_gs = 
GETREG(ctx, vcpu, VM_REG_GUEST_GS); - - /* eflags and eip */ - tss->tss_eflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS); - if (task_switch->reason == TSR_IRET) - tss->tss_eflags &= ~PSL_NT; - tss->tss_eip = eip; - - /* Copy updated old TSS into guest memory */ - vm_copyout(ctx, vcpu, tss, iov, sizeof(struct tss32)); -} - -static void -update_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *sd) -{ - int error; - - error = vm_set_desc(ctx, vcpu, reg, sd->base, sd->limit, sd->access); - assert(error == 0); -} - -/* - * Update the vcpu registers to reflect the state of the new task. - */ -static int -tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, - uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr) -{ - struct seg_desc seg_desc, seg_desc2; - uint64_t *pdpte, maxphyaddr, reserved; - uint32_t eflags; - int error, i; - bool nested; - - nested = false; - if (ts->reason != TSR_IRET && ts->reason != TSR_JMP) { - tss->tss_link = ot_sel; - nested = true; - } - - eflags = tss->tss_eflags; - if (nested) - eflags |= PSL_NT; - - /* LDTR */ - SETREG(ctx, vcpu, VM_REG_GUEST_LDTR, tss->tss_ldt); - - /* PBDR */ - if (ts->paging.paging_mode != PAGING_MODE_FLAT) { - if (ts->paging.paging_mode == PAGING_MODE_PAE) { - /* - * XXX Assuming 36-bit MAXPHYADDR. - */ - maxphyaddr = (1UL << 36) - 1; - pdpte = paddr_guest2host(ctx, tss->tss_cr3 & ~0x1f, 32); - for (i = 0; i < 4; i++) { - /* Check reserved bits if the PDPTE is valid */ - if (!(pdpte[i] & 0x1)) - continue; - /* - * Bits 2:1, 8:5 and bits above the processor's - * maximum physical address are reserved. 
- */ - reserved = ~maxphyaddr | 0x1E6; - if (pdpte[i] & reserved) { - vm_inject_gp(ctx, vcpu); - return (1); - } - } - SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE0, pdpte[0]); - SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE1, pdpte[1]); - SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE2, pdpte[2]); - SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE3, pdpte[3]); - } - SETREG(ctx, vcpu, VM_REG_GUEST_CR3, tss->tss_cr3); - ts->paging.cr3 = tss->tss_cr3; - } - - /* eflags and eip */ - SETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS, eflags); - SETREG(ctx, vcpu, VM_REG_GUEST_RIP, tss->tss_eip); - - /* General purpose registers */ - SETREG(ctx, vcpu, VM_REG_GUEST_RAX, tss->tss_eax); - SETREG(ctx, vcpu, VM_REG_GUEST_RCX, tss->tss_ecx); - SETREG(ctx, vcpu, VM_REG_GUEST_RDX, tss->tss_edx); - SETREG(ctx, vcpu, VM_REG_GUEST_RBX, tss->tss_ebx); - SETREG(ctx, vcpu, VM_REG_GUEST_RSP, tss->tss_esp); - SETREG(ctx, vcpu, VM_REG_GUEST_RBP, tss->tss_ebp); - SETREG(ctx, vcpu, VM_REG_GUEST_RSI, tss->tss_esi); - SETREG(ctx, vcpu, VM_REG_GUEST_RDI, tss->tss_edi); - - /* Segment selectors */ - SETREG(ctx, vcpu, VM_REG_GUEST_ES, tss->tss_es); - SETREG(ctx, vcpu, VM_REG_GUEST_CS, tss->tss_cs); - SETREG(ctx, vcpu, VM_REG_GUEST_SS, tss->tss_ss); - SETREG(ctx, vcpu, VM_REG_GUEST_DS, tss->tss_ds); - SETREG(ctx, vcpu, VM_REG_GUEST_FS, tss->tss_fs); - SETREG(ctx, vcpu, VM_REG_GUEST_GS, tss->tss_gs); - - /* - * If this is a nested task then write out the new TSS to update - * the previous link field. - */ - if (nested) - vm_copyout(ctx, vcpu, tss, iov, sizeof(*tss)); - - /* Validate segment descriptors */ - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc, - faultptr); - if (error || *faultptr) - return (error); - update_seg_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &seg_desc); - - /* - * Section "Checks on Guest Segment Registers", Intel SDM, Vol 3. - * - * The SS and CS attribute checks on VM-entry are inter-dependent so - * we need to make sure that both segments are valid before updating - * either of them. 
This ensures that the VMCS state can pass the - * VM-entry checks so the guest can handle any exception injected - * during task switch emulation. - */ - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc, - faultptr); - if (error || *faultptr) - return (error); - - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2, - faultptr); - if (error || *faultptr) - return (error); - update_seg_desc(ctx, vcpu, VM_REG_GUEST_CS, &seg_desc); - update_seg_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc2); - ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK; - - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc, - faultptr); - if (error || *faultptr) - return (error); - update_seg_desc(ctx, vcpu, VM_REG_GUEST_DS, &seg_desc); - - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc, - faultptr); - if (error || *faultptr) - return (error); - update_seg_desc(ctx, vcpu, VM_REG_GUEST_ES, &seg_desc); - - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc, - faultptr); - if (error || *faultptr) - return (error); - update_seg_desc(ctx, vcpu, VM_REG_GUEST_FS, &seg_desc); - - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc, - faultptr); - if (error || *faultptr) - return (error); - update_seg_desc(ctx, vcpu, VM_REG_GUEST_GS, &seg_desc); - - return (0); -} - -/* - * Push an error code on the stack of the new task. This is needed if the - * task switch was triggered by a hardware exception that causes an error - * code to be saved (e.g. #PF). 
- */ -static int -push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - int task_type, uint32_t errcode, int *faultptr) -{ - struct iovec iov[2]; - struct seg_desc seg_desc; - int stacksize, bytes, error; - uint64_t gla, cr0, rflags; - uint32_t esp; - uint16_t stacksel; - - *faultptr = 0; - - cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0); - rflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS); - stacksel = GETREG(ctx, vcpu, VM_REG_GUEST_SS); - - error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc.base, - &seg_desc.limit, &seg_desc.access); - assert(error == 0); - - /* - * Section "Error Code" in the Intel SDM vol 3: the error code is - * pushed on the stack as a doubleword or word (depending on the - * default interrupt, trap or task gate size). - */ - if (task_type == SDT_SYS386BSY || task_type == SDT_SYS386TSS) - bytes = 4; - else - bytes = 2; - - /* - * PUSH instruction from Intel SDM vol 2: the 'B' flag in the - * stack-segment descriptor determines the size of the stack - * pointer outside of 64-bit mode. - */ - if (SEG_DESC_DEF32(seg_desc.access)) - stacksize = 4; - else - stacksize = 2; - - esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP); - esp -= bytes; - - if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, - &seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) { - sel_exception(ctx, vcpu, IDT_SS, stacksel, 1); - *faultptr = 1; - return (0); - } - - if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) { - vm_inject_ac(ctx, vcpu, 1); - *faultptr = 1; - return (0); - } - - error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE, - iov, nitems(iov), faultptr); - if (error || *faultptr) - return (error); - - vm_copyout(ctx, vcpu, &errcode, iov, bytes); - SETREG(ctx, vcpu, VM_REG_GUEST_RSP, esp); - return (0); -} - -/* - * Evaluate return value from helper functions and potentially return to - * the VM run loop. 
- */ -#define CHKERR(error,fault) \ - do { \ - assert((error == 0) || (error == EFAULT)); \ - if (error) \ - return (VMEXIT_ABORT); \ - else if (fault) \ - return (VMEXIT_CONTINUE); \ - } while (0) - -int -vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) -{ - struct seg_desc nt; - struct tss32 oldtss, newtss; - struct vm_task_switch *task_switch; - struct vm_guest_paging *paging, sup_paging; - struct user_segment_descriptor nt_desc, ot_desc; - struct iovec nt_iov[2], ot_iov[2]; - uint64_t cr0, ot_base; - uint32_t eip, ot_lim, access; - int error, ext, fault, minlimit, nt_type, ot_type, vcpu; - enum task_switch_reason reason; - uint16_t nt_sel, ot_sel; - - task_switch = &vmexit->u.task_switch; - nt_sel = task_switch->tsssel; - ext = vmexit->u.task_switch.ext; - reason = vmexit->u.task_switch.reason; - paging = &vmexit->u.task_switch.paging; - vcpu = *pvcpu; - - assert(paging->cpu_mode == CPU_MODE_PROTECTED); - - /* - * Calculate the instruction pointer to store in the old TSS. - */ - eip = vmexit->rip + vmexit->inst_length; - - /* - * Section 4.6, "Access Rights" in Intel SDM Vol 3. 
- * The following page table accesses are implicitly supervisor mode: - * - accesses to GDT or LDT to load segment descriptors - * - accesses to the task state segment during task switch - */ - sup_paging = *paging; - sup_paging.cpl = 0; /* implicit supervisor mode */ - - /* Fetch the new TSS descriptor */ - error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc, - &fault); - CHKERR(error, fault); - - nt = usd_to_seg_desc(&nt_desc); - - /* Verify the type of the new TSS */ - nt_type = SEG_DESC_TYPE(nt.access); - if (nt_type != SDT_SYS386BSY && nt_type != SDT_SYS386TSS && - nt_type != SDT_SYS286BSY && nt_type != SDT_SYS286TSS) { - sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext); - goto done; - } - - /* TSS descriptor must have present bit set */ - if (!SEG_DESC_PRESENT(nt.access)) { - sel_exception(ctx, vcpu, IDT_NP, nt_sel, ext); - goto done; - } - - /* - * TSS must have a minimum length of 104 bytes for a 32-bit TSS and - * 44 bytes for a 16-bit TSS. - */ - if (nt_type == SDT_SYS386BSY || nt_type == SDT_SYS386TSS) - minlimit = 104 - 1; - else if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) - minlimit = 44 - 1; - else - minlimit = 0; - - assert(minlimit > 0); - if (nt.limit < minlimit) { - sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext); - goto done; - } - - /* TSS must be busy if task switch is due to IRET */ - if (reason == TSR_IRET && !TSS_BUSY(nt_type)) { - sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext); - goto done; - } - - /* - * TSS must be available (not busy) if task switch reason is - * CALL, JMP, exception or interrupt. 
- */ - if (reason != TSR_IRET && TSS_BUSY(nt_type)) { - sel_exception(ctx, vcpu, IDT_GP, nt_sel, ext); - goto done; - } - - /* Fetch the new TSS */ - error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1, - PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault); - CHKERR(error, fault); - vm_copyin(ctx, vcpu, nt_iov, &newtss, minlimit + 1); - - /* Get the old TSS selector from the guest's task register */ - ot_sel = GETREG(ctx, vcpu, VM_REG_GUEST_TR); - if (ISLDT(ot_sel) || IDXSEL(ot_sel) == 0) { - /* - * This might happen if a task switch was attempted without - * ever loading the task register with LTR. In this case the - * TR would contain the values from power-on: - * (sel = 0, base = 0, limit = 0xffff). - */ - sel_exception(ctx, vcpu, IDT_TS, ot_sel, task_switch->ext); - goto done; - } - - /* Get the old TSS base and limit from the guest's task register */ - error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, &ot_base, &ot_lim, - &access); - assert(error == 0); - assert(!SEG_DESC_UNUSABLE(access) && SEG_DESC_PRESENT(access)); - ot_type = SEG_DESC_TYPE(access); - assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY); - - /* Fetch the old TSS descriptor */ - error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc, - &fault); - CHKERR(error, fault); - - /* Get the old TSS */ - error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1, - PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault); - CHKERR(error, fault); - vm_copyin(ctx, vcpu, ot_iov, &oldtss, minlimit + 1); - - /* - * Clear the busy bit in the old TSS descriptor if the task switch - * due to an IRET or JMP instruction. 
- */ - if (reason == TSR_IRET || reason == TSR_JMP) { - ot_desc.sd_type &= ~0x2; - error = desc_table_write(ctx, vcpu, &sup_paging, ot_sel, - &ot_desc, &fault); - CHKERR(error, fault); - } - - if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) { - fprintf(stderr, "Task switch to 16-bit TSS not supported\n"); - return (VMEXIT_ABORT); - } - - /* Save processor state in old TSS */ - tss32_save(ctx, vcpu, task_switch, eip, &oldtss, ot_iov); - - /* - * If the task switch was triggered for any reason other than IRET - * then set the busy bit in the new TSS descriptor. - */ - if (reason != TSR_IRET) { - nt_desc.sd_type |= 0x2; - error = desc_table_write(ctx, vcpu, &sup_paging, nt_sel, - &nt_desc, &fault); - CHKERR(error, fault); - } - - /* Update task register to point at the new TSS */ - SETREG(ctx, vcpu, VM_REG_GUEST_TR, nt_sel); - - /* Update the hidden descriptor state of the task register */ - nt = usd_to_seg_desc(&nt_desc); - update_seg_desc(ctx, vcpu, VM_REG_GUEST_TR, &nt); - - /* Set CR0.TS */ - cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0); - SETREG(ctx, vcpu, VM_REG_GUEST_CR0, cr0 | CR0_TS); - - /* - * We are now committed to the task switch. Any exceptions encountered - * after this point will be handled in the context of the new task and - * the saved instruction pointer will belong to the new task. - */ - error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, newtss.tss_eip); - assert(error == 0); - - /* Load processor state from new TSS */ - error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov, - &fault); - CHKERR(error, fault); - - /* - * Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception - * caused an error code to be generated, this error code is copied - * to the stack of the new task. 
- */ - if (task_switch->errcode_valid) { - assert(task_switch->ext); - assert(task_switch->reason == TSR_IDT_GATE); - error = push_errcode(ctx, vcpu, &task_switch->paging, nt_type, - task_switch->errcode, &fault); - CHKERR(error, fault); - } - - /* - * Treatment of virtual-NMI blocking if NMI is delivered through - * a task gate. - * - * Section "Architectural State Before A VM Exit", Intel SDM, Vol3: - * If the virtual NMIs VM-execution control is 1, VM entry injects - * an NMI, and delivery of the NMI causes a task switch that causes - * a VM exit, virtual-NMI blocking is in effect before the VM exit - * commences. - * - * Thus, virtual-NMI blocking is in effect at the time of the task - * switch VM exit. - */ - - /* - * Treatment of virtual-NMI unblocking on IRET from NMI handler task. - * - * Section "Changes to Instruction Behavior in VMX Non-Root Operation" - * If "virtual NMIs" control is 1 IRET removes any virtual-NMI blocking. - * This unblocking of virtual-NMI occurs even if IRET causes a fault. - * - * Thus, virtual-NMI blocking is cleared at the time of the task switch - * VM exit. - */ - - /* - * If the task switch was triggered by an event delivered through - * the IDT then extinguish the pending event from the vcpu's - * exitintinfo. - */ - if (task_switch->reason == TSR_IDT_GATE) { - error = vm_set_intinfo(ctx, vcpu, 0); - assert(error == 0); - } - - /* - * XXX should inject debug exception if 'T' bit is 1 - */ -done: - return (VMEXIT_CONTINUE); -} diff --git a/usr.sbin/bhyve/uart_emul.c b/usr.sbin/bhyve/uart_emul.c deleted file mode 100644 index 538bf58..0000000 --- a/usr.sbin/bhyve/uart_emul.c +++ /dev/null @@ -1,674 +0,0 @@ -/*- - * Copyright (c) 2012 NetApp, Inc. - * Copyright (c) 2013 Neel Natu <neel@freebsd.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> -#include <dev/ic/ns16550.h> - -#include <stdio.h> -#include <stdlib.h> -#include <assert.h> -#include <fcntl.h> -#include <termios.h> -#include <unistd.h> -#include <stdbool.h> -#include <string.h> -#include <pthread.h> - -#include "mevent.h" -#include "uart_emul.h" - -#define COM1_BASE 0x3F8 -#define COM1_IRQ 4 -#define COM2_BASE 0x2F8 -#define COM2_IRQ 3 - -#define DEFAULT_RCLK 1843200 -#define DEFAULT_BAUD 9600 - -#define FCR_RX_MASK 0xC0 - -#define MCR_OUT1 0x04 -#define MCR_OUT2 0x08 - -#define MSR_DELTA_MASK 0x0f - -#ifndef REG_SCR -#define REG_SCR com_scr -#endif - -#define FIFOSZ 16 - -static bool uart_stdio; /* stdio in use for i/o */ -static struct termios tio_stdio_orig; - -static struct { - int baseaddr; - int irq; - bool inuse; -} uart_lres[] = { - { COM1_BASE, COM1_IRQ, false}, - { COM2_BASE, COM2_IRQ, false}, -}; - -#define UART_NLDEVS (sizeof(uart_lres) / sizeof(uart_lres[0])) - -struct fifo { - uint8_t buf[FIFOSZ]; - int rindex; /* index to read from */ - int windex; /* index to write to */ - int num; /* number of characters in the fifo */ - int size; /* size of the fifo */ -}; - -struct ttyfd { - bool opened; - int fd; /* tty device file descriptor */ - struct termios tio_orig, tio_new; /* I/O Terminals */ -}; - -struct uart_softc { - pthread_mutex_t mtx; /* protects all softc elements */ - uint8_t data; /* Data register (R/W) */ - uint8_t ier; /* Interrupt enable register (R/W) */ - uint8_t lcr; /* Line control register (R/W) */ - uint8_t mcr; /* Modem control register (R/W) */ - uint8_t lsr; /* Line status register (R/W) */ - uint8_t msr; /* Modem status register (R/W) */ - uint8_t fcr; /* FIFO control register (W) */ - uint8_t scr; /* Scratch register (R/W) */ - - uint8_t dll; /* Baudrate divisor latch LSB */ - uint8_t dlh; /* Baudrate divisor latch MSB */ - - struct fifo rxfifo; - struct mevent *mev; - - struct ttyfd tty; - bool 
thre_int_pending; /* THRE interrupt pending */ - - void *arg; - uart_intr_func_t intr_assert; - uart_intr_func_t intr_deassert; -}; - -static void uart_drain(int fd, enum ev_type ev, void *arg); - -static void -ttyclose(void) -{ - - tcsetattr(STDIN_FILENO, TCSANOW, &tio_stdio_orig); -} - -static void -ttyopen(struct ttyfd *tf) -{ - - tcgetattr(tf->fd, &tf->tio_orig); - - tf->tio_new = tf->tio_orig; - cfmakeraw(&tf->tio_new); - tf->tio_new.c_cflag |= CLOCAL; - tcsetattr(tf->fd, TCSANOW, &tf->tio_new); - - if (tf->fd == STDIN_FILENO) { - tio_stdio_orig = tf->tio_orig; - atexit(ttyclose); - } -} - -static int -ttyread(struct ttyfd *tf) -{ - unsigned char rb; - - if (read(tf->fd, &rb, 1) == 1) - return (rb); - else - return (-1); -} - -static void -ttywrite(struct ttyfd *tf, unsigned char wb) -{ - - (void)write(tf->fd, &wb, 1); -} - -static void -rxfifo_reset(struct uart_softc *sc, int size) -{ - char flushbuf[32]; - struct fifo *fifo; - ssize_t nread; - int error; - - fifo = &sc->rxfifo; - bzero(fifo, sizeof(struct fifo)); - fifo->size = size; - - if (sc->tty.opened) { - /* - * Flush any unread input from the tty buffer. - */ - while (1) { - nread = read(sc->tty.fd, flushbuf, sizeof(flushbuf)); - if (nread != sizeof(flushbuf)) - break; - } - - /* - * Enable mevent to trigger when new characters are available - * on the tty fd. - */ - error = mevent_enable(sc->mev); - assert(error == 0); - } -} - -static int -rxfifo_available(struct uart_softc *sc) -{ - struct fifo *fifo; - - fifo = &sc->rxfifo; - return (fifo->num < fifo->size); -} - -static int -rxfifo_putchar(struct uart_softc *sc, uint8_t ch) -{ - struct fifo *fifo; - int error; - - fifo = &sc->rxfifo; - - if (fifo->num < fifo->size) { - fifo->buf[fifo->windex] = ch; - fifo->windex = (fifo->windex + 1) % fifo->size; - fifo->num++; - if (!rxfifo_available(sc)) { - if (sc->tty.opened) { - /* - * Disable mevent callback if the FIFO is full. 
- */ - error = mevent_disable(sc->mev); - assert(error == 0); - } - } - return (0); - } else - return (-1); -} - -static int -rxfifo_getchar(struct uart_softc *sc) -{ - struct fifo *fifo; - int c, error, wasfull; - - wasfull = 0; - fifo = &sc->rxfifo; - if (fifo->num > 0) { - if (!rxfifo_available(sc)) - wasfull = 1; - c = fifo->buf[fifo->rindex]; - fifo->rindex = (fifo->rindex + 1) % fifo->size; - fifo->num--; - if (wasfull) { - if (sc->tty.opened) { - error = mevent_enable(sc->mev); - assert(error == 0); - } - } - return (c); - } else - return (-1); -} - -static int -rxfifo_numchars(struct uart_softc *sc) -{ - struct fifo *fifo = &sc->rxfifo; - - return (fifo->num); -} - -static void -uart_opentty(struct uart_softc *sc) -{ - - ttyopen(&sc->tty); - sc->mev = mevent_add(sc->tty.fd, EVF_READ, uart_drain, sc); - assert(sc->mev != NULL); -} - -static uint8_t -modem_status(uint8_t mcr) -{ - uint8_t msr; - - if (mcr & MCR_LOOPBACK) { - /* - * In the loopback mode certain bits from the MCR are - * reflected back into MSR. - */ - msr = 0; - if (mcr & MCR_RTS) - msr |= MSR_CTS; - if (mcr & MCR_DTR) - msr |= MSR_DSR; - if (mcr & MCR_OUT1) - msr |= MSR_RI; - if (mcr & MCR_OUT2) - msr |= MSR_DCD; - } else { - /* - * Always assert DCD and DSR so tty open doesn't block - * even if CLOCAL is turned off. - */ - msr = MSR_DCD | MSR_DSR; - } - assert((msr & MSR_DELTA_MASK) == 0); - - return (msr); -} - -/* - * The IIR returns a prioritized interrupt reason: - * - receive data available - * - transmit holding register empty - * - modem status change - * - * Return an interrupt reason if one is available. 
- */ -static int -uart_intr_reason(struct uart_softc *sc) -{ - - if ((sc->lsr & LSR_OE) != 0 && (sc->ier & IER_ERLS) != 0) - return (IIR_RLS); - else if (rxfifo_numchars(sc) > 0 && (sc->ier & IER_ERXRDY) != 0) - return (IIR_RXTOUT); - else if (sc->thre_int_pending && (sc->ier & IER_ETXRDY) != 0) - return (IIR_TXRDY); - else if ((sc->msr & MSR_DELTA_MASK) != 0 && (sc->ier & IER_EMSC) != 0) - return (IIR_MLSC); - else - return (IIR_NOPEND); -} - -static void -uart_reset(struct uart_softc *sc) -{ - uint16_t divisor; - - divisor = DEFAULT_RCLK / DEFAULT_BAUD / 16; - sc->dll = divisor; - sc->dlh = divisor >> 16; - sc->msr = modem_status(sc->mcr); - - rxfifo_reset(sc, 1); /* no fifo until enabled by software */ -} - -/* - * Toggle the COM port's intr pin depending on whether or not we have an - * interrupt condition to report to the processor. - */ -static void -uart_toggle_intr(struct uart_softc *sc) -{ - uint8_t intr_reason; - - intr_reason = uart_intr_reason(sc); - - if (intr_reason == IIR_NOPEND) - (*sc->intr_deassert)(sc->arg); - else - (*sc->intr_assert)(sc->arg); -} - -static void -uart_drain(int fd, enum ev_type ev, void *arg) -{ - struct uart_softc *sc; - int ch; - - sc = arg; - - assert(fd == sc->tty.fd); - assert(ev == EVF_READ); - - /* - * This routine is called in the context of the mevent thread - * to take out the softc lock to protect against concurrent - * access from a vCPU i/o exit - */ - pthread_mutex_lock(&sc->mtx); - - if ((sc->mcr & MCR_LOOPBACK) != 0) { - (void) ttyread(&sc->tty); - } else { - while (rxfifo_available(sc) && - ((ch = ttyread(&sc->tty)) != -1)) { - rxfifo_putchar(sc, ch); - } - uart_toggle_intr(sc); - } - - pthread_mutex_unlock(&sc->mtx); -} - -void -uart_write(struct uart_softc *sc, int offset, uint8_t value) -{ - int fifosz; - uint8_t msr; - - pthread_mutex_lock(&sc->mtx); - - /* - * Take care of the special case DLAB accesses first - */ - if ((sc->lcr & LCR_DLAB) != 0) { - if (offset == REG_DLL) { - sc->dll = value; - goto done; 
- } - - if (offset == REG_DLH) { - sc->dlh = value; - goto done; - } - } - - switch (offset) { - case REG_DATA: - if (sc->mcr & MCR_LOOPBACK) { - if (rxfifo_putchar(sc, value) != 0) - sc->lsr |= LSR_OE; - } else if (sc->tty.opened) { - ttywrite(&sc->tty, value); - } /* else drop on floor */ - sc->thre_int_pending = true; - break; - case REG_IER: - /* - * Apply mask so that bits 4-7 are 0 - * Also enables bits 0-3 only if they're 1 - */ - sc->ier = value & 0x0F; - break; - case REG_FCR: - /* - * When moving from FIFO and 16450 mode and vice versa, - * the FIFO contents are reset. - */ - if ((sc->fcr & FCR_ENABLE) ^ (value & FCR_ENABLE)) { - fifosz = (value & FCR_ENABLE) ? FIFOSZ : 1; - rxfifo_reset(sc, fifosz); - } - - /* - * The FCR_ENABLE bit must be '1' for the programming - * of other FCR bits to be effective. - */ - if ((value & FCR_ENABLE) == 0) { - sc->fcr = 0; - } else { - if ((value & FCR_RCV_RST) != 0) - rxfifo_reset(sc, FIFOSZ); - - sc->fcr = value & - (FCR_ENABLE | FCR_DMA | FCR_RX_MASK); - } - break; - case REG_LCR: - sc->lcr = value; - break; - case REG_MCR: - /* Apply mask so that bits 5-7 are 0 */ - sc->mcr = value & 0x1F; - msr = modem_status(sc->mcr); - - /* - * Detect if there has been any change between the - * previous and the new value of MSR. If there is - * then assert the appropriate MSR delta bit. - */ - if ((msr & MSR_CTS) ^ (sc->msr & MSR_CTS)) - sc->msr |= MSR_DCTS; - if ((msr & MSR_DSR) ^ (sc->msr & MSR_DSR)) - sc->msr |= MSR_DDSR; - if ((msr & MSR_DCD) ^ (sc->msr & MSR_DCD)) - sc->msr |= MSR_DDCD; - if ((sc->msr & MSR_RI) != 0 && (msr & MSR_RI) == 0) - sc->msr |= MSR_TERI; - - /* - * Update the value of MSR while retaining the delta - * bits. - */ - sc->msr &= MSR_DELTA_MASK; - sc->msr |= msr; - break; - case REG_LSR: - /* - * Line status register is not meant to be written to - * during normal operation. - */ - break; - case REG_MSR: - /* - * As far as I can tell MSR is a read-only register. 
- */ - break; - case REG_SCR: - sc->scr = value; - break; - default: - break; - } - -done: - uart_toggle_intr(sc); - pthread_mutex_unlock(&sc->mtx); -} - -uint8_t -uart_read(struct uart_softc *sc, int offset) -{ - uint8_t iir, intr_reason, reg; - - pthread_mutex_lock(&sc->mtx); - - /* - * Take care of the special case DLAB accesses first - */ - if ((sc->lcr & LCR_DLAB) != 0) { - if (offset == REG_DLL) { - reg = sc->dll; - goto done; - } - - if (offset == REG_DLH) { - reg = sc->dlh; - goto done; - } - } - - switch (offset) { - case REG_DATA: - reg = rxfifo_getchar(sc); - break; - case REG_IER: - reg = sc->ier; - break; - case REG_IIR: - iir = (sc->fcr & FCR_ENABLE) ? IIR_FIFO_MASK : 0; - - intr_reason = uart_intr_reason(sc); - - /* - * Deal with side effects of reading the IIR register - */ - if (intr_reason == IIR_TXRDY) - sc->thre_int_pending = false; - - iir |= intr_reason; - - reg = iir; - break; - case REG_LCR: - reg = sc->lcr; - break; - case REG_MCR: - reg = sc->mcr; - break; - case REG_LSR: - /* Transmitter is always ready for more data */ - sc->lsr |= LSR_TEMT | LSR_THRE; - - /* Check for new receive data */ - if (rxfifo_numchars(sc) > 0) - sc->lsr |= LSR_RXRDY; - else - sc->lsr &= ~LSR_RXRDY; - - reg = sc->lsr; - - /* The LSR_OE bit is cleared on LSR read */ - sc->lsr &= ~LSR_OE; - break; - case REG_MSR: - /* - * MSR delta bits are cleared on read - */ - reg = sc->msr; - sc->msr &= ~MSR_DELTA_MASK; - break; - case REG_SCR: - reg = sc->scr; - break; - default: - reg = 0xFF; - break; - } - -done: - uart_toggle_intr(sc); - pthread_mutex_unlock(&sc->mtx); - - return (reg); -} - -int -uart_legacy_alloc(int which, int *baseaddr, int *irq) -{ - - if (which < 0 || which >= UART_NLDEVS || uart_lres[which].inuse) - return (-1); - - uart_lres[which].inuse = true; - *baseaddr = uart_lres[which].baseaddr; - *irq = uart_lres[which].irq; - - return (0); -} - -struct uart_softc * -uart_init(uart_intr_func_t intr_assert, uart_intr_func_t intr_deassert, - void *arg) -{ - 
struct uart_softc *sc; - - sc = calloc(1, sizeof(struct uart_softc)); - - sc->arg = arg; - sc->intr_assert = intr_assert; - sc->intr_deassert = intr_deassert; - - pthread_mutex_init(&sc->mtx, NULL); - - uart_reset(sc); - - return (sc); -} - -static int -uart_tty_backend(struct uart_softc *sc, const char *opts) -{ - int fd; - int retval; - - retval = -1; - - fd = open(opts, O_RDWR | O_NONBLOCK); - if (fd > 0 && isatty(fd)) { - sc->tty.fd = fd; - sc->tty.opened = true; - retval = 0; - } - - return (retval); -} - -int -uart_set_backend(struct uart_softc *sc, const char *opts) -{ - int retval; - - retval = -1; - - if (opts == NULL) - return (0); - - if (strcmp("stdio", opts) == 0) { - if (!uart_stdio) { - sc->tty.fd = STDIN_FILENO; - sc->tty.opened = true; - uart_stdio = true; - retval = 0; - } - } else if (uart_tty_backend(sc, opts) == 0) { - retval = 0; - } - - /* Make the backend file descriptor non-blocking */ - if (retval == 0) - retval = fcntl(sc->tty.fd, F_SETFL, O_NONBLOCK); - - if (retval == 0) - uart_opentty(sc); - - return (retval); -} diff --git a/usr.sbin/bhyve/uart_emul.h b/usr.sbin/bhyve/uart_emul.h deleted file mode 100644 index 993b92e..0000000 --- a/usr.sbin/bhyve/uart_emul.h +++ /dev/null @@ -1,45 +0,0 @@ -/*- - * Copyright (c) 2013 Neel Natu <neel@freebsd.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _UART_EMUL_H_ -#define _UART_EMUL_H_ - - -#define UART_IO_BAR_SIZE 8 - -struct uart_softc; - -typedef void (*uart_intr_func_t)(void *arg); -struct uart_softc *uart_init(uart_intr_func_t intr_assert, - uart_intr_func_t intr_deassert, void *arg); - -int uart_legacy_alloc(int unit, int *ioaddr, int *irq); -uint8_t uart_read(struct uart_softc *sc, int offset); -void uart_write(struct uart_softc *sc, int offset, uint8_t value); -int uart_set_backend(struct uart_softc *sc, const char *opt); -#endif diff --git a/usr.sbin/bhyve/virtio.c b/usr.sbin/bhyve/virtio.c deleted file mode 100644 index 11b1e62..0000000 --- a/usr.sbin/bhyve/virtio.c +++ /dev/null @@ -1,777 +0,0 @@ -/*- - * Copyright (c) 2013 Chris Torek <torek @ torek net> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/uio.h> - -#include <stdio.h> -#include <stdint.h> -#include <pthread.h> -#include <pthread_np.h> - -#include "bhyverun.h" -#include "pci_emul.h" -#include "virtio.h" - -/* - * Functions for dealing with generalized "virtual devices" as - * defined by <https://www.google.com/#output=search&q=virtio+spec> - */ - -/* - * In case we decide to relax the "virtio softc comes at the - * front of virtio-based device softc" constraint, let's use - * this to convert. - */ -#define DEV_SOFTC(vs) ((void *)(vs)) - -/* - * Link a virtio_softc to its constants, the device softc, and - * the PCI emulation. 
- */ -void -vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc, - void *dev_softc, struct pci_devinst *pi, - struct vqueue_info *queues) -{ - int i; - - /* vs and dev_softc addresses must match */ - assert((void *)vs == dev_softc); - vs->vs_vc = vc; - vs->vs_pi = pi; - pi->pi_arg = vs; - - vs->vs_queues = queues; - for (i = 0; i < vc->vc_nvq; i++) { - queues[i].vq_vs = vs; - queues[i].vq_num = i; - } -} - -/* - * Reset device (device-wide). This erases all queues, i.e., - * all the queues become invalid (though we don't wipe out the - * internal pointers, we just clear the VQ_ALLOC flag). - * - * It resets negotiated features to "none". - * - * If MSI-X is enabled, this also resets all the vectors to NO_VECTOR. - */ -void -vi_reset_dev(struct virtio_softc *vs) -{ - struct vqueue_info *vq; - int i, nvq; - - if (vs->vs_mtx) - assert(pthread_mutex_isowned_np(vs->vs_mtx)); - - nvq = vs->vs_vc->vc_nvq; - for (vq = vs->vs_queues, i = 0; i < nvq; vq++, i++) { - vq->vq_flags = 0; - vq->vq_last_avail = 0; - vq->vq_save_used = 0; - vq->vq_pfn = 0; - vq->vq_msix_idx = VIRTIO_MSI_NO_VECTOR; - } - vs->vs_negotiated_caps = 0; - vs->vs_curq = 0; - /* vs->vs_status = 0; -- redundant */ - if (vs->vs_isr) - pci_lintr_deassert(vs->vs_pi); - vs->vs_isr = 0; - vs->vs_msix_cfg_idx = VIRTIO_MSI_NO_VECTOR; -} - -/* - * Set I/O BAR (usually 0) to map PCI config registers. - */ -void -vi_set_io_bar(struct virtio_softc *vs, int barnum) -{ - size_t size; - - /* - * ??? should we use CFG0 if MSI-X is disabled? - * Existing code did not... - */ - size = VTCFG_R_CFG1 + vs->vs_vc->vc_cfgsize; - pci_emul_alloc_bar(vs->vs_pi, barnum, PCIBAR_IO, size); -} - -/* - * Initialize MSI-X vector capabilities if we're to use MSI-X, - * or MSI capabilities if not. - * - * We assume we want one MSI-X vector per queue, here, plus one - * for the config vec. 
- */ -int -vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix) -{ - int nvec; - - if (use_msix) { - vs->vs_flags |= VIRTIO_USE_MSIX; - VS_LOCK(vs); - vi_reset_dev(vs); /* set all vectors to NO_VECTOR */ - VS_UNLOCK(vs); - nvec = vs->vs_vc->vc_nvq + 1; - if (pci_emul_add_msixcap(vs->vs_pi, nvec, barnum)) - return (1); - } else - vs->vs_flags &= ~VIRTIO_USE_MSIX; - - /* Only 1 MSI vector for bhyve */ - pci_emul_add_msicap(vs->vs_pi, 1); - - /* Legacy interrupts are mandatory for virtio devices */ - pci_lintr_request(vs->vs_pi); - - return (0); -} - -/* - * Initialize the currently-selected virtio queue (vs->vs_curq). - * The guest just gave us a page frame number, from which we can - * calculate the addresses of the queue. - */ -void -vi_vq_init(struct virtio_softc *vs, uint32_t pfn) -{ - struct vqueue_info *vq; - uint64_t phys; - size_t size; - char *base; - - vq = &vs->vs_queues[vs->vs_curq]; - vq->vq_pfn = pfn; - phys = (uint64_t)pfn << VRING_PFN; - size = vring_size(vq->vq_qsize); - base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size); - - /* First page(s) are descriptors... */ - vq->vq_desc = (struct virtio_desc *)base; - base += vq->vq_qsize * sizeof(struct virtio_desc); - - /* ... immediately followed by "avail" ring (entirely uint16_t's) */ - vq->vq_avail = (struct vring_avail *)base; - base += (2 + vq->vq_qsize + 1) * sizeof(uint16_t); - - /* Then it's rounded up to the next page... */ - base = (char *)roundup2((uintptr_t)base, VRING_ALIGN); - - /* ... and the last page(s) are the used ring. */ - vq->vq_used = (struct vring_used *)base; - - /* Mark queue as allocated, and start at 0 when we use it. */ - vq->vq_flags = VQ_ALLOC; - vq->vq_last_avail = 0; - vq->vq_save_used = 0; -} - -/* - * Helper inline for vq_getchain(): record the i'th "real" - * descriptor. 
- */ -static inline void -_vq_record(int i, volatile struct virtio_desc *vd, struct vmctx *ctx, - struct iovec *iov, int n_iov, uint16_t *flags) { - - if (i >= n_iov) - return; - iov[i].iov_base = paddr_guest2host(ctx, vd->vd_addr, vd->vd_len); - iov[i].iov_len = vd->vd_len; - if (flags != NULL) - flags[i] = vd->vd_flags; -} -#define VQ_MAX_DESCRIPTORS 512 /* see below */ - -/* - * Examine the chain of descriptors starting at the "next one" to - * make sure that they describe a sensible request. If so, return - * the number of "real" descriptors that would be needed/used in - * acting on this request. This may be smaller than the number of - * available descriptors, e.g., if there are two available but - * they are two separate requests, this just returns 1. Or, it - * may be larger: if there are indirect descriptors involved, - * there may only be one descriptor available but it may be an - * indirect pointing to eight more. We return 8 in this case, - * i.e., we do not count the indirect descriptors, only the "real" - * ones. - * - * Basically, this vets the vd_flags and vd_next field of each - * descriptor and tells you how many are involved. Since some may - * be indirect, this also needs the vmctx (in the pci_devinst - * at vs->vs_pi) so that it can find indirect descriptors. - * - * As we process each descriptor, we copy and adjust it (guest to - * host address wise, also using the vmtctx) into the given iov[] - * array (of the given size). If the array overflows, we stop - * placing values into the array but keep processing descriptors, - * up to VQ_MAX_DESCRIPTORS, before giving up and returning -1. - * So you, the caller, must not assume that iov[] is as big as the - * return value (you can process the same thing twice to allocate - * a larger iov array if needed, or supply a zero length to find - * out how much space is needed). 
- * - * If you want to verify the WRITE flag on each descriptor, pass a - * non-NULL "flags" pointer to an array of "uint16_t" of the same size - * as n_iov and we'll copy each vd_flags field after unwinding any - * indirects. - * - * If some descriptor(s) are invalid, this prints a diagnostic message - * and returns -1. If no descriptors are ready now it simply returns 0. - * - * You are assumed to have done a vq_ring_ready() if needed (note - * that vq_has_descs() does one). - */ -int -vq_getchain(struct vqueue_info *vq, uint16_t *pidx, - struct iovec *iov, int n_iov, uint16_t *flags) -{ - int i; - u_int ndesc, n_indir; - u_int idx, next; - volatile struct virtio_desc *vdir, *vindir, *vp; - struct vmctx *ctx; - struct virtio_softc *vs; - const char *name; - - vs = vq->vq_vs; - name = vs->vs_vc->vc_name; - - /* - * Note: it's the responsibility of the guest not to - * update vq->vq_avail->va_idx until all of the descriptors - * the guest has written are valid (including all their - * vd_next fields and vd_flags). - * - * Compute (last_avail - va_idx) in integers mod 2**16. This is - * the number of descriptors the device has made available - * since the last time we updated vq->vq_last_avail. - * - * We just need to do the subtraction as an unsigned int, - * then trim off excess bits. - */ - idx = vq->vq_last_avail; - ndesc = (uint16_t)((u_int)vq->vq_avail->va_idx - idx); - if (ndesc == 0) - return (0); - if (ndesc > vq->vq_qsize) { - /* XXX need better way to diagnose issues */ - fprintf(stderr, - "%s: ndesc (%u) out of range, driver confused?\r\n", - name, (u_int)ndesc); - return (-1); - } - - /* - * Now count/parse "involved" descriptors starting from - * the head of the chain. - * - * To prevent loops, we could be more complicated and - * check whether we're re-visiting a previously visited - * index, but we just abort if the count gets excessive. 
- */ - ctx = vs->vs_pi->pi_vmctx; - *pidx = next = vq->vq_avail->va_ring[idx & (vq->vq_qsize - 1)]; - vq->vq_last_avail++; - for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->vd_next) { - if (next >= vq->vq_qsize) { - fprintf(stderr, - "%s: descriptor index %u out of range, " - "driver confused?\r\n", - name, next); - return (-1); - } - vdir = &vq->vq_desc[next]; - if ((vdir->vd_flags & VRING_DESC_F_INDIRECT) == 0) { - _vq_record(i, vdir, ctx, iov, n_iov, flags); - i++; - } else if ((vs->vs_vc->vc_hv_caps & - VIRTIO_RING_F_INDIRECT_DESC) == 0) { - fprintf(stderr, - "%s: descriptor has forbidden INDIRECT flag, " - "driver confused?\r\n", - name); - return (-1); - } else { - n_indir = vdir->vd_len / 16; - if ((vdir->vd_len & 0xf) || n_indir == 0) { - fprintf(stderr, - "%s: invalid indir len 0x%x, " - "driver confused?\r\n", - name, (u_int)vdir->vd_len); - return (-1); - } - vindir = paddr_guest2host(ctx, - vdir->vd_addr, vdir->vd_len); - /* - * Indirects start at the 0th, then follow - * their own embedded "next"s until those run - * out. Each one's indirect flag must be off - * (we don't really have to check, could just - * ignore errors...). - */ - next = 0; - for (;;) { - vp = &vindir[next]; - if (vp->vd_flags & VRING_DESC_F_INDIRECT) { - fprintf(stderr, - "%s: indirect desc has INDIR flag," - " driver confused?\r\n", - name); - return (-1); - } - _vq_record(i, vp, ctx, iov, n_iov, flags); - if (++i > VQ_MAX_DESCRIPTORS) - goto loopy; - if ((vp->vd_flags & VRING_DESC_F_NEXT) == 0) - break; - next = vp->vd_next; - if (next >= n_indir) { - fprintf(stderr, - "%s: invalid next %u > %u, " - "driver confused?\r\n", - name, (u_int)next, n_indir); - return (-1); - } - } - } - if ((vdir->vd_flags & VRING_DESC_F_NEXT) == 0) - return (i); - } -loopy: - fprintf(stderr, - "%s: descriptor loop? count > %d - driver confused?\r\n", - name, i); - return (-1); -} - -/* - * Return the currently-first request chain back to the available queue. 
- * - * (This chain is the one you handled when you called vq_getchain() - * and used its positive return value.) - */ -void -vq_retchain(struct vqueue_info *vq) -{ - - vq->vq_last_avail--; -} - -/* - * Return specified request chain to the guest, setting its I/O length - * to the provided value. - * - * (This chain is the one you handled when you called vq_getchain() - * and used its positive return value.) - */ -void -vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen) -{ - uint16_t uidx, mask; - volatile struct vring_used *vuh; - volatile struct virtio_used *vue; - - /* - * Notes: - * - mask is N-1 where N is a power of 2 so computes x % N - * - vuh points to the "used" data shared with guest - * - vue points to the "used" ring entry we want to update - * - head is the same value we compute in vq_iovecs(). - * - * (I apologize for the two fields named vu_idx; the - * virtio spec calls the one that vue points to, "id"...) - */ - mask = vq->vq_qsize - 1; - vuh = vq->vq_used; - - uidx = vuh->vu_idx; - vue = &vuh->vu_ring[uidx++ & mask]; - vue->vu_idx = idx; - vue->vu_tlen = iolen; - vuh->vu_idx = uidx; -} - -/* - * Driver has finished processing "available" chains and calling - * vq_relchain on each one. If driver used all the available - * chains, used_all should be set. - * - * If the "used" index moved we may need to inform the guest, i.e., - * deliver an interrupt. Even if the used index did NOT move we - * may need to deliver an interrupt, if the avail ring is empty and - * we are supposed to interrupt on empty. - * - * Note that used_all_avail is provided by the caller because it's - * a snapshot of the ring state when he decided to finish interrupt - * processing -- it's possible that descriptors became available after - * that point. (It's also typically a constant 1/True as well.) 
- */ -void -vq_endchains(struct vqueue_info *vq, int used_all_avail) -{ - struct virtio_softc *vs; - uint16_t event_idx, new_idx, old_idx; - int intr; - - /* - * Interrupt generation: if we're using EVENT_IDX, - * interrupt if we've crossed the event threshold. - * Otherwise interrupt is generated if we added "used" entries, - * but suppressed by VRING_AVAIL_F_NO_INTERRUPT. - * - * In any case, though, if NOTIFY_ON_EMPTY is set and the - * entire avail was processed, we need to interrupt always. - */ - vs = vq->vq_vs; - old_idx = vq->vq_save_used; - vq->vq_save_used = new_idx = vq->vq_used->vu_idx; - if (used_all_avail && - (vs->vs_negotiated_caps & VIRTIO_F_NOTIFY_ON_EMPTY)) - intr = 1; - else if (vs->vs_negotiated_caps & VIRTIO_RING_F_EVENT_IDX) { - event_idx = VQ_USED_EVENT_IDX(vq); - /* - * This calculation is per docs and the kernel - * (see src/sys/dev/virtio/virtio_ring.h). - */ - intr = (uint16_t)(new_idx - event_idx - 1) < - (uint16_t)(new_idx - old_idx); - } else { - intr = new_idx != old_idx && - !(vq->vq_avail->va_flags & VRING_AVAIL_F_NO_INTERRUPT); - } - if (intr) - vq_interrupt(vs, vq); -} - -/* Note: these are in sorted order to make for a fast search */ -static struct config_reg { - uint16_t cr_offset; /* register offset */ - uint8_t cr_size; /* size (bytes) */ - uint8_t cr_ro; /* true => reg is read only */ - const char *cr_name; /* name of reg */ -} config_regs[] = { - { VTCFG_R_HOSTCAP, 4, 1, "HOSTCAP" }, - { VTCFG_R_GUESTCAP, 4, 0, "GUESTCAP" }, - { VTCFG_R_PFN, 4, 0, "PFN" }, - { VTCFG_R_QNUM, 2, 1, "QNUM" }, - { VTCFG_R_QSEL, 2, 0, "QSEL" }, - { VTCFG_R_QNOTIFY, 2, 0, "QNOTIFY" }, - { VTCFG_R_STATUS, 1, 0, "STATUS" }, - { VTCFG_R_ISR, 1, 0, "ISR" }, - { VTCFG_R_CFGVEC, 2, 0, "CFGVEC" }, - { VTCFG_R_QVEC, 2, 0, "QVEC" }, -}; - -static inline struct config_reg * -vi_find_cr(int offset) { - u_int hi, lo, mid; - struct config_reg *cr; - - lo = 0; - hi = sizeof(config_regs) / sizeof(*config_regs) - 1; - while (hi >= lo) { - mid = (hi + lo) >> 1; - 
cr = &config_regs[mid]; - if (cr->cr_offset == offset) - return (cr); - if (cr->cr_offset < offset) - lo = mid + 1; - else - hi = mid - 1; - } - return (NULL); -} - -/* - * Handle pci config space reads. - * If it's to the MSI-X info, do that. - * If it's part of the virtio standard stuff, do that. - * Otherwise dispatch to the actual driver. - */ -uint64_t -vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int baridx, uint64_t offset, int size) -{ - struct virtio_softc *vs = pi->pi_arg; - struct virtio_consts *vc; - struct config_reg *cr; - uint64_t virtio_config_size, max; - const char *name; - uint32_t newoff; - uint32_t value; - int error; - - if (vs->vs_flags & VIRTIO_USE_MSIX) { - if (baridx == pci_msix_table_bar(pi) || - baridx == pci_msix_pba_bar(pi)) { - return (pci_emul_msix_tread(pi, offset, size)); - } - } - - /* XXX probably should do something better than just assert() */ - assert(baridx == 0); - - if (vs->vs_mtx) - pthread_mutex_lock(vs->vs_mtx); - - vc = vs->vs_vc; - name = vc->vc_name; - value = size == 1 ? 0xff : size == 2 ? 0xffff : 0xffffffff; - - if (size != 1 && size != 2 && size != 4) - goto bad; - - if (pci_msix_enabled(pi)) - virtio_config_size = VTCFG_R_CFG1; - else - virtio_config_size = VTCFG_R_CFG0; - - if (offset >= virtio_config_size) { - /* - * Subtract off the standard size (including MSI-X - * registers if enabled) and dispatch to underlying driver. - * If that fails, fall into general code. - */ - newoff = offset - virtio_config_size; - max = vc->vc_cfgsize ? 
vc->vc_cfgsize : 0x100000000; - if (newoff + size > max) - goto bad; - error = (*vc->vc_cfgread)(DEV_SOFTC(vs), newoff, size, &value); - if (!error) - goto done; - } - -bad: - cr = vi_find_cr(offset); - if (cr == NULL || cr->cr_size != size) { - if (cr != NULL) { - /* offset must be OK, so size must be bad */ - fprintf(stderr, - "%s: read from %s: bad size %d\r\n", - name, cr->cr_name, size); - } else { - fprintf(stderr, - "%s: read from bad offset/size %jd/%d\r\n", - name, (uintmax_t)offset, size); - } - goto done; - } - - switch (offset) { - case VTCFG_R_HOSTCAP: - value = vc->vc_hv_caps; - break; - case VTCFG_R_GUESTCAP: - value = vs->vs_negotiated_caps; - break; - case VTCFG_R_PFN: - if (vs->vs_curq < vc->vc_nvq) - value = vs->vs_queues[vs->vs_curq].vq_pfn; - break; - case VTCFG_R_QNUM: - value = vs->vs_curq < vc->vc_nvq ? - vs->vs_queues[vs->vs_curq].vq_qsize : 0; - break; - case VTCFG_R_QSEL: - value = vs->vs_curq; - break; - case VTCFG_R_QNOTIFY: - value = 0; /* XXX */ - break; - case VTCFG_R_STATUS: - value = vs->vs_status; - break; - case VTCFG_R_ISR: - value = vs->vs_isr; - vs->vs_isr = 0; /* a read clears this flag */ - if (value) - pci_lintr_deassert(pi); - break; - case VTCFG_R_CFGVEC: - value = vs->vs_msix_cfg_idx; - break; - case VTCFG_R_QVEC: - value = vs->vs_curq < vc->vc_nvq ? - vs->vs_queues[vs->vs_curq].vq_msix_idx : - VIRTIO_MSI_NO_VECTOR; - break; - } -done: - if (vs->vs_mtx) - pthread_mutex_unlock(vs->vs_mtx); - return (value); -} - -/* - * Handle pci config space writes. - * If it's to the MSI-X info, do that. - * If it's part of the virtio standard stuff, do that. - * Otherwise dispatch to the actual driver. 
- */ -void -vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int baridx, uint64_t offset, int size, uint64_t value) -{ - struct virtio_softc *vs = pi->pi_arg; - struct vqueue_info *vq; - struct virtio_consts *vc; - struct config_reg *cr; - uint64_t virtio_config_size, max; - const char *name; - uint32_t newoff; - int error; - - if (vs->vs_flags & VIRTIO_USE_MSIX) { - if (baridx == pci_msix_table_bar(pi) || - baridx == pci_msix_pba_bar(pi)) { - pci_emul_msix_twrite(pi, offset, size, value); - return; - } - } - - /* XXX probably should do something better than just assert() */ - assert(baridx == 0); - - if (vs->vs_mtx) - pthread_mutex_lock(vs->vs_mtx); - - vc = vs->vs_vc; - name = vc->vc_name; - - if (size != 1 && size != 2 && size != 4) - goto bad; - - if (pci_msix_enabled(pi)) - virtio_config_size = VTCFG_R_CFG1; - else - virtio_config_size = VTCFG_R_CFG0; - - if (offset >= virtio_config_size) { - /* - * Subtract off the standard size (including MSI-X - * registers if enabled) and dispatch to underlying driver. - */ - newoff = offset - virtio_config_size; - max = vc->vc_cfgsize ? 
vc->vc_cfgsize : 0x100000000; - if (newoff + size > max) - goto bad; - error = (*vc->vc_cfgwrite)(DEV_SOFTC(vs), newoff, size, value); - if (!error) - goto done; - } - -bad: - cr = vi_find_cr(offset); - if (cr == NULL || cr->cr_size != size || cr->cr_ro) { - if (cr != NULL) { - /* offset must be OK, wrong size and/or reg is R/O */ - if (cr->cr_size != size) - fprintf(stderr, - "%s: write to %s: bad size %d\r\n", - name, cr->cr_name, size); - if (cr->cr_ro) - fprintf(stderr, - "%s: write to read-only reg %s\r\n", - name, cr->cr_name); - } else { - fprintf(stderr, - "%s: write to bad offset/size %jd/%d\r\n", - name, (uintmax_t)offset, size); - } - goto done; - } - - switch (offset) { - case VTCFG_R_GUESTCAP: - vs->vs_negotiated_caps = value & vc->vc_hv_caps; - if (vc->vc_apply_features) - (*vc->vc_apply_features)(DEV_SOFTC(vs), - vs->vs_negotiated_caps); - break; - case VTCFG_R_PFN: - if (vs->vs_curq >= vc->vc_nvq) - goto bad_qindex; - vi_vq_init(vs, value); - break; - case VTCFG_R_QSEL: - /* - * Note that the guest is allowed to select an - * invalid queue; we just need to return a QNUM - * of 0 while the bad queue is selected. 
- */ - vs->vs_curq = value; - break; - case VTCFG_R_QNOTIFY: - if (value >= vc->vc_nvq) { - fprintf(stderr, "%s: queue %d notify out of range\r\n", - name, (int)value); - goto done; - } - vq = &vs->vs_queues[value]; - if (vq->vq_notify) - (*vq->vq_notify)(DEV_SOFTC(vs), vq); - else if (vc->vc_qnotify) - (*vc->vc_qnotify)(DEV_SOFTC(vs), vq); - else - fprintf(stderr, - "%s: qnotify queue %d: missing vq/vc notify\r\n", - name, (int)value); - break; - case VTCFG_R_STATUS: - vs->vs_status = value; - if (value == 0) - (*vc->vc_reset)(DEV_SOFTC(vs)); - break; - case VTCFG_R_CFGVEC: - vs->vs_msix_cfg_idx = value; - break; - case VTCFG_R_QVEC: - if (vs->vs_curq >= vc->vc_nvq) - goto bad_qindex; - vq = &vs->vs_queues[vs->vs_curq]; - vq->vq_msix_idx = value; - break; - } - goto done; - -bad_qindex: - fprintf(stderr, - "%s: write config reg %s: curq %d >= max %d\r\n", - name, cr->cr_name, vs->vs_curq, vc->vc_nvq); -done: - if (vs->vs_mtx) - pthread_mutex_unlock(vs->vs_mtx); -} diff --git a/usr.sbin/bhyve/virtio.h b/usr.sbin/bhyve/virtio.h deleted file mode 100644 index 0e96a1d..0000000 --- a/usr.sbin/bhyve/virtio.h +++ /dev/null @@ -1,464 +0,0 @@ -/*- - * Copyright (c) 2013 Chris Torek <torek @ torek net> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _VIRTIO_H_ -#define _VIRTIO_H_ - -/* - * These are derived from several virtio specifications. - * - * Some useful links: - * https://github.com/rustyrussell/virtio-spec - * http://people.redhat.com/pbonzini/virtio-spec.pdf - */ - -/* - * A virtual device has zero or more "virtual queues" (virtqueue). - * Each virtqueue uses at least two 4096-byte pages, laid out thus: - * - * +-----------------------------------------------+ - * | "desc": <N> descriptors, 16 bytes each | - * | ----------------------------------------- | - * | "avail": 2 uint16; <N> uint16; 1 uint16 | - * | ----------------------------------------- | - * | pad to 4k boundary | - * +-----------------------------------------------+ - * | "used": 2 x uint16; <N> elems; 1 uint16 | - * | ----------------------------------------- | - * | pad to 4k boundary | - * +-----------------------------------------------+ - * - * The number <N> that appears here is always a power of two and is - * limited to no more than 32768 (as it must fit in a 16-bit field). - * If <N> is sufficiently large, the above will occupy more than - * two pages. 
In any case, all pages must be physically contiguous - * within the guest's physical address space. - * - * The <N> 16-byte "desc" descriptors consist of a 64-bit guest - * physical address <addr>, a 32-bit length <len>, a 16-bit - * <flags>, and a 16-bit <next> field (all in guest byte order). - * - * There are three flags that may be set : - * NEXT descriptor is chained, so use its "next" field - * WRITE descriptor is for host to write into guest RAM - * (else host is to read from guest RAM) - * INDIRECT descriptor address field is (guest physical) - * address of a linear array of descriptors - * - * Unless INDIRECT is set, <len> is the number of bytes that may - * be read/written from guest physical address <addr>. If - * INDIRECT is set, WRITE is ignored and <len> provides the length - * of the indirect descriptors (and <len> must be a multiple of - * 16). Note that NEXT may still be set in the main descriptor - * pointing to the indirect, and should be set in each indirect - * descriptor that uses the next descriptor (these should generally - * be numbered sequentially). However, INDIRECT must not be set - * in the indirect descriptors. Upon reaching an indirect descriptor - * without a NEXT bit, control returns to the direct descriptors. - * - * Except inside an indirect, each <next> value must be in the - * range [0 .. N) (i.e., the half-open interval). (Inside an - * indirect, each <next> must be in the range [0 .. <len>/16).) - * - * The "avail" data structures reside in the same pages as the - * "desc" structures since both together are used by the device to - * pass information to the hypervisor's virtual driver. These - * begin with a 16-bit <flags> field and 16-bit index <idx>, then - * have <N> 16-bit <ring> values, followed by one final 16-bit - * field <used_event>. The <N> <ring> entries are simply indices - * indices into the descriptor ring (and thus must meet the same - * constraints as each <next> value). 
However, <idx> is counted - * up from 0 (initially) and simply wraps around after 65535; it - * is taken mod <N> to find the next available entry. - * - * The "used" ring occupies a separate page or pages, and contains - * values written from the virtual driver back to the guest OS. - * This begins with a 16-bit <flags> and 16-bit <idx>, then there - * are <N> "vring_used" elements, followed by a 16-bit <avail_event>. - * The <N> "vring_used" elements consist of a 32-bit <id> and a - * 32-bit <len> (vu_tlen below). The <id> is simply the index of - * the head of a descriptor chain the guest made available - * earlier, and the <len> is the number of bytes actually written, - * e.g., in the case of a network driver that provided a large - * receive buffer but received only a small amount of data. - * - * The two event fields, <used_event> and <avail_event>, in the - * avail and used rings (respectively -- note the reversal!), are - * always provided, but are used only if the virtual device - * negotiates the VIRTIO_RING_F_EVENT_IDX feature during feature - * negotiation. Similarly, both rings provide a flag -- - * VRING_AVAIL_F_NO_INTERRUPT and VRING_USED_F_NO_NOTIFY -- in - * their <flags> field, indicating that the guest does not need an - * interrupt, or that the hypervisor driver does not need a - * notify, when descriptors are added to the corresponding ring. - * (These are provided only for interrupt optimization and need - * not be implemented.) 
- */ -#define VRING_ALIGN 4096 - -#define VRING_DESC_F_NEXT (1 << 0) -#define VRING_DESC_F_WRITE (1 << 1) -#define VRING_DESC_F_INDIRECT (1 << 2) - -struct virtio_desc { /* AKA vring_desc */ - uint64_t vd_addr; /* guest physical address */ - uint32_t vd_len; /* length of scatter/gather seg */ - uint16_t vd_flags; /* VRING_F_DESC_* */ - uint16_t vd_next; /* next desc if F_NEXT */ -} __packed; - -struct virtio_used { /* AKA vring_used_elem */ - uint32_t vu_idx; /* head of used descriptor chain */ - uint32_t vu_tlen; /* length written-to */ -} __packed; - -#define VRING_AVAIL_F_NO_INTERRUPT 1 - -struct vring_avail { - uint16_t va_flags; /* VRING_AVAIL_F_* */ - uint16_t va_idx; /* counts to 65535, then cycles */ - uint16_t va_ring[]; /* size N, reported in QNUM value */ -/* uint16_t va_used_event; -- after N ring entries */ -} __packed; - -#define VRING_USED_F_NO_NOTIFY 1 -struct vring_used { - uint16_t vu_flags; /* VRING_USED_F_* */ - uint16_t vu_idx; /* counts to 65535, then cycles */ - struct virtio_used vu_ring[]; /* size N */ -/* uint16_t vu_avail_event; -- after N ring entries */ -} __packed; - -/* - * The address of any given virtual queue is determined by a single - * Page Frame Number register. The guest writes the PFN into the - * PCI config space. However, a device that has two or more - * virtqueues can have a different PFN, and size, for each queue. - * The number of queues is determinable via the PCI config space - * VTCFG_R_QSEL register. Writes to QSEL select the queue: 0 means - * queue #0, 1 means queue#1, etc. Once a queue is selected, the - * remaining PFN and QNUM registers refer to that queue. - * - * QNUM is a read-only register containing a nonzero power of two - * that indicates the (hypervisor's) queue size. Or, if reading it - * produces zero, the hypervisor does not have a corresponding - * queue. (The number of possible queues depends on the virtual - * device. 
The block device has just one; the network device - * provides either two -- 0 = receive, 1 = transmit -- or three, - * with 2 = control.) - * - * PFN is a read/write register giving the physical page address of - * the virtqueue in guest memory (the guest must allocate enough space - * based on the hypervisor's provided QNUM). - * - * QNOTIFY is effectively write-only: when the guest writes a queue - * number to the register, the hypervisor should scan the specified - * virtqueue. (Reading QNOTIFY currently always gets 0). - */ - -/* - * PFN register shift amount - */ -#define VRING_PFN 12 - -/* - * Virtio device types - * - * XXX Should really be merged with <dev/virtio/virtio.h> defines - */ -#define VIRTIO_TYPE_NET 1 -#define VIRTIO_TYPE_BLOCK 2 -#define VIRTIO_TYPE_CONSOLE 3 -#define VIRTIO_TYPE_ENTROPY 4 -#define VIRTIO_TYPE_BALLOON 5 -#define VIRTIO_TYPE_IOMEMORY 6 -#define VIRTIO_TYPE_RPMSG 7 -#define VIRTIO_TYPE_SCSI 8 -#define VIRTIO_TYPE_9P 9 - -/* experimental IDs start at 65535 and work down */ - -/* - * PCI vendor/device IDs - */ -#define VIRTIO_VENDOR 0x1AF4 -#define VIRTIO_DEV_NET 0x1000 -#define VIRTIO_DEV_BLOCK 0x1001 -#define VIRTIO_DEV_RANDOM 0x1002 - -/* - * PCI config space constants. - * - * If MSI-X is enabled, the ISR register is generally not used, - * and the configuration vector and queue vector appear at offsets - * 20 and 22 with the remaining configuration registers at 24. - * If MSI-X is not enabled, those two registers disappear and - * the remaining configuration registers start at offset 20. - */ -#define VTCFG_R_HOSTCAP 0 -#define VTCFG_R_GUESTCAP 4 -#define VTCFG_R_PFN 8 -#define VTCFG_R_QNUM 12 -#define VTCFG_R_QSEL 14 -#define VTCFG_R_QNOTIFY 16 -#define VTCFG_R_STATUS 18 -#define VTCFG_R_ISR 19 -#define VTCFG_R_CFGVEC 20 -#define VTCFG_R_QVEC 22 -#define VTCFG_R_CFG0 20 /* No MSI-X */ -#define VTCFG_R_CFG1 24 /* With MSI-X */ -#define VTCFG_R_MSIX 20 - -/* - * Bits in VTCFG_R_STATUS. 
Guests need not actually set any of these, - * but a guest writing 0 to this register means "please reset". - */ -#define VTCFG_STATUS_ACK 0x01 /* guest OS has acknowledged dev */ -#define VTCFG_STATUS_DRIVER 0x02 /* guest OS driver is loaded */ -#define VTCFG_STATUS_DRIVER_OK 0x04 /* guest OS driver ready */ -#define VTCFG_STATUS_FAILED 0x80 /* guest has given up on this dev */ - -/* - * Bits in VTCFG_R_ISR. These apply only if not using MSI-X. - * - * (We don't [yet?] ever use CONF_CHANGED.) - */ -#define VTCFG_ISR_QUEUES 0x01 /* re-scan queues */ -#define VTCFG_ISR_CONF_CHANGED 0x80 /* configuration changed */ - -#define VIRTIO_MSI_NO_VECTOR 0xFFFF - -/* - * Feature flags. - * Note: bits 0 through 23 are reserved to each device type. - */ -#define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24) -#define VIRTIO_RING_F_INDIRECT_DESC (1 << 28) -#define VIRTIO_RING_F_EVENT_IDX (1 << 29) - -/* From section 2.3, "Virtqueue Configuration", of the virtio specification */ -static inline size_t -vring_size(u_int qsz) -{ - size_t size; - - /* constant 3 below = va_flags, va_idx, va_used_event */ - size = sizeof(struct virtio_desc) * qsz + sizeof(uint16_t) * (3 + qsz); - size = roundup2(size, VRING_ALIGN); - - /* constant 3 below = vu_flags, vu_idx, vu_avail_event */ - size += sizeof(uint16_t) * 3 + sizeof(struct virtio_used) * qsz; - size = roundup2(size, VRING_ALIGN); - - return (size); -} - -struct vmctx; -struct pci_devinst; -struct vqueue_info; - -/* - * A virtual device, with some number (possibly 0) of virtual - * queues and some size (possibly 0) of configuration-space - * registers private to the device. The virtio_softc should come - * at the front of each "derived class", so that a pointer to the - * virtio_softc is also a pointer to the more specific, derived- - * from-virtio driver's softc. - * - * Note: inside each hypervisor virtio driver, changes to these - * data structures must be locked against other threads, if any. 
- * Except for PCI config space register read/write, we assume each - * driver does the required locking, but we need a pointer to the - * lock (if there is one) for PCI config space read/write ops. - * - * When the guest reads or writes the device's config space, the - * generic layer checks for operations on the special registers - * described above. If the offset of the register(s) being read - * or written is past the CFG area (CFG0 or CFG1), the request is - * passed on to the virtual device, after subtracting off the - * generic-layer size. (So, drivers can just use the offset as - * an offset into "struct config", for instance.) - * - * (The virtio layer also makes sure that the read or write is to/ - * from a "good" config offset, hence vc_cfgsize, and on BAR #0. - * However, the driver must verify the read or write size and offset - * and that no one is writing a readonly register.) - * - * The BROKED flag ("this thing done gone and broked") is for future - * use. - */ -#define VIRTIO_USE_MSIX 0x01 -#define VIRTIO_EVENT_IDX 0x02 /* use the event-index values */ -#define VIRTIO_BROKED 0x08 /* ??? 
*/ - -struct virtio_softc { - struct virtio_consts *vs_vc; /* constants (see below) */ - int vs_flags; /* VIRTIO_* flags from above */ - pthread_mutex_t *vs_mtx; /* POSIX mutex, if any */ - struct pci_devinst *vs_pi; /* PCI device instance */ - uint32_t vs_negotiated_caps; /* negotiated capabilities */ - struct vqueue_info *vs_queues; /* one per vc_nvq */ - int vs_curq; /* current queue */ - uint8_t vs_status; /* value from last status write */ - uint8_t vs_isr; /* ISR flags, if not MSI-X */ - uint16_t vs_msix_cfg_idx; /* MSI-X vector for config event */ -}; - -#define VS_LOCK(vs) \ -do { \ - if (vs->vs_mtx) \ - pthread_mutex_lock(vs->vs_mtx); \ -} while (0) - -#define VS_UNLOCK(vs) \ -do { \ - if (vs->vs_mtx) \ - pthread_mutex_unlock(vs->vs_mtx); \ -} while (0) - -struct virtio_consts { - const char *vc_name; /* name of driver (for diagnostics) */ - int vc_nvq; /* number of virtual queues */ - size_t vc_cfgsize; /* size of dev-specific config regs */ - void (*vc_reset)(void *); /* called on virtual device reset */ - void (*vc_qnotify)(void *, struct vqueue_info *); - /* called on QNOTIFY if no VQ notify */ - int (*vc_cfgread)(void *, int, int, uint32_t *); - /* called to read config regs */ - int (*vc_cfgwrite)(void *, int, int, uint32_t); - /* called to write config regs */ - void (*vc_apply_features)(void *, uint64_t); - /* called to apply negotiated features */ - uint64_t vc_hv_caps; /* hypervisor-provided capabilities */ -}; - -/* - * Data structure allocated (statically) per virtual queue. - * - * Drivers may change vq_qsize after a reset. When the guest OS - * requests a device reset, the hypervisor first calls - * vs->vs_vc->vc_reset(); then the data structure below is - * reinitialized (for each virtqueue: vs->vs_vc->vc_nvq). - * - * The remaining fields should only be fussed-with by the generic - * code. 
- * - * Note: the addresses of vq_desc, vq_avail, and vq_used are all - * computable from each other, but it's a lot simpler if we just - * keep a pointer to each one. The event indices are similarly - * (but more easily) computable, and this time we'll compute them: - * they're just XX_ring[N]. - */ -#define VQ_ALLOC 0x01 /* set once we have a pfn */ -#define VQ_BROKED 0x02 /* ??? */ -struct vqueue_info { - uint16_t vq_qsize; /* size of this queue (a power of 2) */ - void (*vq_notify)(void *, struct vqueue_info *); - /* called instead of vc_notify, if not NULL */ - - struct virtio_softc *vq_vs; /* backpointer to softc */ - uint16_t vq_num; /* we're the num'th queue in the softc */ - - uint16_t vq_flags; /* flags (see above) */ - uint16_t vq_last_avail; /* a recent value of vq_avail->va_idx */ - uint16_t vq_save_used; /* saved vq_used->vu_idx; see vq_endchains */ - uint16_t vq_msix_idx; /* MSI-X index, or VIRTIO_MSI_NO_VECTOR */ - - uint32_t vq_pfn; /* PFN of virt queue (not shifted!) */ - - volatile struct virtio_desc *vq_desc; /* descriptor array */ - volatile struct vring_avail *vq_avail; /* the "avail" ring */ - volatile struct vring_used *vq_used; /* the "used" ring */ - -}; -/* as noted above, these are sort of backwards, name-wise */ -#define VQ_AVAIL_EVENT_IDX(vq) \ - (*(volatile uint16_t *)&(vq)->vq_used->vu_ring[(vq)->vq_qsize]) -#define VQ_USED_EVENT_IDX(vq) \ - ((vq)->vq_avail->va_ring[(vq)->vq_qsize]) - -/* - * Is this ring ready for I/O? - */ -static inline int -vq_ring_ready(struct vqueue_info *vq) -{ - - return (vq->vq_flags & VQ_ALLOC); -} - -/* - * Are there "available" descriptors? (This does not count - * how many, just returns True if there are some.) - */ -static inline int -vq_has_descs(struct vqueue_info *vq) -{ - - return (vq_ring_ready(vq) && vq->vq_last_avail != - vq->vq_avail->va_idx); -} - -/* - * Deliver an interrupt to guest on the given virtual queue - * (if possible, or a generic MSI interrupt if not using MSI-X). 
- */ -static inline void -vq_interrupt(struct virtio_softc *vs, struct vqueue_info *vq) -{ - - if (pci_msix_enabled(vs->vs_pi)) - pci_generate_msix(vs->vs_pi, vq->vq_msix_idx); - else { - VS_LOCK(vs); - vs->vs_isr |= VTCFG_ISR_QUEUES; - pci_generate_msi(vs->vs_pi, 0); - pci_lintr_assert(vs->vs_pi); - VS_UNLOCK(vs); - } -} - -struct iovec; -void vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc, - void *dev_softc, struct pci_devinst *pi, - struct vqueue_info *queues); -int vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix); -void vi_reset_dev(struct virtio_softc *); -void vi_set_io_bar(struct virtio_softc *, int); - -int vq_getchain(struct vqueue_info *vq, uint16_t *pidx, - struct iovec *iov, int n_iov, uint16_t *flags); -void vq_retchain(struct vqueue_info *vq); -void vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen); -void vq_endchains(struct vqueue_info *vq, int used_all_avail); - -uint64_t vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int baridx, uint64_t offset, int size); -void vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int baridx, uint64_t offset, int size, uint64_t value); -#endif /* _VIRTIO_H_ */ diff --git a/usr.sbin/bhyve/xmsr.c b/usr.sbin/bhyve/xmsr.c deleted file mode 100644 index 5b7bfbb..0000000 --- a/usr.sbin/bhyve/xmsr.c +++ /dev/null @@ -1,230 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/types.h> - -#include <machine/cpufunc.h> -#include <machine/vmm.h> -#include <machine/specialreg.h> - -#include <vmmapi.h> - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include "xmsr.h" - -static int cpu_vendor_intel, cpu_vendor_amd; - -int -emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t val) -{ - - if (cpu_vendor_intel) { - switch (num) { - case 0xd04: /* Sandy Bridge uncore PMCs */ - case 0xc24: - return (0); - case MSR_BIOS_UPDT_TRIG: - return (0); - case MSR_BIOS_SIGN: - return (0); - default: - break; - } - } else if (cpu_vendor_amd) { - switch (num) { - case MSR_HWCR: - /* - * Ignore writes to hardware configuration MSR. 
- */ - return (0); - - case MSR_NB_CFG1: - case MSR_IC_CFG: - return (0); /* Ignore writes */ - - case MSR_PERFEVSEL0: - case MSR_PERFEVSEL1: - case MSR_PERFEVSEL2: - case MSR_PERFEVSEL3: - /* Ignore writes to the PerfEvtSel MSRs */ - return (0); - - case MSR_K7_PERFCTR0: - case MSR_K7_PERFCTR1: - case MSR_K7_PERFCTR2: - case MSR_K7_PERFCTR3: - /* Ignore writes to the PerfCtr MSRs */ - return (0); - - case MSR_P_STATE_CONTROL: - /* Ignore write to change the P-state */ - return (0); - - default: - break; - } - } - return (-1); -} - -int -emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t *val) -{ - int error = 0; - - if (cpu_vendor_intel) { - switch (num) { - case MSR_BIOS_SIGN: - case MSR_IA32_PLATFORM_ID: - case MSR_PKG_ENERGY_STATUS: - case MSR_PP0_ENERGY_STATUS: - case MSR_PP1_ENERGY_STATUS: - case MSR_DRAM_ENERGY_STATUS: - *val = 0; - break; - case MSR_RAPL_POWER_UNIT: - /* - * Use the default value documented in section - * "RAPL Interfaces" in Intel SDM vol3. - */ - *val = 0x000a1003; - break; - default: - error = -1; - break; - } - } else if (cpu_vendor_amd) { - switch (num) { - case MSR_BIOS_SIGN: - *val = 0; - break; - case MSR_HWCR: - /* - * Bios and Kernel Developer's Guides for AMD Families - * 12H, 14H, 15H and 16H. - */ - *val = 0x01000010; /* Reset value */ - *val |= 1 << 9; /* MONITOR/MWAIT disable */ - break; - - case MSR_NB_CFG1: - case MSR_IC_CFG: - /* - * The reset value is processor family dependent so - * just return 0. - */ - *val = 0; - break; - - case MSR_PERFEVSEL0: - case MSR_PERFEVSEL1: - case MSR_PERFEVSEL2: - case MSR_PERFEVSEL3: - /* - * PerfEvtSel MSRs are not properly virtualized so just - * return zero. - */ - *val = 0; - break; - - case MSR_K7_PERFCTR0: - case MSR_K7_PERFCTR1: - case MSR_K7_PERFCTR2: - case MSR_K7_PERFCTR3: - /* - * PerfCtr MSRs are not properly virtualized so just - * return zero. 
- */ - *val = 0; - break; - - case MSR_SMM_ADDR: - case MSR_SMM_MASK: - /* - * Return the reset value defined in the AMD Bios and - * Kernel Developer's Guide. - */ - *val = 0; - break; - - case MSR_P_STATE_LIMIT: - case MSR_P_STATE_CONTROL: - case MSR_P_STATE_STATUS: - case MSR_P_STATE_CONFIG(0): /* P0 configuration */ - *val = 0; - break; - - /* - * OpenBSD guests test bit 0 of this MSR to detect if the - * workaround for erratum 721 is already applied. - * http://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf - */ - case 0xC0011029: - *val = 1; - break; - - default: - error = -1; - break; - } - } else { - error = -1; - } - return (error); -} - -int -init_msr(void) -{ - int error; - u_int regs[4]; - char cpu_vendor[13]; - - do_cpuid(0, regs); - ((u_int *)&cpu_vendor)[0] = regs[1]; - ((u_int *)&cpu_vendor)[1] = regs[3]; - ((u_int *)&cpu_vendor)[2] = regs[2]; - cpu_vendor[12] = '\0'; - - error = 0; - if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { - cpu_vendor_amd = 1; - } else if (strcmp(cpu_vendor, "GenuineIntel") == 0) { - cpu_vendor_intel = 1; - } else { - fprintf(stderr, "Unknown cpu vendor \"%s\"\n", cpu_vendor); - error = -1; - } - return (error); -} diff --git a/usr.sbin/bhyve/xmsr.h b/usr.sbin/bhyve/xmsr.h deleted file mode 100644 index bcf65b7..0000000 --- a/usr.sbin/bhyve/xmsr.h +++ /dev/null @@ -1,36 +0,0 @@ -/*- - * Copyright (c) 2011 NetApp, Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _XMSR_H_ -#define _XMSR_H_ - -int init_msr(void); -int emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val); -int emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t *val); - -#endif |